##// END OF EJS Templates
Better alternative; try each transformer in a row,...
Matthias Bussonnier -
Show More
@@ -1,647 +1,648 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
5
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 deprecated in 7.0.
7 deprecated in 7.0.
8 """
8 """
9
9
10 # Copyright (c) IPython Development Team.
10 # Copyright (c) IPython Development Team.
11 # Distributed under the terms of the Modified BSD License.
11 # Distributed under the terms of the Modified BSD License.
12
12
13 from codeop import compile_command
13 from codeop import compile_command
14 import re
14 import re
15 import tokenize
15 import tokenize
16 from typing import List, Tuple
16 from typing import List, Tuple
17 import warnings
17 import warnings
18
18
_indent_re = re.compile(r'^[ \t]+')

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line in the cell that begins with it.
    Lines that do not start with the prefix are returned unchanged.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell.

    Returns
    -------
    list of str
        The same list object when there is nothing to strip (empty input or
        an unindented first line), otherwise a new list of dedented lines.
    """
    # An empty cell has no first line to inspect.
    if not lines:
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
34
34
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        # Fall back to the general prompt pattern for the first line.
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one leading prompt removed from each."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        Returns the input list itself when it is empty or no prompt is
        detected, otherwise a new list of stripped lines.
        """
        # Nothing to inspect in an empty block; avoids indexing lines[0].
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
66
66
# Plain Python REPL prompts. Only ``>>>`` is accepted as the initial prompt;
# ``...`` is only recognised as a prompt on the following lines.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` and the dotted continuation prompt.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
73
73
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or is a help request such as ``%%magic?``. Otherwise a one-element
        list holding the ``get_ipython().run_cell_magic(name, line, body)``
        call, where *line* is the rest of the first line and *body* is all
        following lines joined together.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    header = lines[0][2:]
    # Drop only the line terminator; a first line without one must keep its
    # final character.
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
84
84
85
85
86 def _find_assign_op(token_line):
86 def _find_assign_op(token_line):
87 """Get the index of the first assignment in the line ('=' not inside brackets)
87 """Get the index of the first assignment in the line ('=' not inside brackets)
88
88
89 Note: We don't try to support multiple special assignment (a = b = %foo)
89 Note: We don't try to support multiple special assignment (a = b = %foo)
90 """
90 """
91 paren_level = 0
91 paren_level = 0
92 for i, ti in enumerate(token_line):
92 for i, ti in enumerate(token_line):
93 s = ti.string
93 s = ti.string
94 if s == '=' and paren_level == 0:
94 if s == '=' and paren_level == 0:
95 return i
95 return i
96 if s in '([{':
96 if s in '([{':
97 paren_level += 1
97 paren_level += 1
98 elif s in ')]}':
98 elif s in ')]}':
99 if paren_level > 0:
99 if paren_level > 0:
100 paren_level -= 1
100 paren_level -= 1
101
101
def find_end_of_continued_line(lines, start_line: int):
    """Locate where a backslash-continued line stops.

    Starting at *start_line*, walks forward while the current line ends in a
    backslash followed by a newline. Returns the index of the first line
    that does not, or ``len(lines)`` if every remaining line is continued.

    Uses 0-indexed line numbers.
    """
    total = len(lines)
    idx = start_line
    while True:
        if not lines[idx].endswith('\\\n'):
            return idx
        idx += 1
        if idx >= total:
            # Ran off the end: the final line was itself continued.
            return idx
113
113
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell.
    start : tuple of (int, int)
        Line index and column at which the assembled text begins.
    end_line : int
        Index of the last physical line to include.

    Returns
    -------
    str
        The pieces joined with single spaces, without the continuation
        backslashes and without the final newline.
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    last = parts[-1]
    # Strip the newline from the last piece only if it is actually there, so
    # an unterminated final line keeps its last character.
    if last.endswith('\n'):
        last = last[:-1]
    return ' '.join([p[:-2] for p in parts[:-1]]  # Strip backslash+newline
                    + [last])
140
140
class TokenTransformBase:
    """Common base for transformers that work on Python tokens.

    IPython's special syntax must be left alone when it appears inside
    strings or comments, which is hard to guarantee with regexes. The code
    is therefore tokenised as Python and the special syntax is recognised
    in the token stream.

    Because the special syntax is not valid Python, tokenising may go wrong
    once it starts. Each transformer therefore locates and rewrites *one*
    occurrence at a time; tokens are regenerated after every rewrite to
    find the next one.

    Subclasses provide a class method ``find`` and an instance method
    ``transform``.

    ``priority`` breaks ties when several transformers match at the same
    position; lower numbers win. This allows "%magic?" to be turned into a
    help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* is indexed as (line, column); the line is 1-indexed and is
        # stored 0-indexed.
        line_one_based = start[0]
        self.start_line = line_one_based - 1
        self.start_col = start[1]

    def sortby(self):
        """Return the ordering key ``(start_line, start_col, priority)``."""
        return (self.start_line, self.start_col, self.priority)

    @classmethod
    def find(cls, tokens_by_line):
        """Locate one occurrence of this transformer's syntax.

        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects,
        one inner list per logical line, so it is easy to e.g. look at the
        first token of each line.

        Implementations return an instance of the class positioned at the
        match, or None when nothing matches.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Rewrite the occurrence previously located by ``find()``.

        Receives the physical lines as a list of strings and returns a
        similar list with the special syntax replaced.
        """
        raise NotImplementedError
191
191
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None if no
        logical line has ``=`` immediately followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after '=' are inspected, so index assign_ix + 2 must
            # exist. A token line cut short by a tokenizer error can end
            # right after the '%'.
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 3) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The physical lines from the start line through the end of its
        backslash continuation are replaced by one line: the text before
        the ``%`` followed by a ``get_ipython().run_line_magic(name, args)``
        call.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
222
222
223
223
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Locate the first ``a = !cmd`` style assignment in the cell.

        Returns an instance positioned at the ``!`` token, or None when no
        logical line has one directly after the ``=`` (whitespace-only
        error tokens in between are skipped).
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None or len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens that follows the '='.
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break

    def transform(self, lines: List[str]):
        """Rewrite the system assignment located by ``find()``.

        The start line and its backslash continuation collapse into a single
        line: the text before the ``!`` followed by a
        ``get_ipython().getoutput(cmd)`` call.
        """
        first, col = self.start_line, self.start_col

        prefix = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        command = assemble_continued_line(lines, (first, col), last)
        assert command.startswith('!'), command

        call = "get_ipython().getoutput({!r})".format(command[1:])
        replacement = [prefix + call + '\n']

        return lines[:first] + replacement + lines[last + 1:]
261
261
# The escape sequences below define the syntax transformations IPython
# applies to user input. Do NOT simply edit them here: regular expressions
# and other code may rely on the hardcoded values, and for all intents and
# purposes they constitute the 'IPython syntax', so treat them as fixed.

ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# One-character escapes.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately.
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
280
280
281 def _make_help_call(target, esc, next_input=None):
281 def _make_help_call(target, esc, next_input=None):
282 """Prepares a pinfo(2)/psearch call from a target name and the escape
282 """Prepares a pinfo(2)/psearch call from a target name and the escape
283 (i.e. ? or ??)"""
283 (i.e. ? or ??)"""
284 method = 'pinfo2' if esc == '??' \
284 method = 'pinfo2' if esc == '??' \
285 else 'psearch' if '*' in target \
285 else 'psearch' if '*' in target \
286 else 'pinfo'
286 else 'pinfo'
287 arg = " ".join([method, target])
287 arg = " ".join([method, target])
288 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
288 #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
289 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
289 t_magic_name, _, t_magic_arg_s = arg.partition(' ')
290 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
290 t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
291 if next_input is None:
291 if next_input is None:
292 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
292 return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
293 else:
293 else:
294 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
294 return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
295 (next_input, t_magic_name, t_magic_arg_s)
295 (next_input, t_magic_name, t_magic_arg_s)
296
296
297 def _tr_help(content):
297 def _tr_help(content):
298 """Translate lines escaped with: ?
298 """Translate lines escaped with: ?
299
299
300 A naked help line should fire the intro help screen (shell.show_usage())
300 A naked help line should fire the intro help screen (shell.show_usage())
301 """
301 """
302 if not content:
302 if not content:
303 return 'get_ipython().show_usage()'
303 return 'get_ipython().show_usage()'
304
304
305 return _make_help_call(content, '?')
305 return _make_help_call(content, '?')
306
306
307 def _tr_help2(content):
307 def _tr_help2(content):
308 """Translate lines escaped with: ??
308 """Translate lines escaped with: ??
309
309
310 A naked help line should fire the intro help screen (shell.show_usage())
310 A naked help line should fire the intro help screen (shell.show_usage())
311 """
311 """
312 if not content:
312 if not content:
313 return 'get_ipython().show_usage()'
313 return 'get_ipython().show_usage()'
314
314
315 return _make_help_call(content, '??')
315 return _make_help_call(content, '??')
316
316
317 def _tr_magic(content):
317 def _tr_magic(content):
318 "Translate lines escaped with a percent sign: %"
318 "Translate lines escaped with a percent sign: %"
319 name, _, args = content.partition(' ')
319 name, _, args = content.partition(' ')
320 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
320 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
321
321
322 def _tr_quote(content):
322 def _tr_quote(content):
323 "Translate lines escaped with a comma: ,"
323 "Translate lines escaped with a comma: ,"
324 name, _, args = content.partition(' ')
324 name, _, args = content.partition(' ')
325 return '%s("%s")' % (name, '", "'.join(args.split()) )
325 return '%s("%s")' % (name, '", "'.join(args.split()) )
326
326
327 def _tr_quote2(content):
327 def _tr_quote2(content):
328 "Translate lines escaped with a semicolon: ;"
328 "Translate lines escaped with a semicolon: ;"
329 name, _, args = content.partition(' ')
329 name, _, args = content.partition(' ')
330 return '%s("%s")' % (name, args)
330 return '%s("%s")' % (name, args)
331
331
332 def _tr_paren(content):
332 def _tr_paren(content):
333 "Translate lines escaped with a slash: /"
333 "Translate lines escaped with a slash: /"
334 name, _, args = content.partition(' ')
334 name, _, args = content.partition(' ')
335 return '%s(%s)' % (name, ", ".join(args.split()))
335 return '%s(%s)' % (name, ", ".join(args.split()))
336
336
# Dispatch table: escape prefix -> callable that takes the text after the
# prefix and returns the replacement Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
345
345
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns an instance positioned at the first token of a logical line
        (after any INDENT/DEDENT tokens) when that token is one of
        ``ESCAPE_SINGLES``, or None if no line starts that way.
        """
        for line in tokens_by_line:
            if not line:
                continue
            ix = 0
            ll = len(line)
            # Skip indentation bookkeeping tokens without running off the
            # end of a line that contains nothing else.
            while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The start line and its backslash continuation are replaced by one
        line: the original indentation followed by the code produced by the
        ``tr`` entry for the escape prefix.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes (!!, ??) take precedence over their
        # one-character prefixes.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
379
379
380 _help_end_re = re.compile(r"""(%{0,2}
380 _help_end_re = re.compile(r"""(%{0,2}
381 [a-zA-Z_*][\w*]* # Variable name
381 [a-zA-Z_*][\w*]* # Variable name
382 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
382 (\.[a-zA-Z_*][\w*]*)* # .etc.etc
383 )
383 )
384 (\?\??)$ # ? or ??
384 (\?\??)$ # ? or ??
385 """,
385 """,
386 re.VERBOSE)
386 re.VERBOSE)
387
387
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # Must outrank EscapedCommand (i.e. use a lower number) so that
    # inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        # *q_locn* is indexed as (line, column) of the '?'; the line is
        # 1-indexed and is stored 0-indexed.
        self.q_line = q_locn[0] - 1
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Locate the first help request (foo?) in the cell.

        A logical line qualifies when it has more than two tokens and its
        last-but-one token is ``?`` (the last token is NEWLINE). The
        instance records the first token that is not INDENT/DEDENT and the
        position of the ``?``. Returns None if no line qualifies.
        """
        skipped = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            if len(line) <= 2 or line[-2].string != '?':
                continue
            ix = 0
            while line[ix].type in skipped:
                ix += 1
            return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Rewrite the help request located by ``find()``.

        The physical lines from the start line through the line holding the
        ``?`` are replaced by one line containing the help call.

        Raises SyntaxError if the text does not end in a recognisable
        ``name?`` / ``name??`` pattern.
        """
        first, last = self.start_line, self.q_line
        piece = ''.join(lines[first:last + 1])
        indent = piece[:self.start_col]
        content = piece[self.start_col:]
        before = lines[:first]
        after = lines[last + 1:]

        m = _help_end_re.search(content)
        if m is None:
            raise SyntaxError(content)
        target, esc = m.group(1), m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        is_whole_cell = (not before) and (not after)
        if is_whole_cell and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')
        else:
            next_input = None

        call = _make_help_call(target, esc, next_input=next_input)
        return before + [indent + call + '\n'] + after
437
437
def make_tokens_by_line(lines):
    """Tokenize *lines* and group the tokens into logical lines.

    A multiline Python string or bracketed expression ends up in a single
    group. Returns a list of lists of tokens; a trailing empty group is
    dropped.
    """
    # NL tokens appear inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see
    # https://bugs.python.org/issue17061
    # The former should stay grouped while the latter should split, so the
    # bracket depth is tracked, similar to the internals of tokenize.
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    grouped = [[]]
    depth = 0
    readline = iter(lines).__next__
    try:
        for tok in tokenize.generate_tokens(readline):
            grouped[-1].append(tok)
            ends_group = (tok.type == tokenize.NEWLINE) \
                or ((tok.type == tokenize.NL) and (depth <= 0))
            if ends_group:
                grouped.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    if not grouped[-1]:
        grouped.pop()

    return grouped
469
469
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints a ``Line -------`` header for every group of tokens returned by
    ``make_tokens_by_line``, followed by each token of that group.
    """
    # Work on a copy that is guaranteed to end with a newline.
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
479
479
# Upper bound on the number of token-transform passes applied to one cell.
# The value is arbitrary; it only exists to prevent getting stuck in an
# infinite loop when transformations never stop reporting changes.
TRANSFORM_LOOP_LIMIT = 500
482
482
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Callables taking and returning a list of lines; run first, in order.
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Callables taking and returning a list of lines; run after cleanup.
        self.line_transforms = [
            cell_magic,
        ]
        # Classes exposing ``find(tokens_by_line)``; the object returned by
        # ``find`` exposes ``transform(lines)``.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run one token-based transformation.

        Every class in ``self.token_transformers`` is asked to ``find`` a
        match in the tokenised code. The matches are ordered with
        ``TokenTransformBase.sortby`` and tried in that order; the first one
        whose ``transform()`` does not raise ``SyntaxError`` is used.

        Returns (changed, lines). ``changed`` is False when no transformer
        matched, or when every matching transformer raised ``SyntaxError``.

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate cannot handle the input; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token-based transformations until nothing changes any more.

        Raises RuntimeError if the code is still changing after
        ``TRANSFORM_LOOP_LIMIT`` passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)
        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)
        if len(tokens_by_line) == 1:
            return 'incomplete', find_last_indent(lines)
        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                      tokenize.COMMENT}:
            ix -= 1

        # ix is -1 when the line holds nothing but NEWLINE/COMMENT tokens;
        # check it explicitly rather than letting the index wrap around.
        if ix >= 0 and toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.

        try:
            with warnings.catch_warnings():
                # Turn SyntaxWarning into an exception so it is reported as
                # 'invalid' below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)
        return 'complete', None
641
642
642
643
def find_last_indent(lines):
    """Return the indentation width of the last line in ``lines``.

    The width is the number of leading spaces and tabs on that line, with
    every tab counted as four spaces. Returns 0 when the last line has no
    leading whitespace or when ``lines`` is empty.
    """
    if not lines:
        # No line to measure; treat as unindented instead of raising IndexError.
        return 0
    last = lines[-1]
    # Leading run of spaces/tabs, i.e. what the pattern ``^[ \t]+`` would match.
    prefix = last[:len(last) - len(last.lstrip(' \t'))]
    return len(prefix.replace('\t', ' '*4))
General Comments 0
You need to be logged in to leave comments. Login now