Fix an IndexError in leading_indent
Tony Fast
@@ -1,648 +1,650 @@
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
5
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 deprecated in 7.0.
7 deprecated in 7.0.
8 """
8 """
9
9
10 # Copyright (c) IPython Development Team.
10 # Copyright (c) IPython Development Team.
11 # Distributed under the terms of the Modified BSD License.
11 # Distributed under the terms of the Modified BSD License.
12
12
13 from codeop import compile_command
13 from codeop import compile_command
14 import re
14 import re
15 import tokenize
15 import tokenize
16 from typing import List, Tuple
16 from typing import List, Tuple
17 import warnings
17 import warnings
18
18
19 _indent_re = re.compile(r'^[ \t]+')
19 _indent_re = re.compile(r'^[ \t]+')
20
20
21 def leading_indent(lines):
21 def leading_indent(lines):
22 """Remove leading indentation.
22 """Remove leading indentation.
23
23
24 If the first line starts with a spaces or tabs, the same whitespace will be
24 If the first line starts with a spaces or tabs, the same whitespace will be
25 removed from each following line in the cell.
25 removed from each following line in the cell.
26 """
26 """
27 if not lines:
28 return lines
27 m = _indent_re.match(lines[0])
29 m = _indent_re.match(lines[0])
28 if not m:
30 if not m:
29 return lines
31 return lines
30 space = m.group(0)
32 space = m.group(0)
31 n = len(space)
33 n = len(space)
32 return [l[n:] if l.startswith(space) else l
34 return [l[n:] if l.startswith(space) else l
33 for l in lines]
35 for l in lines]
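The two lines marked ``+`` above are the whole fix: previously leading_indent indexed lines[0] unconditionally, so an empty list of lines raised IndexError. A minimal sketch of the behaviour, assuming the function as defined above is in scope:

    # leading_indent with the new empty-input guard
    print(leading_indent(["    if x:\n", "        y()\n"]))
    # -> ['if x:\n', '    y()\n']
    print(leading_indent([]))
    # -> [] (previously raised IndexError on lines[0])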

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
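For reference, a short sketch of what the two strippers defined above do to pasted doctest-style and IPython-style sessions (names as defined in this module):

    # Stripping ">>>"/"..." and "In [n]: "/"...: " prompts
    print(classic_prompt([">>> a = 1\n", "... b = 2\n"]))
    # -> ['a = 1\n', 'b = 2\n']
    print(ipython_prompt(["In [1]: a = 1\n", "   ...: b = 2\n"]))
    # -> ['a = 1\n', 'b = 2\n']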

def cell_magic(lines):
    if not lines[0].startswith('%%'):
        return lines
    if re.match('%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = lines[0][2:-1].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
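A sketch of the rewrite cell_magic performs, using the function as defined above: the first line supplies the magic name and its arguments, and the rest of the cell becomes the body string:

    # A %%cell magic becomes a single run_cell_magic call
    print(cell_magic(["%%timeit -n1\n", "x = 1\n", "y = 2\n"])[0])
    # -> get_ipython().run_cell_magic('timeit', '-n1', 'x = 1\ny = 2\n')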


def _find_assign_op(token_line):
    """Get the index of the first assignment in the line ('=' not inside brackets)

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        if s in '([{':
            paren_level += 1
        elif s in ')]}':
            if paren_level > 0:
                paren_level -= 1

def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            break
    return end_line

def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p[:-2] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1][:-1]])           # Strip newline from last line

class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        self.start_line = start[0] - 1   # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError

class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after


class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1

                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after

# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately

def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    method = 'pinfo2' if esc == '??' \
                else 'psearch' if '*' in target \
                else 'pinfo'
    arg = " ".join([method, target])
    #Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
    else:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
            (next_input, t_magic_name, t_magic_arg_s)

def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '?')

def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '??')

def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    name, _, args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (name, args)

def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, '", "'.join(args.split()) )

def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, args)

def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    name, _, args = content.partition(' ')
    return '%s(%s)' % (name, ", ".join(args.split()))

tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }

class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.
        """
        for line in tokens_by_line:
            ix = 0
            while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after

_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)

class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        assert m is not None, content
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after

def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass
    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line

def show_linewise_tokens(s: str):
    """For investigation and debugging"""
    if not s.endswith('\n'):
        s += '\n'
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)

# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500

class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        self.line_transforms = [
            cell_magic,
        ]
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                pass
        return False, lines

    def do_token_transforms(self, lines):
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        source : string
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)
        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)
        if len(tokens_by_line) == 1:
            return 'incomplete', find_last_indent(lines)
        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                      tokenize.COMMENT}:
            ix -= 1

        if toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.

        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)
            return 'complete', None


def find_last_indent(lines):
    m = _indent_re.match(lines[-1])
    if not m:
        return 0
    return len(m.group(0).replace('\t', ' '*4))
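Finally, a usage sketch of the public entry points this file provides. The import path is an assumption based on the module docstring (IPython 7 ships this machinery as IPython.core.inputtransformer2):

    # TransformerManager applies the cleanup, line and token transforms above
    # and reports whether a partial cell is ready to run.
    from IPython.core.inputtransformer2 import TransformerManager

    tm = TransformerManager()
    print(tm.transform_cell("a = %timeit -n1 f()"), end="")
    # a = get_ipython().run_line_magic('timeit', '-n1 f()')
    print(tm.check_complete("if True:\n"))   # ('incomplete', 4)
    print(tm.check_complete("x = 1\n"))      # ('complete', None)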