##// END OF EJS Templates
Include empty lines condition in PromptStripper and cell_magic.
Tony Fast -
Show More
@@ -1,650 +1,652 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
5
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 deprecated in 7.0.
7 deprecated in 7.0.
8 """
8 """
9
9
10 # Copyright (c) IPython Development Team.
10 # Copyright (c) IPython Development Team.
11 # Distributed under the terms of the Modified BSD License.
11 # Distributed under the terms of the Modified BSD License.
12
12
13 from codeop import compile_command
13 from codeop import compile_command
14 import re
14 import re
15 import tokenize
15 import tokenize
16 from typing import List, Tuple
16 from typing import List, Tuple
17 import warnings
17 import warnings
18
18
# Matches a run of spaces/tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')


def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line in the cell.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        *lines* itself when it is empty or its first line is not indented;
        otherwise a new list in which every line that starts with the first
        line's indentation has that prefix removed. Lines that do not start
        with that exact prefix are left untouched.
    """
    if not lines:
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
36
36
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation,
        e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts (``>>>``), where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one leading prompt removed from each."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        An empty list is returned unchanged, so ``lines[0]`` is never
        evaluated on empty input.
        """
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
68
70
# Stripper for plain Python prompts: ``>>>`` must start the block (or ``>>>``
# / ``...`` the second line) before any ``>>>`` / ``...`` prefix is removed.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Stripper for IPython prompts: ``In [N]: `` and ``...: `` continuation prompts.
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
75
77
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        *lines* unchanged when it is empty, when its first line does not start
        with ``%%``, or when the first line looks like ``%%name?`` (left for
        the help transformer). Otherwise a one-element list holding a
        ``get_ipython().run_cell_magic(name, first_line, body)`` statement,
        where *body* is the remaining lines joined together.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Drop the '%%' marker and the trailing newline, if there is one. Slicing
    # with [2:-1] would eat a real character when the line has no newline.
    header = lines[0][2:]
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
86
88
87
89
def _find_assign_op(token_line):
    """Get the index of the first assignment in the line ('=' not inside brackets)

    Note: We don't try to support multiple special assignment (a = b = %foo)

    Parameters
    ----------
    token_line : sequence of objects with a ``string`` attribute
        The tokens of one logical line.

    Returns
    -------
    int or None
        Index of the first ``=`` token seen at bracket depth zero, or None if
        there is none.
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Compare against sets, not strings: ``'' in '([{'`` is True, so tokens
        # with an empty string would otherwise be counted as an open bracket.
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            if paren_level > 0:
                paren_level -= 1
    return None
103
105
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Parameters
    ----------
    lines : list of str
        Physical lines, each expected to keep its trailing newline.
    start_line : int
        Index of the line where the (possibly continued) statement starts.

    Returns
    -------
    int
        Index of the first line at or after *start_line* that does not end in
        a backslash followed by a newline. If every remaining line ends that
        way, ``len(lines)`` is returned, which is one past the last valid index.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            # Ran off the end: the final line itself ends with a backslash.
            break
    return end_line
115
117
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.

    Returns the pieces joined with single spaces; the last two characters of
    every piece but the final one, and the last character of the final piece,
    are dropped before joining.
    """
    # First piece starts at the given column; the rest are whole lines.
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p[:-2] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1][:-1]])           # Strip newline from last line
142
144
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the next
    piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if multiple
    transformers match in the same place. Lower numbers have higher priority.
    This allows "%magic?" to be turned into a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        """Return the sort key ``(start_line, start_col, priority)``."""
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        """Record the match position; *start* is a (1-indexed line, column) pair."""
        self.start_line = start[0] - 1   # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
193
195
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance positioned at the ``%`` token, or None when no
        line has a top-level ``=`` immediately followed by ``%`` and a name.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Strictly greater: line[assign_ix+2] is read below, so the line
            # needs at least assign_ix + 3 tokens.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text from the ``%`` to the end of the (possibly backslash-continued)
        line is replaced by a ``get_ipython().run_line_magic(name, args)`` call;
        everything before the ``%`` on that line is kept as the left-hand side.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
224
226
225
227
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        After a top-level ``=``, consecutive ERRORTOKEN tokens are scanned:
        whitespace-only ones are skipped, a ``!`` yields a match positioned at
        that token, and any other error token ends the scan for that line.
        Returns None when no line matches.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1

                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text from the ``!`` to the end of the (possibly backslash-continued)
        line is replaced by a ``get_ipython().getoutput(cmd)`` call; everything
        before the ``!`` on that line is kept as the left-hand side.
        """
        start_line, start_col = self.start_line, self.start_col

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
263
265
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

# Built from the named constants above so the sets cannot drift from them.
ESCAPE_SINGLES = {ESC_SHELL, ESC_HELP, ESC_MAGIC,
                  ESC_QUOTE, ESC_QUOTE2, ESC_PAREN}
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}  # %% (cell magic) is handled separately
282
284
def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)

    Parameters
    ----------
    target : str
        The name being inspected; passed through as the magic's argument.
    esc : str
        The escape used. ``'??'`` selects ``pinfo2``; anything else selects
        ``psearch`` when *target* contains ``*`` and ``pinfo`` otherwise.
    next_input : str, optional
        If given, a ``set_next_input`` call with this text is emitted before
        the magic call.

    Returns
    -------
    str
        Source text of the ``get_ipython()`` call(s), without a trailing newline.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    # Arguments for get_ipython().run_line_magic(magic_name, magic_args)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (method, target)
    return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
        (next_input, method, target)
298
300
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())

    *content* is the text following the ``?`` escape; when it is empty the
    ``show_usage()`` call is returned, otherwise a help call for *content*.
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '?')
308
310
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())

    *content* is the text following the ``??`` escape; when it is empty the
    ``show_usage()`` call is returned, otherwise a detailed help call for
    *content*.
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '??')
318
320
def _tr_magic(content):
    """Translate lines escaped with a percent sign: %

    The text up to the first space is the magic name; the rest is passed as
    its argument string.
    """
    name, _, args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
323
325
def _tr_quote(content):
    """Translate lines escaped with a comma: ,

    The text up to the first space is the callable; the rest is split on
    whitespace and each piece becomes one double-quoted string argument.
    """
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, '", "'.join(args.split()))
328
330
def _tr_quote2(content):
    """Translate lines escaped with a semicolon: ;

    The text up to the first space is the callable; everything after it is
    passed as one double-quoted string argument.
    """
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, args)
333
335
def _tr_paren(content):
    """Translate lines escaped with a slash: /

    The text up to the first space is the callable; the rest is split on
    whitespace and the pieces become its comma-separated arguments.
    """
    name, _, args = content.partition(' ')
    return '%s(%s)' % (name, ", ".join(args.split()))
338
340
# Maps each escape prefix to a callable that turns the text following the
# escape into the replacement Python source.
tr = { ESC_SHELL  : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP   : _tr_help,
       ESC_HELP2  : _tr_help2,
       ESC_MAGIC  : _tr_magic,
       ESC_QUOTE  : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN  : _tr_paren }
347
349
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Looks at the first token of each line that is not INDENT/DEDENT and
        returns an instance positioned there if its string is one of
        ESCAPE_SINGLES. Returns None when no line matches.
        """
        for line in tokens_by_line:
            if not line:
                continue
            ix = 0
            ll = len(line)
            # Bound the scan so a line holding only INDENT/DEDENT tokens
            # cannot run past the end of the token list.
            while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The (possibly backslash-continued) line is split into its escape
        prefix and content, and replaced by ``tr[escape](content)`` with the
        original indentation in front.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes (!!, ??) take precedence over single ones.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
381
383
# Matches a trailing help request: group 1 is the (optionally %/%%-prefixed,
# possibly dotted, possibly wildcarded) name, group 3 is the ? or ?? escape.
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
389
391
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """Record the start of the line and the location of the ``?`` token.

        Both *start* and *q_locn* are (1-indexed line, column) pairs.
        """
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A line matches when it has more than two tokens and its last-but-one
        token is ``?``. Returns None when no line matches.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises
        ------
        SyntaxError
            If the text before the ``?`` does not look like a name that help
            can be requested for.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
439
441
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.

    Parameters
    ----------
    lines : list of str
        Physical source lines, fed to ``tokenize.generate_tokens`` one at a time.

    Returns
    -------
    list of list of tokenize.TokenInfo
        One inner list per logical line. If tokenizing stops with a
        ``tokenize.TokenError``, the tokens produced so far are returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                if parenlev > 0:
                    parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass
    # Drop the empty group opened after the final NEWLINE/NL, if nothing
    # was appended to it.
    if not tokens_by_line[-1]:
        tokens_by_line.pop()

    return tokens_by_line
471
473
def show_linewise_tokens(s: str):
    """Print the tokens of ``s`` grouped by logical line.

    Intended for investigation and debugging. A trailing newline is added
    to ``s`` if it is missing before the text is split into lines and
    passed to ``make_tokens_by_line``.
    """
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
481
483
# Maximum number of token-transform passes applied to a single block of input.
# The value is arbitrary; it exists only to prevent getting stuck in an
# infinite loop if the transformations never stop changing the input.
TRANSFORM_LOOP_LIMIT = 500
484
486
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        """Set up the default lists of transformations."""
        # Functions taking and returning a list of lines; run first.
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Functions taking and returning a list of lines; run after cleanup.
        self.line_transforms = [
            cell_magic,
        ]
        # Classes whose ``find(tokens_by_line)`` returns a transformer object
        # (or a falsy value when there is nothing for them to transform).
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms repeatedly until the input stops changing.

        Parameters
        ----------
        lines : list of str
            The lines of code to transform.

        Returns
        -------
        list of str
            The lines once ``do_one_token_transform()`` reports no change.

        Raises
        ------
        RuntimeError
            If the input is still changing after ``TRANSFORM_LOOP_LIMIT``
            passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code.

        The cleanup transforms run first, then the line transforms, then the
        token transforms; the resulting lines are joined back into one string.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : str
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        if lines[-1][:-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if not lines:
            # ``cleanup_transforms`` is an extensible list; if a transform
            # returned no lines at all there is nothing left to wait for, and
            # the indexing below would otherwise raise IndexError.
            return 'complete', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)
        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)
        if len(tokens_by_line) == 1:
            return 'incomplete', find_last_indent(lines)

        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                      tokenize.COMMENT}:
            ix -= 1

        # ``ix`` is -1 when every token was skipped; test that explicitly
        # rather than letting the index wrap around to the last token.
        if ix >= 0 and toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            # Column of the first real token on that line, plus one level.
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # If there's a blank line at the end, assume we're ready to execute.
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser.
        try:
            with warnings.catch_warnings():
                # Raise SyntaxWarning as an exception so it is caught below.
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                # compile_command returns None for incomplete input.
                return 'incomplete', find_last_indent(lines)
        return 'complete', None
644
646
645
647
def find_last_indent(lines):
    """Return the indentation width of the last line in ``lines``.

    Parameters
    ----------
    lines : list of str
        Lines of code; only the final one is inspected.

    Returns
    -------
    int
        The length of the leading run of spaces and tabs on the last line,
        with each tab counted as four spaces. 0 if ``lines`` is empty or the
        last line does not start with a space or tab.
    """
    if not lines:
        # Nothing to measure; avoids an IndexError on lines[-1].
        return 0
    m = _indent_re.match(lines[-1])
    if not m:
        return 0
    return len(m.group(0).replace('\t', ' '*4))
General Comments 0
You need to be logged in to leave comments. Login now