##// END OF EJS Templates
Switch inputtransformer2 back to stdlib tokenize module
Thomas Kluyver -
Show More
@@ -1,534 +1,553 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 """
5 """
6
6
7 # Copyright (c) IPython Development Team.
7 # Copyright (c) IPython Development Team.
8 # Distributed under the terms of the Modified BSD License.
8 # Distributed under the terms of the Modified BSD License.
9
9
10 from codeop import compile_command
10 from codeop import compile_command
11 import re
11 import re
12 import tokenize
12 from typing import List, Tuple
13 from typing import List, Tuple
13 from IPython.utils import tokenize2
14 from IPython.utils.tokenutil import generate_tokens
15
14
_indent_re = re.compile(r'^[ \t]+')

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line.
    """
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    # Only strip lines that begin with the exact same whitespace run; other
    # lines are left untouched.
    return [l[n:] if l.startswith(space) else l
            for l in lines]
31
30
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    Notes
    -----
    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

# Strips classic interactive-interpreter prompts: '>>> ' and '... '.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Strips IPython's own 'In [n]: ' and '   ...: ' prompts.
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
69
68
def cell_magic(lines):
    """Turn a ``%%magic`` cell into a single ``run_cell_magic()`` call.

    Lines not starting with ``%%``, and help queries like ``%%magic?``
    (handled later by HelpEnd), are returned unchanged.
    """
    if not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Strip the leading '%%' and the trailing newline from the first line.
    magic_name, _, first_line = lines[0][2:-1].partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
80
79
81 # -----
80 # -----
82
81
def _find_assign_op(token_line):
    """Return the index of the first '=' in *token_line* not inside brackets.

    Returns None if there is no top-level assignment.
    We don't try to support multiple special assignment (a = b = %foo).
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        if s in '([{':
            paren_level += 1
        elif s in ')]}':
            paren_level -= 1
95
94
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            break
    return end_line
107
106
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble pieces of a backslash-continued line into a single line.

    Uses 0-indexed line numbers. *start* is (lineno, colno).
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p[:-2] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1][:-1]])           # Strip newline from last line
116
115
class TokenTransformBase:
    """Base class for transformations located via the token stream.

    Subclasses provide ``find(tokens_by_line)`` returning an instance
    positioned at the match, and ``transform(lines)`` rewriting the input.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        """Sort key: earliest match first, priority breaking ties."""
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        self.start_line = start[0] - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    def transform(self, lines: List[str]):
        raise NotImplementedError
130
129
class MagicAssign(TokenTransformBase):
    """Transform assignments from magic commands (a = %foo)."""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # NOTE(review): indexing line[assign_ix+2] strictly needs
            # len(line) >= assign_ix + 3; this relies on the trailing
            # NEWLINE token always being present — confirm upstream.
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
162
161
163
162
class SystemAssign(TokenTransformBase):
    """Transform assignments from system commands (a = !foo)."""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns (line, column) of the ! if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1
                # '!' tokenizes as an ERRORTOKEN; skip whitespace
                # ERRORTOKENs until we find it (or something else).
                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find
        """
        start_line, start_col = self.start_line, self.start_col

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
202
201
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

ESC_SHELL  = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'    # Send line to system shell and capture output
ESC_HELP   = '?'     # Find information about object
ESC_HELP2  = '??'    # Find extra-detailed information about object
ESC_MAGIC  = '%'     # Call magic function
ESC_MAGIC2 = '%%'    # Call cell-magic function
ESC_QUOTE  = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'     # Quote all args as a single string, call
ESC_PAREN  = '/'     # Call first argument with rest of line as arguments

ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately

def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)"""
    method = 'pinfo2' if esc == '??' \
                else 'psearch' if '*' in target \
                else 'pinfo'
    arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
    else:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
               (next_input, t_magic_name, t_magic_arg_s)
237
236
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should just fire the intro help screen.
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '?')
245
244
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should just fire the intro help screen.
    """
    if not content:
        return 'get_ipython().show_usage()'

    return _make_help_call(content, '??')
253
252
def _tr_magic(content):
    "Translate lines escaped with a percent sign: %"
    name, _, args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
258
257
def _tr_quote(content):
    "Translate lines escaped with a comma: ,"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, '", "'.join(args.split()))
263
262
def _tr_quote2(content):
    "Translate lines escaped with a semicolon: ;"
    name, _, args = content.partition(' ')
    return '%s("%s")' % (name, args)
268
267
def _tr_paren(content):
    "Translate lines escaped with a slash: /"
    name, _, args = content.partition(' ')
    return '%s(%s)' % (name, ", ".join(args.split()))
273
272
# Dispatch table: each escape character maps to the callable that rewrites
# the remainder of the line.
tr = {ESC_SHELL:  'get_ipython().system({!r})'.format,
      ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
      ESC_HELP:   _tr_help,
      ESC_HELP2:  _tr_help2,
      ESC_MAGIC:  _tr_magic,
      ESC_QUOTE:  _tr_quote,
      ESC_QUOTE2: _tr_quote2,
      ESC_PAREN:  _tr_paren}
282
281
class EscapedCommand(TokenTransformBase):
    """Transform escaped commands: %magic, !system, ?help and friends."""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns (line, column) of the escape if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            ix = 0
            # Skip over indentation tokens to the first real token.
            while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by find()."""
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Double escapes (!!, ??) take precedence over single ones.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
315
314
# Matches the target of a trailing help query, e.g. ``foo.bar?`` or ``%magic??``.
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)
323
322
class HelpEnd(TokenTransformBase):
    """Transform help syntax: ``obj?`` or ``obj??``."""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help query (foo?) in the cell, or None."""
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help query found by find()."""
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        assert m is not None, content
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
366
365
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL
    tokens_by_line = [[]]
    parenlev = 0
    try:
        for token in tokenize.generate_tokens(iter(lines).__next__):
            tokens_by_line[-1].append(token)
            if (token.type == NEWLINE) \
                    or ((token.type == NL) and (parenlev <= 0)):
                tokens_by_line.append([])
            elif token.string in {'(', '[', '{'}:
                parenlev += 1
            elif token.string in {')', ']', '}'}:
                parenlev -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    return tokens_by_line
375
394
def show_linewise_tokens(s: str):
    """For investigation: print the token groups produced for *s*."""
    if not s.endswith('\n'):
        s += '\n'  # Ensure a trailing newline so tokenization terminates cleanly
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)
385
404
386 class TransformerManager:
405 class TransformerManager:
387 def __init__(self):
406 def __init__(self):
388 self.cleanup_transforms = [
407 self.cleanup_transforms = [
389 leading_indent,
408 leading_indent,
390 classic_prompt,
409 classic_prompt,
391 ipython_prompt,
410 ipython_prompt,
392 ]
411 ]
393 self.line_transforms = [
412 self.line_transforms = [
394 cell_magic,
413 cell_magic,
395 ]
414 ]
396 self.token_transformers = [
415 self.token_transformers = [
397 MagicAssign,
416 MagicAssign,
398 SystemAssign,
417 SystemAssign,
399 EscapedCommand,
418 EscapedCommand,
400 HelpEnd,
419 HelpEnd,
401 ]
420 ]
402
421
403 def do_one_token_transform(self, lines):
422 def do_one_token_transform(self, lines):
404 """Find and run the transform earliest in the code.
423 """Find and run the transform earliest in the code.
405
424
406 Returns (changed, lines).
425 Returns (changed, lines).
407
426
408 This method is called repeatedly until changed is False, indicating
427 This method is called repeatedly until changed is False, indicating
409 that all available transformations are complete.
428 that all available transformations are complete.
410
429
411 The tokens following IPython special syntax might not be valid, so
430 The tokens following IPython special syntax might not be valid, so
412 the transformed code is retokenised every time to identify the next
431 the transformed code is retokenised every time to identify the next
413 piece of special syntax. Hopefully long code cells are mostly valid
432 piece of special syntax. Hopefully long code cells are mostly valid
414 Python, not using lots of IPython special syntax, so this shouldn't be
433 Python, not using lots of IPython special syntax, so this shouldn't be
415 a performance issue.
434 a performance issue.
416 """
435 """
417 tokens_by_line = make_tokens_by_line(lines)
436 tokens_by_line = make_tokens_by_line(lines)
418 candidates = []
437 candidates = []
419 for transformer_cls in self.token_transformers:
438 for transformer_cls in self.token_transformers:
420 transformer = transformer_cls.find(tokens_by_line)
439 transformer = transformer_cls.find(tokens_by_line)
421 if transformer:
440 if transformer:
422 candidates.append(transformer)
441 candidates.append(transformer)
423
442
424 if not candidates:
443 if not candidates:
425 # Nothing to transform
444 # Nothing to transform
426 return False, lines
445 return False, lines
427
446
428 transformer = min(candidates, key=TokenTransformBase.sortby)
447 transformer = min(candidates, key=TokenTransformBase.sortby)
429 return True, transformer.transform(lines)
448 return True, transformer.transform(lines)
430
449
431 def do_token_transforms(self, lines):
450 def do_token_transforms(self, lines):
432 while True:
451 while True:
433 changed, lines = self.do_one_token_transform(lines)
452 changed, lines = self.do_one_token_transform(lines)
434 if not changed:
453 if not changed:
435 return lines
454 return lines
436
455
437 def transform_cell(self, cell: str):
456 def transform_cell(self, cell: str):
438 if not cell.endswith('\n'):
457 if not cell.endswith('\n'):
439 cell += '\n' # Ensure the cell has a trailing newline
458 cell += '\n' # Ensure the cell has a trailing newline
440 lines = cell.splitlines(keepends=True)
459 lines = cell.splitlines(keepends=True)
441 for transform in self.cleanup_transforms + self.line_transforms:
460 for transform in self.cleanup_transforms + self.line_transforms:
442 #print(transform, lines)
461 #print(transform, lines)
443 lines = transform(lines)
462 lines = transform(lines)
444
463
445 lines = self.do_token_transforms(lines)
464 lines = self.do_token_transforms(lines)
446 return ''.join(lines)
465 return ''.join(lines)
447
466
def check_complete(self, cell: str):
    """Return whether a block of code is ready to execute, or should be continued

    Parameters
    ----------
    source : string
        Python input code, which can be multiline.

    Returns
    -------
    status : str
        One of 'complete', 'incomplete', or 'invalid' if source is not a
        prefix of valid code.
    indent_spaces : int or None
        The number of spaces by which to indent the next line of code. If
        status is not 'incomplete', this is None.
    """
    if not cell.endswith('\n'):
        cell += '\n' # Ensure the cell has a trailing newline
    lines = cell.splitlines(keepends=True)
    # A line ending in a backslash (before the newline) always means
    # "continue on the next line" -- no further analysis needed.
    if lines[-1][:-1].endswith('\\'):
        # Explicit backslash continuation
        return 'incomplete', find_last_indent(lines)

    # The cleanup transforms (prompt stripping etc.) may raise SyntaxError
    # to signal unusable input.
    try:
        for transform in self.cleanup_transforms:
            lines = transform(lines)
    except SyntaxError:
        return 'invalid', None

    if lines[0].startswith('%%'):
        # Special case for cell magics - completion marked by blank line
        if lines[-1].strip():
            return 'incomplete', find_last_indent(lines)
        else:
            return 'complete', None

    # Line and token transforms may also reject the input with SyntaxError.
    try:
        for transform in self.line_transforms:
            lines = transform(lines)
        lines = self.do_token_transforms(lines)
    except SyntaxError:
        return 'invalid', None

    # Tokenize the (transformed) input; if the tokenizer never reached an
    # ENDMARKER, the source ends inside an unterminated construct.
    tokens_by_line = make_tokens_by_line(lines)
    if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
        # We're in a multiline string or expression
        return 'incomplete', find_last_indent(lines)

    # Find the last token on the previous line that's not NEWLINE or COMMENT
    toks_last_line = tokens_by_line[-2]
    ix = len(toks_last_line) - 1
    while ix >= 0 and toks_last_line[ix].type in {tokenize.NEWLINE,
                                                  tokenize.COMMENT}:
        ix -= 1

    if toks_last_line[ix].string == ':':
        # The last line starts a block (e.g. 'if foo:')
        # Skip leading INDENT/DEDENT tokens to find the first real token,
        # whose start column gives the current indentation level.
        ix = 0
        while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
            ix += 1
        indent = toks_last_line[ix].start[1]
        return 'incomplete', indent + 4

    # If there's a blank line at the end, assume we're ready to execute.
    if not lines[-1].strip():
        return 'complete', None

    # At this point, our checks think the code is complete (or invalid).
    # We'll use codeop.compile_command to check this with the real parser.

    try:
        # compile_command returns None for a valid-but-incomplete prefix,
        # a code object for complete input, and raises for invalid input.
        res = compile_command(''.join(lines), symbol='exec')
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError, SyntaxWarning):
        return 'invalid', None
    else:
        if res is None:
            return 'incomplete', find_last_indent(lines)
        return 'complete', None
528
547
529
548
def find_last_indent(lines):
    """Return the indentation width of the final line, counting tabs as 4 spaces."""
    leading = re.match(r'^[ \t]+', lines[-1])
    if leading is None:
        return 0
    # Each tab contributes 4 columns, each space contributes 1.
    return sum(4 if ch == '\t' else 1 for ch in leading.group(0))
General Comments 0
You need to be logged in to leave comments. Login now