##// END OF EJS Templates
Add transformation for system assignments
Thomas Kluyver -
Show More
@@ -1,203 +1,255 b''
1 import re
1 import re
2 from typing import List, Tuple
2 from typing import List, Tuple
3 from IPython.utils import tokenize2
3 from IPython.utils import tokenize2
4 from IPython.utils.tokenutil import generate_tokens
4 from IPython.utils.tokenutil import generate_tokens
5
5
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line that begins with it. Lines that do not
    start with the prefix are left unchanged.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        ``lines`` itself when there is nothing to strip (this includes an
        empty list), otherwise a new list with the prefix removed.
    """
    if not lines:
        # An empty cell has no first line to inspect.
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
19
19
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return a new list with at most one prompt removed from each line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from ``lines`` if the first two lines show one.

        Returns ``lines`` unchanged (the same list object) when the input is
        empty or no prompt is detected; otherwise returns a new list.
        """
        if not lines:
            # Nothing to test against the prompt patterns.
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
50
50
# Plain Python prompts: ``>>>`` on the first line, ``>>>`` or ``...`` on the
# following lines. The second pattern is the initial-line-only one.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` and the dotted continuation prompt. The same
# pattern is used for the first line and for continuation lines.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
57
57
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell, each keeping its line ending.

    Returns
    -------
    list of str
        ``lines`` unchanged when the cell is empty, does not start with
        ``%%``, or is a ``%%name?`` help request; otherwise a single
        newline-terminated line calling ``get_ipython().run_cell_magic``
        with the magic name, the rest of the first line, and the cell body.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # partition() returns (head, separator, tail); the separator is unused.
    # rstrip() keeps the line ending out of the magic name and its arguments.
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    # Each line already carries its own line ending, so join without a
    # separator to avoid doubling newlines in the body.
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
67
67
# Line-based transformations; transform_cell applies them in this order.
line_transforms = [leading_indent, classic_prompt, ipython_prompt, cell_magic]
74
74
75 # -----
75 # -----
76
76
def help_end(tokens_by_line):
    """Placeholder that is not implemented yet.

    Accepts the cell's tokens grouped by line, does nothing with them and
    returns None.
    """
    return None
79
79
def escaped_command(tokens_by_line):
    """Placeholder that is not implemented yet.

    Accepts the cell's tokens grouped by line, does nothing with them and
    returns None.
    """
    return None
82
82
def _find_assign_op(token_line):
    """Find the first assignment in the line ('=' not inside brackets).

    We don't try to support multiple special assignment (a = b = %foo).

    Parameters
    ----------
    token_line : iterable
        Tokens of one line; only the ``string`` attribute of each is read.

    Returns
    -------
    int or None
        Index of the first ``=`` token at bracket depth zero, or None when
        there is none.
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Compare whole tokens. A substring test against '([{' is also true
        # for the empty string, which would count every empty-string token
        # as an opening bracket.
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            # Ignore unmatched closers so the depth never goes negative.
            if paren_level > 0:
                paren_level -= 1
    return None
95
95
class MagicAssign:
    """Find and rewrite magic assignments such as ``a = %foo``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected, so both must exist.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return line[assign_ix+1].start
        return None

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find

        Parameters
        ----------
        lines : list of str
            The lines of the cell, each ending with a newline.
        start : tuple of int
            (line, column) of the ``%``; the line number is 1-based.

        Returns
        -------
        list of str
            A new list in which the assignment line, together with any
            backslash-continued lines after it, is replaced by one line
            calling ``get_ipython().run_line_magic``.
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        lhs = lines[start_line][:start_col]
        rhs = lines[start_line][start_col:-1]  # Trim newline
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')
        args_parts = [args]
        end_line = start_line
        # Follow explicit (backslash) line continuations, but only while
        # there is a following line to continue onto.
        while end_line + 1 < len(lines) and args_parts[-1].endswith('\\'):
            end_line += 1
            args_parts[-1] = args_parts[-1][:-1]  # Trim backslash
            args_parts.append(lines[end_line][:-1])  # Trim newline
        args = ' '.join(args_parts)

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
139
139
140
class SystemAssign:
    """Find and rewrite system assignments such as ``a = !foo``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns (line, column) of the ! if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
                ix = assign_ix + 1

                # Walk the run of error tokens after the '=': whitespace-only
                # ones are skipped, '!' is the match, anything else ends the
                # search on this line.
                while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
                    if line[ix].string == '!':
                        return line[ix].start
                    elif not line[ix].string.isspace():
                        break
                    ix += 1
        return None

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a system assignment found by find

        Parameters
        ----------
        lines : list of str
            The lines of the cell, each ending with a newline.
        start : tuple of int
            (line, column) of the ``!``; the line number is 1-based.

        Returns
        -------
        list of str
            A new list in which the assignment line, together with any
            backslash-continued lines after it, is replaced by one line
            calling ``get_ipython().getoutput``.
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        lhs = lines[start_line][:start_col]
        rhs = lines[start_line][start_col:-1]  # Trim newline
        assert rhs.startswith('!'), rhs
        cmd_parts = [rhs[1:]]
        end_line = start_line
        # Follow explicit (backslash) line continuations, but only while
        # there is a following line to continue onto.
        while end_line + 1 < len(lines) and cmd_parts[-1].endswith('\\'):
            end_line += 1
            cmd_parts[-1] = cmd_parts[-1][:-1]  # Trim backslash
            cmd_parts.append(lines[end_line][:-1])  # Trim newline
        cmd = ' '.join(cmd_parts)

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
190
def make_tokens_by_line(lines):
    """Tokenize ``lines`` and group the tokens into lists.

    A new group is started after every NEWLINE token, so the returned list
    always ends with the group holding whatever tokens follow the last
    NEWLINE (possibly none).
    """
    current = []
    grouped = [current]
    readline = iter(lines).__next__
    for tok in generate_tokens(readline):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            current = []
            grouped.append(current)

    return grouped
148
199
class TokenTransformers:
    """Apply the token-based transformers to a cell until none matches."""

    def __init__(self):
        self.transformers = [
            MagicAssign,
            SystemAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            if locn is not None:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        # Order by location only. Comparing the whole (location, transformer)
        # pairs would fall back to comparing the transformers themselves when
        # two locations are equal, and those are not orderable. On a tie the
        # transformer listed first in self.transformers wins.
        first_locn, transformer = min(candidates, key=lambda c: c[0])
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        """Run do_one_transform repeatedly and return the final lines."""
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines
188
240
def assign_from_system(tokens_by_line, lines):
    """Placeholder that is not implemented yet.

    Accepts the cell's tokens grouped by line and its source lines, does
    nothing with them and returns None.
    """
    return None
191
243
192
244
def transform_cell(cell):
    """Apply the line-based and token-based transformations to a cell.

    Parameters
    ----------
    cell : str
        Source code of the cell. A trailing newline is added if missing.

    Returns
    -------
    str
        The transformed source: the lines produced by every entry of
        ``line_transforms`` in order, then by ``TokenTransformers``, joined
        back into one string.
    """
    if not cell.endswith('\n'):
        cell += '\n'  # Ensure every line has a newline
    lines = cell.splitlines(keepends=True)
    for transform in line_transforms:
        lines = transform(lines)

    lines = TokenTransformers()(lines)
    # Hand the result back to the caller instead of only printing it.
    return ''.join(lines)
@@ -1,37 +1,54 b''
1 import nose.tools as nt
1 import nose.tools as nt
2
2
3 from IPython.core import inputtransformer2
3 from IPython.core import inputtransformer2 as ipt2
4 from IPython.core.inputtransformer2 import make_tokens_by_line
4 from IPython.core.inputtransformer2 import make_tokens_by_line
5
5
# Fixture pair (input lines, expected output lines) for a magic assignment
# whose arguments continue onto the next line after a trailing backslash.
# NOTE(review): runs of spaces inside these literals may have been collapsed
# in the view this was taken from - confirm the exact spacing against the
# repository copy before relying on it.
MULTILINE_MAGIC_ASSIGN = ("""\
a = f()
b = %foo \\
bar
g()
""".splitlines(keepends=True), """\
a = f()
b = get_ipython().run_line_magic('foo', ' bar')
g()
""".splitlines(keepends=True))
16
16
# Fixture pair (input lines, expected output lines) for a system assignment
# whose command continues onto the next line after a trailing backslash.
# NOTE(review): runs of spaces inside these literals may have been collapsed
# in the view this was taken from - confirm the exact spacing against the
# repository copy before relying on it.
MULTILINE_SYSTEM_ASSIGN = ("""\
a = f()
b = !foo \\
bar
g()
""".splitlines(keepends=True), """\
a = f()
b = get_ipython().getoutput('foo bar')
g()
""".splitlines(keepends=True))
27
27
def test_find_assign_magic():
    """MagicAssign.find locates the '%' and ignores a system assignment."""
    magic_tokens = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0])
    nt.assert_equal(ipt2.MagicAssign.find(magic_tokens), (2, 4))

    # Nothing to find
    system_tokens = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0])
    nt.assert_equal(ipt2.MagicAssign.find(system_tokens), None)
34
34
def test_transform_assign_magic():
    """MagicAssign.transform turns the fixture input into the expected lines."""
    source_lines, expected_lines = MULTILINE_MAGIC_ASSIGN
    transformed = ipt2.MagicAssign.transform(source_lines, (2, 4))
    nt.assert_equal(transformed, expected_lines)
38
def test_find_assign_system():
    """SystemAssign.find locates the '!' and ignores a magic assignment."""
    # NOTE(review): with a single space after '=', the '!' in "a = !ls" sits
    # at column 4, not 5 - the literal may have lost whitespace in the view
    # this was taken from; confirm against the repository copy.
    cases = [
        (MULTILINE_SYSTEM_ASSIGN[0], (2, 4)),
        (["a = !ls\n"], (1, 5)),
        (["a=!ls\n"], (1, 2)),
        (MULTILINE_MAGIC_ASSIGN[0], None),  # Nothing to find
    ]
    for source_lines, expected in cases:
        tokens = make_tokens_by_line(source_lines)
        nt.assert_equal(ipt2.SystemAssign.find(tokens), expected)
51
def test_transform_assign_system():
    """SystemAssign.transform turns the fixture input into the expected lines."""
    source_lines, expected_lines = MULTILINE_SYSTEM_ASSIGN
    transformed = ipt2.SystemAssign.transform(source_lines, (2, 4))
    nt.assert_equal(transformed, expected_lines)
General Comments 0
You need to be logged in to leave comments. Login now