Factor out handling of line continuations
Thomas Kluyver
@@ -1,255 +1,258 @@
1 1 import re
2 2 from typing import List, Tuple
3 3 from IPython.utils import tokenize2
4 4 from IPython.utils.tokenutil import generate_tokens
5 5
6 6 def leading_indent(lines):
7 7 """Remove leading indentation.
8 8
9 9 If the first line starts with spaces or tabs, the same whitespace will be
10 10 removed from each following line.
11 11 """
12 12 m = re.match(r'^[ \t]+', lines[0])
13 13 if not m:
14 14 return lines
15 15 space = m.group(0)
16 16 n = len(space)
17 17 return [l[n:] if l.startswith(space) else l
18 18 for l in lines]
19 19
20 20 class PromptStripper:
21 21 """Remove matching input prompts from a block of input.
22 22
23 23 Parameters
24 24 ----------
25 25 prompt_re : regular expression
26 26 A regular expression matching any input prompt (including continuation)
27 27 initial_re : regular expression, optional
28 28 A regular expression matching only the initial prompt, but not continuation.
29 29 If no initial expression is given, prompt_re will be used everywhere.
30 30 Used mainly for plain Python prompts, where the continuation prompt
31 31 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
32 32
33 33 If initial_re and prompt_re differ,
34 34 only initial_re will be tested against the first line.
35 35 If any prompt is found on the first two lines,
36 36 prompts will be stripped from the rest of the block.
37 37 """
38 38 def __init__(self, prompt_re, initial_re=None):
39 39 self.prompt_re = prompt_re
40 40 self.initial_re = initial_re or prompt_re
41 41
42 42 def _strip(self, lines):
43 43 return [self.prompt_re.sub('', l, count=1) for l in lines]
44 44
45 45 def __call__(self, lines):
46 46 if self.initial_re.match(lines[0]) or \
47 47 (len(lines) > 1 and self.prompt_re.match(lines[1])):
48 48 return self._strip(lines)
49 49 return lines
50 50
51 51 classic_prompt = PromptStripper(
52 52 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
53 53 initial_re=re.compile(r'^>>>( |$)')
54 54 )
55 55
56 56 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
57 57
58 58 def cell_magic(lines):
59 59 if not lines[0].startswith('%%'):
60 60 return lines
61 61 if re.match('%%\w+\?', lines[0]):
62 62 # This case will be handled by help_end
63 63 return lines
64 64 magic_name, first_line = lines[0][2:].partition(' ')
65 65 body = '\n'.join(lines[1:])
66 66 return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
67 67
68 68 line_transforms = [
69 69 leading_indent,
70 70 classic_prompt,
71 71 ipython_prompt,
72 72 cell_magic,
73 73 ]
74 74
75 75 # -----
76 76
77 77 def help_end(tokens_by_line):
78 78 pass
79 79
80 80 def escaped_command(tokens_by_line):
81 81 pass
82 82
83 83 def _find_assign_op(token_line):
84 84 # Find the first assignment in the line ('=' not inside brackets)
85 85 # We don't try to support multiple special assignments (a = b = %foo)
86 86 paren_level = 0
87 87 for i, ti in enumerate(token_line):
88 88 s = ti.string
89 89 if s == '=' and paren_level == 0:
90 90 return i
91 91 if s in '([{':
92 92 paren_level += 1
93 93 elif s in ')]}':
94 94 paren_level -= 1
95 95
96 def find_end_of_continued_line(lines, start_line: int):
97 """Find the last line of a line explicitly extended using backslashes.
98
99 Uses 0-indexed line numbers.
100 """
101 end_line = start_line
102 while lines[end_line].endswith('\\\n'):
103 end_line += 1
104 if end_line >= len(lines):
105 break
106 return end_line
107
108 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
109 """Assemble pieces of a continued line into a single line.
110
111 Uses 0-indexed line numbers. *start* is (lineno, colno).
112 """
113 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
114 return ' '.join([p[:-2] for p in parts[:-1]] # Strip backslash+newline
115 + [parts[-1][:-1]]) # Strip newline from last line
116
96 117 class MagicAssign:
97 118 @staticmethod
98 119 def find(tokens_by_line):
99 120 """Find the first magic assignment (a = %foo) in the cell.
100 121
101 Returns (line, column) of the % if found, or None.
122 Returns (line, column) of the % if found, or None. *line* is 1-indexed.
102 123 """
103 124 for line in tokens_by_line:
104 125 assign_ix = _find_assign_op(line)
105 126 if (assign_ix is not None) \
106 127 and (len(line) >= assign_ix + 2) \
107 128 and (line[assign_ix+1].string == '%') \
108 129 and (line[assign_ix+2].type == tokenize2.NAME):
109 130 return line[assign_ix+1].start
110 131
111 132 @staticmethod
112 133 def transform(lines: List[str], start: Tuple[int, int]):
113 134 """Transform a magic assignment found by find
114 135 """
115 136 start_line = start[0] - 1 # Shift from 1-index to 0-index
116 137 start_col = start[1]
117
118 print("Start at", start_line, start_col)
119 print("Line", lines[start_line])
120
121 lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
138
139 lhs = lines[start_line][:start_col]
140 end_line = find_end_of_continued_line(lines, start_line)
141 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
122 142 assert rhs.startswith('%'), rhs
123 143 magic_name, _, args = rhs[1:].partition(' ')
124 args_parts = [args]
125 end_line = start_line
126 # Follow explicit (backslash) line continuations
127 while end_line < len(lines) and args_parts[-1].endswith('\\'):
128 end_line += 1
129 args_parts[-1] = args_parts[-1][:-1] # Trim backslash
130 args_parts.append(lines[end_line][:-1]) # Trim newline
131 args = ' '.join(args_parts)
132 144
133 145 lines_before = lines[:start_line]
134 146 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
135 147 new_line = lhs + call + '\n'
136 148 lines_after = lines[end_line+1:]
137 149
138 150 return lines_before + [new_line] + lines_after
139 151
140 152
141 153 class SystemAssign:
142 154 @staticmethod
143 155 def find(tokens_by_line):
144 156 """Find the first system assignment (a = !foo) in the cell.
145 157
146 Returns (line, column) of the ! if found, or None.
158 Returns (line, column) of the ! if found, or None. *line* is 1-indexed.
147 159 """
148 160 for line in tokens_by_line:
149 161 assign_ix = _find_assign_op(line)
150 162 if (assign_ix is not None) \
151 163 and (len(line) >= assign_ix + 2) \
152 164 and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
153 165 ix = assign_ix + 1
154 166
155 167 while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
156 168 if line[ix].string == '!':
157 169 return line[ix].start
158 170 elif not line[ix].string.isspace():
159 171 break
160 172 ix += 1
161 173
162 174 @staticmethod
163 175 def transform(lines: List[str], start: Tuple[int, int]):
164 176 """Transform a system assignment found by find
165 177 """
166 178 start_line = start[0] - 1 # Shift from 1-index to 0-index
167 179 start_col = start[1]
168 180
169 print("Start at", start_line, start_col)
170 print("Line", lines[start_line])
171
172 lhs, rhs = lines[start_line][:start_col], lines[start_line][
173 start_col:-1]
181 lhs = lines[start_line][:start_col]
182 end_line = find_end_of_continued_line(lines, start_line)
183 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
174 184 assert rhs.startswith('!'), rhs
175 cmd_parts = [rhs[1:]]
176 end_line = start_line
177 # Follow explicit (backslash) line continuations
178 while end_line < len(lines) and cmd_parts[-1].endswith('\\'):
179 end_line += 1
180 cmd_parts[-1] = cmd_parts[-1][:-1] # Trim backslash
181 cmd_parts.append(lines[end_line][:-1]) # Trim newline
182 cmd = ' '.join(cmd_parts)
185 cmd = rhs[1:]
183 186
184 187 lines_before = lines[:start_line]
185 188 call = "get_ipython().getoutput({!r})".format(cmd)
186 189 new_line = lhs + call + '\n'
187 190 lines_after = lines[end_line + 1:]
188 191
189 192 return lines_before + [new_line] + lines_after
190 193
191 194 def make_tokens_by_line(lines):
192 195 tokens_by_line = [[]]
193 196 for token in generate_tokens(iter(lines).__next__):
194 197 tokens_by_line[-1].append(token)
195 198 if token.type == tokenize2.NEWLINE:
196 199 tokens_by_line.append([])
197 200
198 201 return tokens_by_line
199 202
200 203 class TokenTransformers:
201 204 def __init__(self):
202 205 self.transformers = [
203 206 MagicAssign,
204 207 SystemAssign,
205 208 ]
206 209
207 210 def do_one_transform(self, lines):
208 211 """Find and run the transform earliest in the code.
209 212
210 213 Returns (changed, lines).
211 214
212 215 This method is called repeatedly until changed is False, indicating
213 216 that all available transformations are complete.
214 217
215 218 The tokens following IPython special syntax might not be valid, so
216 219 the transformed code is retokenised every time to identify the next
217 220 piece of special syntax. Hopefully long code cells are mostly valid
218 221 Python, not using lots of IPython special syntax, so this shouldn't be
219 222 a performance issue.
220 223 """
221 224 tokens_by_line = make_tokens_by_line(lines)
222 225 candidates = []
223 226 for transformer in self.transformers:
224 227 locn = transformer.find(tokens_by_line)
225 228 if locn:
226 229 candidates.append((locn, transformer))
227 230
228 231 if not candidates:
229 232 # Nothing to transform
230 233 return False, lines
231 234
232 235 first_locn, transformer = min(candidates)
233 236 return True, transformer.transform(lines, first_locn)
234 237
235 238 def __call__(self, lines):
236 239 while True:
237 240 changed, lines = self.do_one_transform(lines)
238 241 if not changed:
239 242 return lines
240 243
241 244 def assign_from_system(tokens_by_line, lines):
242 245 pass
243 246
244 247
245 248 def transform_cell(cell):
246 249 if not cell.endswith('\n'):
247 250 cell += '\n' # Ensure every line has a newline
248 251 lines = cell.splitlines(keepends=True)
249 252 for transform in line_transforms:
250 253 #print(transform, lines)
251 254 lines = transform(lines)
252 255
253 256 lines = TokenTransformers()(lines)
254 257 for line in lines:
255 258 print('~~', line)
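
The two helpers added above are the point of this commit: find_end_of_continued_line and assemble_continued_line factor out the backslash-following loops that previously lived inside both MagicAssign.transform and SystemAssign.transform. A minimal sketch of how they compose, assuming this branch of IPython.core.inputtransformer2 is importable; the sample cell is invented for illustration:

from IPython.core.inputtransformer2 import (
    assemble_continued_line,
    find_end_of_continued_line,
)

# An invented cell, already split with keepends=True as transform_cell does.
cell = [
    "total = 1 +\\\n",
    "2 +\\\n",
    "3\n",
    "print(total)\n",
]

# The backslash continuation starting on line 0 ends on line 2 (0-indexed).
end = find_end_of_continued_line(cell, 0)
assert end == 2

# Re-join the pieces from column 8 of line 0, stripping backslash+newline
# from every piece and the plain newline from the last one.
joined = assemble_continued_line(cell, (0, 8), end)
assert joined == "1 + 2 + 3"
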
@@ -1,54 +1,60 @@
1 1 import nose.tools as nt
2 2
3 3 from IPython.core import inputtransformer2 as ipt2
4 4 from IPython.core.inputtransformer2 import make_tokens_by_line
5 5
6 6 MULTILINE_MAGIC_ASSIGN = ("""\
7 7 a = f()
8 8 b = %foo \\
9 9 bar
10 10 g()
11 11 """.splitlines(keepends=True), """\
12 12 a = f()
13 13 b = get_ipython().run_line_magic('foo', ' bar')
14 14 g()
15 15 """.splitlines(keepends=True))
16 16
17 17 MULTILINE_SYSTEM_ASSIGN = ("""\
18 18 a = f()
19 19 b = !foo \\
20 20 bar
21 21 g()
22 22 """.splitlines(keepends=True), """\
23 23 a = f()
24 24 b = get_ipython().getoutput('foo bar')
25 25 g()
26 26 """.splitlines(keepends=True))
27 27
28 def test_continued_line():
29 lines = MULTILINE_MAGIC_ASSIGN[0]
30 nt.assert_equal(ipt2.find_end_of_continued_line(lines, 1), 2)
31
32 nt.assert_equal(ipt2.assemble_continued_line(lines, (1, 5), 2), "foo bar")
33
28 34 def test_find_assign_magic():
29 35 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0])
30 36 nt.assert_equal(ipt2.MagicAssign.find(tbl), (2, 4))
31 37
32 38 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0]) # Nothing to find
33 39 nt.assert_equal(ipt2.MagicAssign.find(tbl), None)
34 40
35 41 def test_transform_assign_magic():
36 42 res = ipt2.MagicAssign.transform(MULTILINE_MAGIC_ASSIGN[0], (2, 4))
37 43 nt.assert_equal(res, MULTILINE_MAGIC_ASSIGN[1])
38 44
39 45 def test_find_assign_system():
40 46 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0])
41 47 nt.assert_equal(ipt2.SystemAssign.find(tbl), (2, 4))
42 48
43 49 tbl = make_tokens_by_line(["a = !ls\n"])
44 50 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 5))
45 51
46 52 tbl = make_tokens_by_line(["a=!ls\n"])
47 53 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 2))
48 54
49 55 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0]) # Nothing to find
50 56 nt.assert_equal(ipt2.SystemAssign.find(tbl), None)
51 57
52 58 def test_transform_assign_system():
53 59 res = ipt2.SystemAssign.transform(MULTILINE_SYSTEM_ASSIGN[0], (2, 4))
54 60 nt.assert_equal(res, MULTILINE_SYSTEM_ASSIGN[1])
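
A note on the coordinate convention these tests pin down: find() returns the start of the '%' or '!' token as the tokenizer reports it, so the line number is 1-indexed while the column is 0-indexed (transform() then shifts the line back to a 0-based index). A quick check, assuming this branch is installed; the first cell mirrors MULTILINE_MAGIC_ASSIGN above:

from IPython.core import inputtransformer2 as ipt2

cell = ["a = f()\n", "b = %foo \\\n", "bar\n", "g()\n"]
tbl = ipt2.make_tokens_by_line(cell)
print(ipt2.MagicAssign.find(tbl))   # (2, 4): the '%' sits on line 2, column 4

tbl = ipt2.make_tokens_by_line(["a = !ls\n"])
print(ipt2.SystemAssign.find(tbl))  # (1, 5): the '!' sits on line 1, column 5
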
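The do_one_transform docstring describes the driver loop: find the earliest piece of special syntax, rewrite only that, then retokenise and repeat until nothing is found. A sketch of the end-to-end behaviour on a cell with two special assignments, again assuming this branch is installed and using an invented cell:

from IPython.core.inputtransformer2 import TokenTransformers

cell = ["a = %who_ls\n", "b = !pwd\n"]
lines = TokenTransformers()(cell)
# Pass 1 rewrites the earliest candidate (the %who_ls assignment) into a
# get_ipython().run_line_magic(...) call; the cell is retokenised; pass 2
# rewrites the !pwd assignment into get_ipython().getoutput(...); pass 3
# finds no candidates and the loop stops.
for line in lines:
    print(line, end='')
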