##// END OF EJS Templates
Factor out handling of line continuations
Thomas Kluyver -
Show More
@@ -1,255 +1,258 b''
1 import re
1 import re
2 from typing import List, Tuple
2 from typing import List, Tuple
3 from IPython.utils import tokenize2
3 from IPython.utils import tokenize2
4 from IPython.utils.tokenutil import generate_tokens
4 from IPython.utils.tokenutil import generate_tokens
5
5
6 def leading_indent(lines):
6 def leading_indent(lines):
7 """Remove leading indentation.
7 """Remove leading indentation.
8
8
9 If the first line starts with a spaces or tabs, the same whitespace will be
9 If the first line starts with a spaces or tabs, the same whitespace will be
10 removed from each following line.
10 removed from each following line.
11 """
11 """
12 m = re.match(r'^[ \t]+', lines[0])
12 m = re.match(r'^[ \t]+', lines[0])
13 if not m:
13 if not m:
14 return lines
14 return lines
15 space = m.group(0)
15 space = m.group(0)
16 n = len(space)
16 n = len(space)
17 return [l[n:] if l.startswith(space) else l
17 return [l[n:] if l.startswith(space) else l
18 for l in lines]
18 for l in lines]
19
19
20 class PromptStripper:
20 class PromptStripper:
21 """Remove matching input prompts from a block of input.
21 """Remove matching input prompts from a block of input.
22
22
23 Parameters
23 Parameters
24 ----------
24 ----------
25 prompt_re : regular expression
25 prompt_re : regular expression
26 A regular expression matching any input prompt (including continuation)
26 A regular expression matching any input prompt (including continuation)
27 initial_re : regular expression, optional
27 initial_re : regular expression, optional
28 A regular expression matching only the initial prompt, but not continuation.
28 A regular expression matching only the initial prompt, but not continuation.
29 If no initial expression is given, prompt_re will be used everywhere.
29 If no initial expression is given, prompt_re will be used everywhere.
30 Used mainly for plain Python prompts, where the continuation prompt
30 Used mainly for plain Python prompts, where the continuation prompt
31 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
31 ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
32
32
33 If initial_re and prompt_re differ,
33 If initial_re and prompt_re differ,
34 only initial_re will be tested against the first line.
34 only initial_re will be tested against the first line.
35 If any prompt is found on the first two lines,
35 If any prompt is found on the first two lines,
36 prompts will be stripped from the rest of the block.
36 prompts will be stripped from the rest of the block.
37 """
37 """
38 def __init__(self, prompt_re, initial_re=None):
38 def __init__(self, prompt_re, initial_re=None):
39 self.prompt_re = prompt_re
39 self.prompt_re = prompt_re
40 self.initial_re = initial_re or prompt_re
40 self.initial_re = initial_re or prompt_re
41
41
42 def _strip(self, lines):
42 def _strip(self, lines):
43 return [self.prompt_re.sub('', l, count=1) for l in lines]
43 return [self.prompt_re.sub('', l, count=1) for l in lines]
44
44
45 def __call__(self, lines):
45 def __call__(self, lines):
46 if self.initial_re.match(lines[0]) or \
46 if self.initial_re.match(lines[0]) or \
47 (len(lines) > 1 and self.prompt_re.match(lines[1])):
47 (len(lines) > 1 and self.prompt_re.match(lines[1])):
48 return self._strip(lines)
48 return self._strip(lines)
49 return lines
49 return lines
50
50
51 classic_prompt = PromptStripper(
51 classic_prompt = PromptStripper(
52 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
52 prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
53 initial_re=re.compile(r'^>>>( |$)')
53 initial_re=re.compile(r'^>>>( |$)')
54 )
54 )
55
55
56 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
56 ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
57
57
58 def cell_magic(lines):
58 def cell_magic(lines):
59 if not lines[0].startswith('%%'):
59 if not lines[0].startswith('%%'):
60 return lines
60 return lines
61 if re.match('%%\w+\?', lines[0]):
61 if re.match('%%\w+\?', lines[0]):
62 # This case will be handled by help_end
62 # This case will be handled by help_end
63 return lines
63 return lines
64 magic_name, first_line = lines[0][2:].partition(' ')
64 magic_name, first_line = lines[0][2:].partition(' ')
65 body = '\n'.join(lines[1:])
65 body = '\n'.join(lines[1:])
66 return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
66 return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
67
67
68 line_transforms = [
68 line_transforms = [
69 leading_indent,
69 leading_indent,
70 classic_prompt,
70 classic_prompt,
71 ipython_prompt,
71 ipython_prompt,
72 cell_magic,
72 cell_magic,
73 ]
73 ]
74
74
75 # -----
75 # -----
76
76
77 def help_end(tokens_by_line):
77 def help_end(tokens_by_line):
78 pass
78 pass
79
79
80 def escaped_command(tokens_by_line):
80 def escaped_command(tokens_by_line):
81 pass
81 pass
82
82
83 def _find_assign_op(token_line):
83 def _find_assign_op(token_line):
84 # Find the first assignment in the line ('=' not inside brackets)
84 # Find the first assignment in the line ('=' not inside brackets)
85 # We don't try to support multiple special assignment (a = b = %foo)
85 # We don't try to support multiple special assignment (a = b = %foo)
86 paren_level = 0
86 paren_level = 0
87 for i, ti in enumerate(token_line):
87 for i, ti in enumerate(token_line):
88 s = ti.string
88 s = ti.string
89 if s == '=' and paren_level == 0:
89 if s == '=' and paren_level == 0:
90 return i
90 return i
91 if s in '([{':
91 if s in '([{':
92 paren_level += 1
92 paren_level += 1
93 elif s in ')]}':
93 elif s in ')]}':
94 paren_level -= 1
94 paren_level -= 1
95
95
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Parameters
    ----------
    lines : list of str
        Lines of the cell; each line is expected to end with a newline.
    start_line : int
        0-indexed line number at which the continued line starts.

    Returns
    -------
    int
        0-indexed number of the last line of the continuation. Always a
        valid index into *lines*, even if the final line itself ends with
        a backslash.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            # The very last line ends with a backslash: clamp to a valid
            # index instead of returning len(lines), which is out of range
            # for callers that index lines[end_line] directly.
            return len(lines) - 1
    return end_line
107
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble pieces of a continued line into a single line.

    Uses 0-indexed line numbers. *start* is (lineno, colno).
    """
    first_row, first_col = start
    pieces = [lines[first_row][first_col:]]
    pieces.extend(lines[first_row + 1:end_line + 1])
    # Every piece except the last ends with backslash+newline; the last
    # ends with a plain newline. Drop those terminators, then glue the
    # fragments together with single spaces.
    stripped = [piece[:-2] for piece in pieces[:-1]]
    stripped.append(pieces[-1][:-1])
    return ' '.join(stripped)
116
96 class MagicAssign:
117 class MagicAssign:
97 @staticmethod
118 @staticmethod
98 def find(tokens_by_line):
119 def find(tokens_by_line):
99 """Find the first magic assignment (a = %foo) in the cell.
120 """Find the first magic assignment (a = %foo) in the cell.
100
121
101 Returns (line, column) of the % if found, or None.
122 Returns (line, column) of the % if found, or None. *line* is 1-indexed.
102 """
123 """
103 for line in tokens_by_line:
124 for line in tokens_by_line:
104 assign_ix = _find_assign_op(line)
125 assign_ix = _find_assign_op(line)
105 if (assign_ix is not None) \
126 if (assign_ix is not None) \
106 and (len(line) >= assign_ix + 2) \
127 and (len(line) >= assign_ix + 2) \
107 and (line[assign_ix+1].string == '%') \
128 and (line[assign_ix+1].string == '%') \
108 and (line[assign_ix+2].type == tokenize2.NAME):
129 and (line[assign_ix+2].type == tokenize2.NAME):
109 return line[assign_ix+1].start
130 return line[assign_ix+1].start
110
131
111 @staticmethod
132 @staticmethod
112 def transform(lines: List[str], start: Tuple[int, int]):
133 def transform(lines: List[str], start: Tuple[int, int]):
113 """Transform a magic assignment found by find
134 """Transform a magic assignment found by find
114 """
135 """
115 start_line = start[0] - 1 # Shift from 1-index to 0-index
136 start_line = start[0] - 1 # Shift from 1-index to 0-index
116 start_col = start[1]
137 start_col = start[1]
117
138
118 print("Start at", start_line, start_col)
139 lhs = lines[start_line][:start_col]
119 print("Line", lines[start_line])
140 end_line = find_end_of_continued_line(lines, start_line)
120
141 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
121 lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
122 assert rhs.startswith('%'), rhs
142 assert rhs.startswith('%'), rhs
123 magic_name, _, args = rhs[1:].partition(' ')
143 magic_name, _, args = rhs[1:].partition(' ')
124 args_parts = [args]
125 end_line = start_line
126 # Follow explicit (backslash) line continuations
127 while end_line < len(lines) and args_parts[-1].endswith('\\'):
128 end_line += 1
129 args_parts[-1] = args_parts[-1][:-1] # Trim backslash
130 args_parts.append(lines[end_line][:-1]) # Trim newline
131 args = ' '.join(args_parts)
132
144
133 lines_before = lines[:start_line]
145 lines_before = lines[:start_line]
134 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
146 call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
135 new_line = lhs + call + '\n'
147 new_line = lhs + call + '\n'
136 lines_after = lines[end_line+1:]
148 lines_after = lines[end_line+1:]
137
149
138 return lines_before + [new_line] + lines_after
150 return lines_before + [new_line] + lines_after
139
151
140
152
141 class SystemAssign:
153 class SystemAssign:
142 @staticmethod
154 @staticmethod
143 def find(tokens_by_line):
155 def find(tokens_by_line):
144 """Find the first system assignment (a = !foo) in the cell.
156 """Find the first system assignment (a = !foo) in the cell.
145
157
146 Returns (line, column) of the ! if found, or None.
158 Returns (line, column) of the ! if found, or None. *line* is 1-indexed.
147 """
159 """
148 for line in tokens_by_line:
160 for line in tokens_by_line:
149 assign_ix = _find_assign_op(line)
161 assign_ix = _find_assign_op(line)
150 if (assign_ix is not None) \
162 if (assign_ix is not None) \
151 and (len(line) >= assign_ix + 2) \
163 and (len(line) >= assign_ix + 2) \
152 and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
164 and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
153 ix = assign_ix + 1
165 ix = assign_ix + 1
154
166
155 while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
167 while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
156 if line[ix].string == '!':
168 if line[ix].string == '!':
157 return line[ix].start
169 return line[ix].start
158 elif not line[ix].string.isspace():
170 elif not line[ix].string.isspace():
159 break
171 break
160 ix += 1
172 ix += 1
161
173
162 @staticmethod
174 @staticmethod
163 def transform(lines: List[str], start: Tuple[int, int]):
175 def transform(lines: List[str], start: Tuple[int, int]):
164 """Transform a system assignment found by find
176 """Transform a system assignment found by find
165 """
177 """
166 start_line = start[0] - 1 # Shift from 1-index to 0-index
178 start_line = start[0] - 1 # Shift from 1-index to 0-index
167 start_col = start[1]
179 start_col = start[1]
168
180
169 print("Start at", start_line, start_col)
181 lhs = lines[start_line][:start_col]
170 print("Line", lines[start_line])
182 end_line = find_end_of_continued_line(lines, start_line)
171
183 rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
172 lhs, rhs = lines[start_line][:start_col], lines[start_line][
173 start_col:-1]
174 assert rhs.startswith('!'), rhs
184 assert rhs.startswith('!'), rhs
175 cmd_parts = [rhs[1:]]
185 cmd = rhs[1:]
176 end_line = start_line
177 # Follow explicit (backslash) line continuations
178 while end_line < len(lines) and cmd_parts[-1].endswith('\\'):
179 end_line += 1
180 cmd_parts[-1] = cmd_parts[-1][:-1] # Trim backslash
181 cmd_parts.append(lines[end_line][:-1]) # Trim newline
182 cmd = ' '.join(cmd_parts)
183
186
184 lines_before = lines[:start_line]
187 lines_before = lines[:start_line]
185 call = "get_ipython().getoutput({!r})".format(cmd)
188 call = "get_ipython().getoutput({!r})".format(cmd)
186 new_line = lhs + call + '\n'
189 new_line = lhs + call + '\n'
187 lines_after = lines[end_line + 1:]
190 lines_after = lines[end_line + 1:]
188
191
189 return lines_before + [new_line] + lines_after
192 return lines_before + [new_line] + lines_after
190
193
191 def make_tokens_by_line(lines):
194 def make_tokens_by_line(lines):
192 tokens_by_line = [[]]
195 tokens_by_line = [[]]
193 for token in generate_tokens(iter(lines).__next__):
196 for token in generate_tokens(iter(lines).__next__):
194 tokens_by_line[-1].append(token)
197 tokens_by_line[-1].append(token)
195 if token.type == tokenize2.NEWLINE:
198 if token.type == tokenize2.NEWLINE:
196 tokens_by_line.append([])
199 tokens_by_line.append([])
197
200
198 return tokens_by_line
201 return tokens_by_line
199
202
200 class TokenTransformers:
203 class TokenTransformers:
201 def __init__(self):
204 def __init__(self):
202 self.transformers = [
205 self.transformers = [
203 MagicAssign,
206 MagicAssign,
204 SystemAssign,
207 SystemAssign,
205 ]
208 ]
206
209
207 def do_one_transform(self, lines):
210 def do_one_transform(self, lines):
208 """Find and run the transform earliest in the code.
211 """Find and run the transform earliest in the code.
209
212
210 Returns (changed, lines).
213 Returns (changed, lines).
211
214
212 This method is called repeatedly until changed is False, indicating
215 This method is called repeatedly until changed is False, indicating
213 that all available transformations are complete.
216 that all available transformations are complete.
214
217
215 The tokens following IPython special syntax might not be valid, so
218 The tokens following IPython special syntax might not be valid, so
216 the transformed code is retokenised every time to identify the next
219 the transformed code is retokenised every time to identify the next
217 piece of special syntax. Hopefully long code cells are mostly valid
220 piece of special syntax. Hopefully long code cells are mostly valid
218 Python, not using lots of IPython special syntax, so this shouldn't be
221 Python, not using lots of IPython special syntax, so this shouldn't be
219 a performance issue.
222 a performance issue.
220 """
223 """
221 tokens_by_line = make_tokens_by_line(lines)
224 tokens_by_line = make_tokens_by_line(lines)
222 candidates = []
225 candidates = []
223 for transformer in self.transformers:
226 for transformer in self.transformers:
224 locn = transformer.find(tokens_by_line)
227 locn = transformer.find(tokens_by_line)
225 if locn:
228 if locn:
226 candidates.append((locn, transformer))
229 candidates.append((locn, transformer))
227
230
228 if not candidates:
231 if not candidates:
229 # Nothing to transform
232 # Nothing to transform
230 return False, lines
233 return False, lines
231
234
232 first_locn, transformer = min(candidates)
235 first_locn, transformer = min(candidates)
233 return True, transformer.transform(lines, first_locn)
236 return True, transformer.transform(lines, first_locn)
234
237
235 def __call__(self, lines):
238 def __call__(self, lines):
236 while True:
239 while True:
237 changed, lines = self.do_one_transform(lines)
240 changed, lines = self.do_one_transform(lines)
238 if not changed:
241 if not changed:
239 return lines
242 return lines
240
243
241 def assign_from_system(tokens_by_line, lines):
244 def assign_from_system(tokens_by_line, lines):
242 pass
245 pass
243
246
244
247
245 def transform_cell(cell):
248 def transform_cell(cell):
246 if not cell.endswith('\n'):
249 if not cell.endswith('\n'):
247 cell += '\n' # Ensure every line has a newline
250 cell += '\n' # Ensure every line has a newline
248 lines = cell.splitlines(keepends=True)
251 lines = cell.splitlines(keepends=True)
249 for transform in line_transforms:
252 for transform in line_transforms:
250 #print(transform, lines)
253 #print(transform, lines)
251 lines = transform(lines)
254 lines = transform(lines)
252
255
253 lines = TokenTransformers()(lines)
256 lines = TokenTransformers()(lines)
254 for line in lines:
257 for line in lines:
255 print('~~', line)
258 print('~~', line)
@@ -1,54 +1,60 b''
1 import nose.tools as nt
1 import nose.tools as nt
2
2
3 from IPython.core import inputtransformer2 as ipt2
3 from IPython.core import inputtransformer2 as ipt2
4 from IPython.core.inputtransformer2 import make_tokens_by_line
4 from IPython.core.inputtransformer2 import make_tokens_by_line
5
5
6 MULTILINE_MAGIC_ASSIGN = ("""\
6 MULTILINE_MAGIC_ASSIGN = ("""\
7 a = f()
7 a = f()
8 b = %foo \\
8 b = %foo \\
9 bar
9 bar
10 g()
10 g()
11 """.splitlines(keepends=True), """\
11 """.splitlines(keepends=True), """\
12 a = f()
12 a = f()
13 b = get_ipython().run_line_magic('foo', ' bar')
13 b = get_ipython().run_line_magic('foo', ' bar')
14 g()
14 g()
15 """.splitlines(keepends=True))
15 """.splitlines(keepends=True))
16
16
17 MULTILINE_SYSTEM_ASSIGN = ("""\
17 MULTILINE_SYSTEM_ASSIGN = ("""\
18 a = f()
18 a = f()
19 b = !foo \\
19 b = !foo \\
20 bar
20 bar
21 g()
21 g()
22 """.splitlines(keepends=True), """\
22 """.splitlines(keepends=True), """\
23 a = f()
23 a = f()
24 b = get_ipython().getoutput('foo bar')
24 b = get_ipython().getoutput('foo bar')
25 g()
25 g()
26 """.splitlines(keepends=True))
26 """.splitlines(keepends=True))
27
27
def test_continued_line():
    # The backslash continuation in MULTILINE_MAGIC_ASSIGN spans lines 1-2
    # (0-indexed), so both continuation helpers should agree on that range.
    sample = MULTILINE_MAGIC_ASSIGN[0]
    end = ipt2.find_end_of_continued_line(sample, 1)
    nt.assert_equal(end, 2)
    nt.assert_equal(ipt2.assemble_continued_line(sample, (1, 5), 2), "foo bar")
33
28 def test_find_assign_magic():
34 def test_find_assign_magic():
29 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0])
35 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0])
30 nt.assert_equal(ipt2.MagicAssign.find(tbl), (2, 4))
36 nt.assert_equal(ipt2.MagicAssign.find(tbl), (2, 4))
31
37
32 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0]) # Nothing to find
38 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0]) # Nothing to find
33 nt.assert_equal(ipt2.MagicAssign.find(tbl), None)
39 nt.assert_equal(ipt2.MagicAssign.find(tbl), None)
34
40
35 def test_transform_assign_magic():
41 def test_transform_assign_magic():
36 res = ipt2.MagicAssign.transform(MULTILINE_MAGIC_ASSIGN[0], (2, 4))
42 res = ipt2.MagicAssign.transform(MULTILINE_MAGIC_ASSIGN[0], (2, 4))
37 nt.assert_equal(res, MULTILINE_MAGIC_ASSIGN[1])
43 nt.assert_equal(res, MULTILINE_MAGIC_ASSIGN[1])
38
44
39 def test_find_assign_system():
45 def test_find_assign_system():
40 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0])
46 tbl = make_tokens_by_line(MULTILINE_SYSTEM_ASSIGN[0])
41 nt.assert_equal(ipt2.SystemAssign.find(tbl), (2, 4))
47 nt.assert_equal(ipt2.SystemAssign.find(tbl), (2, 4))
42
48
43 tbl = make_tokens_by_line(["a = !ls\n"])
49 tbl = make_tokens_by_line(["a = !ls\n"])
44 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 5))
50 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 5))
45
51
46 tbl = make_tokens_by_line(["a=!ls\n"])
52 tbl = make_tokens_by_line(["a=!ls\n"])
47 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 2))
53 nt.assert_equal(ipt2.SystemAssign.find(tbl), (1, 2))
48
54
49 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0]) # Nothing to find
55 tbl = make_tokens_by_line(MULTILINE_MAGIC_ASSIGN[0]) # Nothing to find
50 nt.assert_equal(ipt2.SystemAssign.find(tbl), None)
56 nt.assert_equal(ipt2.SystemAssign.find(tbl), None)
51
57
52 def test_transform_assign_system():
58 def test_transform_assign_system():
53 res = ipt2.SystemAssign.transform(MULTILINE_SYSTEM_ASSIGN[0], (2, 4))
59 res = ipt2.SystemAssign.transform(MULTILINE_SYSTEM_ASSIGN[0], (2, 4))
54 nt.assert_equal(res, MULTILINE_SYSTEM_ASSIGN[1])
60 nt.assert_equal(res, MULTILINE_SYSTEM_ASSIGN[1])
General Comments 0
You need to be logged in to leave comments. Login now