Debugging function to see tokens
Thomas Kluyver
import re
from typing import List, Tuple
from IPython.utils import tokenize2
from IPython.utils.tokenutil import generate_tokens

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line.
    """
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
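# Example (illustrative): leading_indent(["    if x:\n", "        y\n"])
# returns ["if x:\n", "    y\n"]; the first line's indent is stripped from every line.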

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
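# Example (illustrative): classic_prompt([">>> a = 1\n", "... b = 2\n"])
# returns ["a = 1\n", "b = 2\n"]; ipython_prompt does the same for
# ["In [3]: a = 1\n", "   ...: b = 2\n"].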

def cell_magic(lines):
    if not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = lines[0][2:-1].partition(' ')  # Strip %% and the trailing newline
    body = '\n'.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
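# Example (illustrative): cell_magic(["%%timeit -n1\n", "x = 1\n"]) returns
# ["get_ipython().run_cell_magic('timeit', '-n1', 'x = 1\\n')"].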

line_transforms = [
    leading_indent,
    classic_prompt,
    ipython_prompt,
    cell_magic,
]

# -----

def help_end(tokens_by_line):
    pass

def escaped_command(tokens_by_line):
    pass

def _find_assign_op(token_line):
    # Find the first assignment in the line ('=' not inside brackets)
    # We don't try to support multiple special assignments (a = b = %foo)
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        if s in '([{':
            paren_level += 1
        elif s in ')]}':
            paren_level -= 1
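# Example (illustrative): for the tokenised line ``a = %who``, _find_assign_op
# returns the index of the '=' token; any '=' nested inside (), [] or {} is ignored.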

def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    end_line = start_line
    while lines[end_line].endswith('\\\n'):
        end_line += 1
        if end_line >= len(lines):
            break
    return end_line
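# Example (illustrative): with lines == ["a = \\\n", "  1\n"],
# find_end_of_continued_line(lines, 0) returns 1.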

def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble pieces of a continued line into a single line.

    Uses 0-indexed line numbers. *start* is (lineno, colno).
    """
    parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
    return ' '.join([p[:-2] for p in parts[:-1]]  # Strip backslash+newline
                    + [parts[-1][:-1]])           # Strip newline from last line
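# Example (illustrative): assemble_continued_line(["a = %foo \\\n", "bar\n"], (0, 4), 1)
# returns "%foo  bar"; the backslash/newline pairs are dropped and the pieces joined.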

class MagicAssign:
    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return line[assign_ix+1].start

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
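# Example (illustrative): the cell line "t = %timeit -o pass\n" is rewritten to
# "t = get_ipython().run_line_magic('timeit', '-o pass')\n".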


class SystemAssign:
    @staticmethod
    def find(tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns (line, column) of the ! if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize2.ERRORTOKEN):
                ix = assign_ix + 1

                while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
                    if line[ix].string == '!':
                        return line[ix].start
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a system assignment found by find
        """
        start_line = start[0] - 1  # Shift from 1-index to 0-index
        start_col = start[1]

        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
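# Example (illustrative): the cell line "files = !ls\n" is rewritten to
# "files = get_ipython().getoutput('ls')\n".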

def make_tokens_by_line(lines):
    tokens_by_line = [[]]
    for token in generate_tokens(iter(lines).__next__):
        tokens_by_line[-1].append(token)
        if token.type == tokenize2.NEWLINE:
            tokens_by_line.append([])

    return tokens_by_line
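# Example (illustrative): make_tokens_by_line(["a = 1\n", "b = 2\n"]) returns one
# token list per logical line, splitting after each NEWLINE token.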

def show_linewise_tokens(s: str):
    """For investigation"""
    if not s.endswith('\n'):
        s += '\n'
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)
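# Example (illustrative): show_linewise_tokens("a = %who\n") prints each line's
# tokens, which helps when checking how IPython's special syntax tokenises.
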
class TokenTransformers:
    def __init__(self):
        self.transformers = [
            MagicAssign,
            SystemAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            if locn:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        first_locn, transformer = min(candidates)
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines
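# Example (illustrative): TokenTransformers()(["a = %who_ls\n"]) returns
# ["a = get_ipython().run_line_magic('who_ls', '')\n"].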

def assign_from_system(tokens_by_line, lines):
    pass


def transform_cell(cell):
    if not cell.endswith('\n'):
        cell += '\n'  # Ensure every line has a newline
    lines = cell.splitlines(keepends=True)
    for transform in line_transforms:
        #print(transform, lines)
        lines = transform(lines)

    lines = TokenTransformers()(lines)
    for line in lines:
        print('~~', line)
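# Example (illustrative): transform_cell("a = %who\n") currently just prints the
# transformed lines for debugging ("~~ a = get_ipython().run_line_magic('who', '')");
# it does not yet return the rewritten source.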