##// END OF EJS Templates
Debugging function to see tokens
Thomas Kluyver -
Show More
@@ -1,258 +1,268 b''
1 import re
1 import re
2 from typing import List, Tuple
2 from typing import List, Tuple
3 from IPython.utils import tokenize2
3 from IPython.utils import tokenize2
4 from IPython.utils.tokenutil import generate_tokens
4 from IPython.utils.tokenutil import generate_tokens
5
5
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with a spaces or tabs, the same whitespace will be
    removed from each following line.
    """
    if not lines:
        # Empty cell: nothing to strip, and lines[0] below would raise.
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    # Only lines that start with the exact same whitespace run are stripped;
    # differently indented lines pass through unchanged.
    return [l[n:] if l.startswith(space) else l
            for l in lines]
19
19
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        # With no separate initial pattern, prompt_re applies to every line.
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per line.
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        if not lines:
            # Empty input: nothing to strip (lines[0] below would raise).
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
50
50
# Prompt stripper for plain Python (doctest-style) sessions.
# initial_re matches only ">>>": a leading "..." is a valid Python
# expression in Python 3, so it must not be treated as a prompt on the
# first line.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# Prompt stripper for IPython sessions ("In [1]: " and "   ...: ").
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
57
57
def cell_magic(lines):
    """Convert a cell magic (``%%name args``) to a run_cell_magic() call.

    Returns *lines* unchanged when the cell does not start with ``%%``, or
    when it is a help request (``%%name?``), which help_end should handle.
    """
    if not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # str.partition returns a 3-tuple (head, sep, tail); the original code
    # unpacked it into two names, raising ValueError for every cell magic.
    magic_name, _, first_line = lines[0][2:].partition(' ')
    body = '\n'.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)'
            % (magic_name, first_line, body)]
67
67
# Transformations applied to the raw list of lines, in this order, before
# any tokenisation happens.
line_transforms = [
    leading_indent,
    classic_prompt,
    ipython_prompt,
    cell_magic,
]
74
74
75 # -----
75 # -----
76
76
def help_end(tokens_by_line):
    """Placeholder: not yet implemented.

    NOTE(review): cell_magic defers ``%%name?`` cells to this function, so
    it is presumably intended to handle trailing-``?`` help syntax -- confirm
    when implemented.
    """
    pass
79
79
def escaped_command(tokens_by_line):
    """Placeholder: not yet implemented.

    NOTE(review): presumably intended to handle escaped command syntax
    (e.g. lines starting with ``!`` or ``%``) -- confirm when implemented.
    """
    pass
82
82
83 def _find_assign_op(token_line):
83 def _find_assign_op(token_line):
84 # Find the first assignment in the line ('=' not inside brackets)
84 # Find the first assignment in the line ('=' not inside brackets)
85 # We don't try to support multiple special assignment (a = b = %foo)
85 # We don't try to support multiple special assignment (a = b = %foo)
86 paren_level = 0
86 paren_level = 0
87 for i, ti in enumerate(token_line):
87 for i, ti in enumerate(token_line):
88 s = ti.string
88 s = ti.string
89 if s == '=' and paren_level == 0:
89 if s == '=' and paren_level == 0:
90 return i
90 return i
91 if s in '([{':
91 if s in '([{':
92 paren_level += 1
92 paren_level += 1
93 elif s in ')]}':
93 elif s in ')]}':
94 paren_level -= 1
94 paren_level -= 1
95
95
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.
    """
    current = start_line
    # Walk forward while each line is continued with a trailing backslash,
    # stopping early if we run off the end of the input.
    while lines[current].endswith('\\\n'):
        current += 1
        if current >= len(lines):
            break
    return current
107
107
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Assemble pieces of a continued line into a single line.

    Uses 0-indexed line numbers. *start* is (lineno, colno).
    """
    row, col = start
    pieces = [lines[row][col:]] + lines[row + 1:end_line + 1]
    # Every piece but the last ends with backslash+newline (two chars);
    # the final piece ends with a bare newline. Strip those, join on ' '.
    trimmed = [piece[:-2] for piece in pieces[:-1]]
    trimmed.append(pieces[-1][:-1])
    return ' '.join(trimmed)
116
116
class MagicAssign:
    """Transformer for assignments from magics: ``a = %foo args``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens are read after '=': the '%' and the magic name, so
            # the line must extend past index assign_ix + 2. (The original
            # ``>=`` guard allowed an IndexError on line[assign_ix+2] when
            # '%' was the final token of the line.)
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize2.NAME):
                return line[assign_ix+1].start

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        # Everything before the '%' (the target and '=') is kept verbatim.
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
151
151
152
152
class SystemAssign:
    """Transformer for assignments from system commands: ``a = !cmd``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns (line, column) of the ! if found, or None. *line* is 1-indexed.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None:
                continue
            if len(line) < assign_ix + 2:
                continue
            if line[assign_ix + 1].type != tokenize2.ERRORTOKEN:
                continue
            # '!' is not valid Python, so it tokenises as ERRORTOKEN; scan
            # the run of error tokens after '=' for it, giving up at the
            # first non-whitespace error token that isn't '!'.
            ix = assign_ix + 1
            while ix < len(line) and line[ix].type == tokenize2.ERRORTOKEN:
                candidate = line[ix].string
                if candidate == '!':
                    return line[ix].start
                if not candidate.isspace():
                    break
                ix += 1

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a system assignment found by find
        """
        row = start[0] - 1  # Shift from 1-index to 0-index
        col = start[1]

        # Everything before the '!' (the target and '=') is kept verbatim.
        lhs = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        rhs = assemble_continued_line(lines, (row, col), last_row)
        assert rhs.startswith('!'), rhs

        call = "get_ipython().getoutput({!r})".format(rhs[1:])
        return lines[:row] + [lhs + call + '\n'] + lines[last_row + 1:]
193
193
def make_tokens_by_line(lines):
    """Tokenise *lines*, grouping tokens by the logical line they belong to.

    A new group is started after each NEWLINE token, i.e. at the end of
    each logical (not physical) line.
    """
    grouped = [[]]
    line_iter = iter(lines)
    for tok in generate_tokens(line_iter.__next__):
        grouped[-1].append(tok)
        if tok.type == tokenize2.NEWLINE:
            grouped.append([])

    return grouped
202
202
def show_linewise_tokens(s: str):
    """For investigation"""
    # Tokenisation needs a trailing newline on the final line.
    if not s.endswith('\n'):
        s = s + '\n'
    for line in make_tokens_by_line(s.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)
class TokenTransformers:
    """Apply token-based transformations (magic/system assignments).

    Re-tokenises the cell after every change and applies the transformer
    whose match occurs earliest, until nothing matches any more.
    """
    def __init__(self):
        self.transformers = [
            MagicAssign,
            SystemAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            # Compare against None explicitly: the original truthiness test
            # would silently drop a falsy location tuple.
            if locn is not None:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        # Order by location only: bare tuple comparison would fall through
        # to comparing transformer classes (a TypeError) on tied locations.
        first_locn, transformer = min(candidates, key=lambda c: c[0])
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines
243
253
def assign_from_system(tokens_by_line, lines):
    """Placeholder: not yet implemented.

    NOTE(review): appears superseded by the SystemAssign class above --
    confirm before removing.
    """
    pass
246
256
247
257
def transform_cell(cell):
    """Apply all line-based and token-based transformations to *cell*.

    Returns the transformed source as a single string. A trailing newline
    is appended first if the cell lacks one, so every line ends with '\\n'.
    """
    if not cell.endswith('\n'):
        cell += '\n'  # Ensure every line has a newline
    lines = cell.splitlines(keepends=True)
    for transform in line_transforms:
        lines = transform(lines)

    lines = TokenTransformers()(lines)
    # Return the result rather than discarding it: the original body only
    # echoed each line with a debug print and implicitly returned None.
    return ''.join(lines)
General Comments 0
You need to be logged in to leave comments. Login now