##// END OF EJS Templates
Working on new input transformation machinery
Thomas Kluyver -
Show More
@@ -0,0 +1,203 b''
1 import re
2 from typing import List, Tuple
3 from IPython.utils import tokenize2
4 from IPython.utils.tokenutil import generate_tokens
5
def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that starts with it. Lines that do not
    start with that exact whitespace prefix are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.

    Returns
    -------
    list of str
        The dedented lines, or *lines* itself when there is nothing to do
        (empty input, or an unindented first line).
    """
    if not lines:
        # Nothing to inspect; avoids an IndexError on lines[0].
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
19
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one prompt removed from each line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        Returns *lines* unchanged when it is empty or no prompt is detected.
        """
        if not lines:
            # Nothing to inspect; avoids an IndexError on lines[0].
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines
50
# Strips plain Python prompts. Only ``>>>`` is accepted on the first line;
# ``...`` is also accepted as a prompt on the following lines.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# Strips IPython prompts: ``In [N]: `` and the ``...: `` continuation form.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
57
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell, each one keeping its trailing newline.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or is a ``%%name?`` help request; otherwise a single-line list holding
        the ``get_ipython().run_cell_magic(name, first_line, body)`` call.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # partition() yields (head, separator, tail); the separator is discarded.
    # The header's line ending is stripped so it does not leak into the args.
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    # Each line already carries its own newline, so join without a separator.
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
67
# Line-based transformations, listed in the order transform_cell applies them.
line_transforms = [leading_indent, classic_prompt, ipython_prompt, cell_magic]
74
75 # -----
76
def help_end(tokens_by_line):
    """Placeholder, not implemented yet; always returns None.

    NOTE(review): presumably meant to handle trailing ``?`` help syntax
    (cell_magic defers ``%%name?`` to this function) -- confirm.
    """
    return None
79
def escaped_command(tokens_by_line):
    """Placeholder, not implemented yet; always returns None."""
    return None
82
83 def _find_assign_op(token_line):
84 # Find the first assignment in the line ('=' not inside brackets)
85 # We don't try to support multiple special assignment (a = b = %foo)
86 paren_level = 0
87 for i, ti in enumerate(token_line):
88 s = ti.string
89 if s == '=' and paren_level == 0:
90 return i
91 if s in '([{':
92 paren_level += 1
93 elif s in ')]}':
94 paren_level -= 1
95
class MagicAssign:
    """Transformer for assignments from a line magic (``a = %foo args``).

    ``find`` locates the syntax in tokenised code and ``transform`` rewrites
    the source lines at that location.
    """

    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens are needed after '=': the '%' and the magic name,
            # so the line must be strictly longer than assign_ix + 2.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix + 1].string == '%') \
                    and (line[assign_ix + 2].type == tokenize2.NAME):
                return line[assign_ix + 1].start
        return None

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find.

        Parameters
        ----------
        lines : list of str
            Source lines, each ending with a newline.
        start : (int, int)
            1-based line number and 0-based column of the ``%``.

        Returns
        -------
        list of str
            A new list in which the magic assignment (including any
            backslash-continued lines) is replaced by a single
            ``get_ipython().run_line_magic(...)`` assignment line.
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        first = lines[start_line]
        # Split at the '%'; the slice end of -1 drops the trailing newline.
        lhs, rhs = first[:start_col], first[start_col:-1]
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')
        args_parts = [args]
        end_line = start_line
        # Follow explicit (backslash) line continuations, but only while a
        # following line actually exists to continue onto.
        while end_line + 1 < len(lines) and args_parts[-1].endswith('\\'):
            end_line += 1
            args_parts[-1] = args_parts[-1][:-1]    # Trim backslash
            args_parts.append(lines[end_line][:-1])  # Trim newline
        args = ' '.join(args_parts)

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
139
def make_tokens_by_line(lines):
    """Tokenise *lines* and group the tokens into lists split at NEWLINE.

    Each NEWLINE token closes the current group (and is its last element),
    after which a fresh group is started. Tokens emitted after the final
    NEWLINE therefore land in a trailing group of their own.

    Returns a list of lists of tokens.
    """
    readline = iter(lines).__next__
    current = []
    grouped = [current]
    for tok in generate_tokens(readline):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            # Start collecting the next line's tokens.
            current = []
            grouped.append(current)
    return grouped
148
class TokenTransformers:
    """Apply the token-based transformers repeatedly to the lines of a cell.

    Each entry in ``self.transformers`` provides ``find(tokens_by_line)``,
    returning a location or None, and ``transform(lines, location)``,
    returning the rewritten lines.
    """

    def __init__(self):
        self.transformers = [
            MagicAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            if locn:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        # Compare on the location only. A plain tuple comparison would fall
        # back to comparing the transformer objects when two locations tie,
        # which raises TypeError for classes.
        first_locn, transformer = min(candidates, key=lambda cand: cand[0])
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        """Run transformations until none applies; return the final lines."""
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines
188
def assign_from_system(tokens_by_line, lines):
    """Placeholder, not implemented yet; always returns None."""
    return None
191
192
def transform_cell(cell):
    """Run every transformation over *cell* and print the resulting lines.

    The cell text is split into newline-terminated lines, passed through each
    entry of ``line_transforms`` in order, then through ``TokenTransformers``.
    Each resulting line is printed with a ``~~`` prefix; nothing is returned.
    """
    # Ensure every line has a newline
    text = cell if cell.endswith('\n') else cell + '\n'
    lines = text.splitlines(keepends=True)
    for step in line_transforms:
        lines = step(lines)

    for out in TokenTransformers()(lines):
        print('~~', out)
General Comments 0
You need to be logged in to leave comments. Login now