upstream/ipython Commit - r24154:13bf7e13

1

import re

2

from typing import List, Tuple

3

from IPython.utils import tokenize2

4

from IPython.utils.tokenutil import generate_tokens

5

6

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, that exact whitespace
    prefix is removed from every line that starts with it. Lines that do not
    start with the prefix are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell. May be empty.

    Returns
    -------
    list of str
        The input list itself when there is nothing to strip, otherwise a
        new list with the prefix removed.
    """
    if not lines:
        # Nothing to inspect; avoid indexing lines[0] on an empty list.
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [line[n:] if line.startswith(space) else line
            for line in lines]

19

20

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return a new list with at most one prompt_re match removed per line."""
        return [self.prompt_re.sub('', line, count=1) for line in lines]

    def __call__(self, lines):
        """Strip prompts from ``lines`` if one is detected on the first two lines.

        Returns the input list unchanged (same object) when no prompt is
        detected or when ``lines`` is empty.
        """
        if not lines:
            # Guard: lines[0] below would raise IndexError on empty input.
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

50

51

# Plain Python prompts: ``>>>`` starts a block and ``...`` continues it.
# A separate initial_re is supplied so that only ``>>>`` is tested against
# the first line.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` or a dotted continuation prompt ending in ``:``.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)

57

58

def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell, each expected to keep its trailing newline.

    Returns
    -------
    list of str
        ``lines`` unchanged if the cell is empty, does not start with ``%%``,
        or is a ``%%name?`` help request; otherwise a single-element list
        holding the ``get_ipython().run_cell_magic(name, first_line, body)``
        call, terminated by a newline.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # str.partition always returns (head, sep, tail); the separator is
    # discarded. The trailing newline is stripped so it does not end up in
    # the magic name or its arguments.
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    # Lines already carry their own newlines, so join without a separator.
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]

67

68

# Line-based transforms; transform_cell applies them to the cell's lines in
# this order, feeding each one the output of the previous.
line_transforms = [leading_indent, classic_prompt, ipython_prompt, cell_magic]

74

75

# -----

76

77

def help_end(tokens_by_line):
    """Placeholder: not implemented yet; ignores its argument and returns None.

    NOTE(review): cell_magic defers the ``%%name?`` case to this function, so
    it is presumably meant to handle help requests -- confirm when implemented.
    """
    return None

79

80

def escaped_command(tokens_by_line):
    """Placeholder: not implemented yet; ignores its argument and returns None."""
    return None

82

83

def _find_assign_op(token_line):
    """Return the index of the first top-level ``=`` token in a line.

    Finds the first assignment in the line ('=' not inside brackets).
    We don't try to support multiple special assignment (a = b = %foo).

    Parameters
    ----------
    token_line : sequence
        Tokens for one line; only each token's ``string`` attribute is read.

    Returns
    -------
    int or None
        Index of the ``=`` token, or None if there is no ``=`` outside
        brackets.
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Compare against whole tokens. A substring test such as
        # ``s in '([{'`` is also true for empty-string tokens, which would
        # wrongly bump the nesting level.
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            paren_level -= 1
    return None

95

96

class MagicAssign:
    """Find and rewrite magic assignments such as ``a = %foo args``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens ('%' and the magic name) are read after the '=',
            # so the line must hold strictly more than assign_ix + 2 tokens.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix + 1].string == '%') \
                    and (line[assign_ix + 2].type == tokenize2.NAME):
                return line[assign_ix + 1].start
        return None

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find.

        Parameters
        ----------
        lines : list of str
            The lines of the cell. The last character of each affected line
            is dropped as its newline, so lines are expected to end in one.
        start : (int, int)
            1-indexed line number and 0-indexed column of the ``%``.

        Returns
        -------
        list of str
            A new list in which the magic line, plus any backslash-continued
            lines that follow it, is replaced by a single
            ``lhs get_ipython().run_line_magic(name, args)`` line.
        """
        start_line = start[0] - 1   # Shift from 1-index to 0-index
        start_col = start[1]

        lhs = lines[start_line][:start_col]
        rhs = lines[start_line][start_col:-1]  # Trim newline
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')
        args_parts = [args]
        end_line = start_line
        # Follow explicit (backslash) line continuations, but only while
        # there is a following line to continue onto.
        while end_line + 1 < len(lines) and args_parts[-1].endswith('\\'):
            end_line += 1
            args_parts[-1] = args_parts[-1][:-1]  # Trim backslash
            args_parts.append(lines[end_line][:-1])  # Trim newline
        args = ' '.join(args_parts)

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after

139

140

def make_tokens_by_line(lines):
    """Tokenize ``lines`` and group the tokens into a list of lists.

    Tokens are appended to the current group; after every token whose type is
    ``tokenize2.NEWLINE`` a fresh group is started. The result therefore
    always contains at least one list, and the final list may be empty.
    """
    readline = iter(lines).__next__
    current = []
    grouped = [current]
    for tok in generate_tokens(readline):
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            current = []
            grouped.append(current)
    return grouped

148

149

class TokenTransformers:
    """Repeatedly apply token-based transformers until none of them match."""

    def __init__(self):
        # Each entry must provide find(tokens_by_line) and
        # transform(lines, location).
        self.transformers = [
            MagicAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            if locn:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        # Order by location only. Comparing whole (location, transformer)
        # tuples would fall back to comparing the transformer objects when
        # two locations are equal, which raises TypeError. On a tie the
        # transformer listed first wins.
        first_locn, transformer = min(candidates, key=lambda cand: cand[0])
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        """Apply do_one_transform until it reports no change; return the lines."""
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines

188

189

def assign_from_system(tokens_by_line, lines):
    """Placeholder: not implemented yet; ignores its arguments and returns None."""
    return None

191

192

193

def transform_cell(cell):
    """Run every transform over ``cell`` and print the resulting lines.

    The cell is given a trailing newline if it lacks one, split into lines
    (newlines kept), passed through each entry of ``line_transforms`` in
    order and then through ``TokenTransformers``. Each resulting line is
    printed with a ``~~`` prefix.

    NOTE(review): the transformed lines are only printed, not returned, so
    this function returns None -- confirm whether callers need the result.
    """
    # Ensure every line has a newline
    source = cell if cell.endswith('\n') else cell + '\n'
    current = source.splitlines(keepends=True)
    for step in line_transforms:
        current = step(current)

    token_stage = TokenTransformers()
    for out_line in token_stage(current):
        print('~~', out_line)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

@@ -0,0 +1,203 b''
	1	import re
	2	from typing import List, Tuple
	3	from IPython.utils import tokenize2
	4	from IPython.utils.tokenutil import generate_tokens
	5
	6	def leading_indent(lines):
	7	"""Remove leading indentation.
	8
	9	If the first line starts with a spaces or tabs, the same whitespace will be
	10	removed from each following line.
	11	"""
	12	m = re.match(r'^[ \t]+', lines[0])
	13	if not m:
	14	return lines
	15	space = m.group(0)
	16	n = len(space)
	17	return [l[n:] if l.startswith(space) else l
	18	for l in lines]
	19
	20	class PromptStripper:
	21	"""Remove matching input prompts from a block of input.
	22
	23	Parameters
	24	----------
	25	prompt_re : regular expression
	26	A regular expression matching any input prompt (including continuation)
	27	initial_re : regular expression, optional
	28	A regular expression matching only the initial prompt, but not continuation.
	29	If no initial expression is given, prompt_re will be used everywhere.
	30	Used mainly for plain Python prompts, where the continuation prompt
	31	``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
	32
	33	If initial_re and prompt_re differ,
	34	only initial_re will be tested against the first line.
	35	If any prompt is found on the first two lines,
	36	prompts will be stripped from the rest of the block.
	37	"""
	38	def __init__(self, prompt_re, initial_re=None):
	39	self.prompt_re = prompt_re
	40	self.initial_re = initial_re or prompt_re
	41
	42	def _strip(self, lines):
	43	return [self.prompt_re.sub('', l, count=1) for l in lines]
	44
	45	def __call__(self, lines):
	46	if self.initial_re.match(lines[0]) or \
	47	(len(lines) > 1 and self.prompt_re.match(lines[1])):
	48	return self._strip(lines)
	49	return lines
	50
	51	classic_prompt = PromptStripper(
	52	prompt_re=re.compile(r'^(>>>\|\.\.\.)( \|$)'),
	53	initial_re=re.compile(r'^>>>( \|$)')
	54	)
	55
	56	ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: \|\s*\.{3,}: ?)'))
	57
	58	def cell_magic(lines):
	59	if not lines[0].startswith('%%'):
	60	return lines
	61	if re.match('%%\w+\?', lines[0]):
	62	# This case will be handled by help_end
	63	return lines
	64	magic_name, first_line = lines[0][2:].partition(' ')
	65	body = '\n'.join(lines[1:])
	66	return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
	67
	68	line_transforms = [
	69	leading_indent,
	70	classic_prompt,
	71	ipython_prompt,
	72	cell_magic,
	73	]
	74
	75	# -----
	76
	77	def help_end(tokens_by_line):
	78	pass
	79
	80	def escaped_command(tokens_by_line):
	81	pass
	82
	83	def _find_assign_op(token_line):
	84	# Find the first assignment in the line ('=' not inside brackets)
	85	# We don't try to support multiple special assignment (a = b = %foo)
	86	paren_level = 0
	87	for i, ti in enumerate(token_line):
	88	s = ti.string
	89	if s == '=' and paren_level == 0:
	90	return i
	91	if s in '([{':
	92	paren_level += 1
	93	elif s in ')]}':
	94	paren_level -= 1
	95
	96	class MagicAssign:
	97	@staticmethod
	98	def find(tokens_by_line):
	99	"""Find the first magic assignment (a = %foo) in the cell.
	100
	101	Returns (line, column) of the % if found, or None.
	102	"""
	103	for line in tokens_by_line:
	104	assign_ix = _find_assign_op(line)
	105	if (assign_ix is not None) \
	106	and (len(line) >= assign_ix + 2) \
	107	and (line[assign_ix+1].string == '%') \
	108	and (line[assign_ix+2].type == tokenize2.NAME):
	109	return line[assign_ix+1].start
	110
	111	@staticmethod
	112	def transform(lines: List[str], start: Tuple[int, int]):
	113	"""Transform a magic assignment found by find
	114	"""
	115	start_line = start[0] - 1 # Shift from 1-index to 0-index
	116	start_col = start[1]
	117
	118	print("Start at", start_line, start_col)
	119	print("Line", lines[start_line])
	120
	121	lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
	122	assert rhs.startswith('%'), rhs
	123	magic_name, _, args = rhs[1:].partition(' ')
	124	args_parts = [args]
	125	end_line = start_line
	126	# Follow explicit (backslash) line continuations
	127	while end_line < len(lines) and args_parts[-1].endswith('\\'):
	128	end_line += 1
	129	args_parts[-1] = args_parts[-1][:-1] # Trim backslash
	130	args_parts.append(lines[end_line][:-1]) # Trim newline
	131	args = ' '.join(args_parts)
	132
	133	lines_before = lines[:start_line]
	134	call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
	135	new_line = lhs + call + '\n'
	136	lines_after = lines[end_line+1:]
	137
	138	return lines_before + [new_line] + lines_after
	139
	140	def make_tokens_by_line(lines):
	141	tokens_by_line = [[]]
	142	for token in generate_tokens(iter(lines).__next__):
	143	tokens_by_line[-1].append(token)
	144	if token.type == tokenize2.NEWLINE:
	145	tokens_by_line.append([])
	146
	147	return tokens_by_line
	148
	149	class TokenTransformers:
	150	def __init__(self):
	151	self.transformers = [
	152	MagicAssign
	153	]
	154
	155	def do_one_transform(self, lines):
	156	"""Find and run the transform earliest in the code.
	157
	158	Returns (changed, lines).
	159
	160	This method is called repeatedly until changed is False, indicating
	161	that all available transformations are complete.
	162
	163	The tokens following IPython special syntax might not be valid, so
	164	the transformed code is retokenised every time to identify the next
	165	piece of special syntax. Hopefully long code cells are mostly valid
	166	Python, not using lots of IPython special syntax, so this shouldn't be
	167	a performance issue.
	168	"""
	169	tokens_by_line = make_tokens_by_line(lines)
	170	candidates = []
	171	for transformer in self.transformers:
	172	locn = transformer.find(tokens_by_line)
	173	if locn:
	174	candidates.append((locn, transformer))
	175
	176	if not candidates:
	177	# Nothing to transform
	178	return False, lines
	179
	180	first_locn, transformer = min(candidates)
	181	return True, transformer.transform(lines, first_locn)
	182
	183	def __call__(self, lines):
	184	while True:
	185	changed, lines = self.do_one_transform(lines)
	186	if not changed:
	187	return lines
	188
	189	def assign_from_system(tokens_by_line, lines):
	190	pass
	191
	192
	193	def transform_cell(cell):
	194	if not cell.endswith('\n'):
	195	cell += '\n' # Ensure every line has a newline
	196	lines = cell.splitlines(keepends=True)
	197	for transform in line_transforms:
	198	#print(transform, lines)
	199	lines = transform(lines)
	200
	201	lines = TokenTransformers()(lines)
	202	for line in lines:
	203	print('~~', line)