upstream/ipython Commit - r24154:13bf7e13

1

import re

2

from typing import List, Tuple

3

from IPython.utils import tokenize2

4

from IPython.utils.tokenutil import generate_tokens

5

6

def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace is
    removed from every line that starts with exactly that prefix. Lines that
    do not start with the prefix are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell. May be empty.

    Returns
    -------
    list of str
        The input list itself if there is nothing to strip, otherwise a new
        list with the common prefix removed.
    """
    # An empty cell has no first line to inspect.
    if not lines:
        return lines
    m = re.match(r'^[ \t]+', lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l for l in lines]

19

20

class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return a new list with at most one leading prompt removed per line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one.

        Returns the input list unchanged when no prompt is detected, or when
        the list is empty.
        """
        # Nothing to inspect in an empty block; avoids IndexError on lines[0].
        if not lines:
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines

50

51

# Plain Python prompts: '>>>' opens a block and '...' continues it. Only '>>>'
# is accepted as the initial prompt, since a bare '...' on the first line is a
# valid Python expression.
classic_prompt = PromptStripper(
    initial_re=re.compile(r'^>>>( |$)'),
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
)

# IPython prompts: 'In [N]: ' and the '...: ' continuation (optionally indented).
ipython_prompt = PromptStripper(
    re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)')
)

57

58

def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell, each expected to keep its line ending.

    Returns
    -------
    list of str
        *lines* unchanged if the cell does not start with ``%%`` or is a help
        request such as ``%%magic?``. Otherwise a one-element list holding the
        ``get_ipython().run_cell_magic(name, first_line, body)`` call, where
        *first_line* is the rest of the ``%%`` line and *body* is every
        following line joined together.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # partition() yields (head, sep, tail); the separator is discarded. The
    # trailing newline is stripped so it does not end up in the magic's args.
    magic_name, _, first_line = lines[0][2:].rstrip().partition(' ')
    # Lines already carry their own line endings, so join without a separator.
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]

67

68

# Line-based transforms; each one takes a list of lines and returns a list of
# lines. The order here is the order in which they are listed to be applied.
line_transforms = [leading_indent, classic_prompt, ipython_prompt, cell_magic]

74

75

# -----

76

77

def help_end(tokens_by_line):
    """Placeholder: not implemented yet; ignores its argument and returns None."""
    return None

79

80

def escaped_command(tokens_by_line):
    """Placeholder: not implemented yet; ignores its argument and returns None."""
    return None

82

83

def _find_assign_op(token_line):
    """Return the index of the first top-level ``=`` token in *token_line*.

    A ``=`` counts only when it is not inside brackets, so keyword arguments
    such as ``f(a=1)`` are skipped. Multiple special assignments
    (``a = b = %foo``) are not supported: only the first ``=`` is reported.

    Parameters
    ----------
    token_line : sequence of tokens
        Objects exposing a ``string`` attribute, one per token.

    Returns
    -------
    int or None
        Index of the ``=`` token, or None if there is none outside brackets.
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Compare against whole tokens. A substring test (s in '([{') is also
        # true for the empty string, which would treat empty-string tokens as
        # opening brackets.
        if s in ('(', '[', '{'):
            paren_level += 1
        elif s in (')', ']', '}'):
            # Ignore an unmatched closer rather than going negative.
            if paren_level > 0:
                paren_level -= 1
    return None

95

96

class MagicAssign:
    """Find and rewrite magic assignments such as ``a = %foo args``."""

    @staticmethod
    def find(tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns (line, column) of the % if found, or None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two more tokens (the '%' and the magic name) must exist after
            # the '=' before they can be indexed.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix + 1].string == '%') \
                    and (line[assign_ix + 2].type == tokenize2.NAME):
                return line[assign_ix + 1].start
        return None

    @staticmethod
    def transform(lines: List[str], start: Tuple[int, int]):
        """Transform a magic assignment found by find.

        Parameters
        ----------
        lines : list of str
            Lines of the cell; each is expected to end with a newline.
        start : (int, int)
            1-based line number and 0-based column of the ``%``.

        Returns
        -------
        list of str
            A new list in which the assignment line, plus any lines it
            continues onto with a trailing backslash, is replaced by one
            ``lhs get_ipython().run_line_magic(name, args)`` line.
        """
        start_line = start[0] - 1  # Shift from 1-index to 0-index
        start_col = start[1]

        source_line = lines[start_line]
        lhs = source_line[:start_col]
        rhs = source_line[start_col:-1]  # Trim newline
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        args_parts = [args]
        end_line = start_line
        # Follow explicit (backslash) line continuations, but only while a
        # following line actually exists to continue onto.
        while end_line + 1 < len(lines) and args_parts[-1].endswith('\\'):
            end_line += 1
            args_parts[-1] = args_parts[-1][:-1]  # Trim backslash
            args_parts.append(lines[end_line][:-1])  # Trim newline
        args = ' '.join(args_parts)

        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        return lines[:start_line] + [new_line] + lines[end_line + 1:]

139

140

def make_tokens_by_line(lines):
    """Tokenize *lines* and group the tokens into one list per logical line.

    A new group is started after every NEWLINE token, so the returned list
    always ends with a group holding whatever tokens followed the last
    NEWLINE (possibly none).
    """
    readline = iter(lines).__next__
    grouped = [[]]
    for tok in generate_tokens(readline):
        current = grouped[-1]
        current.append(tok)
        if tok.type == tokenize2.NEWLINE:
            grouped.append([])
    return grouped

148

149

class TokenTransformers:
    """Apply token-based transformers to a cell until none of them match."""

    def __init__(self):
        self.transformers = [
            MagicAssign,
        ]

    def do_one_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer in self.transformers:
            locn = transformer.find(tokens_by_line)
            if locn:
                candidates.append((locn, transformer))

        if not candidates:
            # Nothing to transform
            return False, lines

        # Order by location only. Without the key, a tie on location would
        # fall back to comparing the transformer objects, which are not
        # orderable. On a tie min() keeps the first candidate, i.e. the one
        # listed earliest in self.transformers.
        first_locn, transformer = min(candidates, key=lambda cand: cand[0])
        return True, transformer.transform(lines, first_locn)

    def __call__(self, lines):
        """Run do_one_transform repeatedly and return the final lines."""
        while True:
            changed, lines = self.do_one_transform(lines)
            if not changed:
                return lines

188

189

def assign_from_system(tokens_by_line, lines):
    """Placeholder: not implemented yet; ignores its arguments and returns None."""
    return None

191

192

193

def transform_cell(cell):
    """Run the line transforms and then the token transformers over *cell*.

    The cell is split into lines that keep their line endings (a final
    newline is appended first if missing). Each resulting line is printed
    with a ``~~`` prefix.

    NOTE(review): the transformed lines are only printed; nothing is returned.
    """
    # Ensure every line has a newline
    source = cell if cell.endswith('\n') else cell + '\n'
    lines = source.splitlines(keepends=True)
    for transform in line_transforms:
        lines = transform(lines)

    token_transformers = TokenTransformers()
    for line in token_transformers(lines):
        print('~~', line)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

			@@ -0,0 +1,203 b''
		1	import re
		2	from typing import List, Tuple
		3	from IPython.utils import tokenize2
		4	from IPython.utils.tokenutil import generate_tokens
		5
		6	def leading_indent(lines):
		7	"""Remove leading indentation.
		8
		9	If the first line starts with spaces or tabs, the same whitespace will be
		10	removed from each following line.
		11	"""
		12	m = re.match(r'^[ \t]+', lines[0])
		13	if not m:
		14	return lines
		15	space = m.group(0)
		16	n = len(space)
		17	return [l[n:] if l.startswith(space) else l
		18	for l in lines]
		19
		20	class PromptStripper:
		21	"""Remove matching input prompts from a block of input.
		22
		23	Parameters
		24	----------
		25	prompt_re : regular expression
		26	A regular expression matching any input prompt (including continuation)
		27	initial_re : regular expression, optional
		28	A regular expression matching only the initial prompt, but not continuation.
		29	If no initial expression is given, prompt_re will be used everywhere.
		30	Used mainly for plain Python prompts, where the continuation prompt
		31	``...`` is a valid Python expression in Python 3, so shouldn't be stripped.
		32
		33	If initial_re and prompt_re differ,
		34	only initial_re will be tested against the first line.
		35	If any prompt is found on the first two lines,
		36	prompts will be stripped from the rest of the block.
		37	"""
		38	def __init__(self, prompt_re, initial_re=None):
		39	self.prompt_re = prompt_re
		40	self.initial_re = initial_re or prompt_re
		41
		42	def _strip(self, lines):
		43	return [self.prompt_re.sub('', l, count=1) for l in lines]
		44
		45	def __call__(self, lines):
		46	if self.initial_re.match(lines[0]) or \
		47	(len(lines) > 1 and self.prompt_re.match(lines[1])):
		48	return self._strip(lines)
		49	return lines
		50
		51	classic_prompt = PromptStripper(
		52	prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
		53	initial_re=re.compile(r'^>>>( |$)')
		54	)
		55
		56	ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
		57
		58	def cell_magic(lines):
		59	if not lines[0].startswith('%%'):
		60	return lines
		61	if re.match('%%\w+\?', lines[0]):
		62	# This case will be handled by help_end
		63	return lines
		64	magic_name, first_line = lines[0][2:].partition(' ')
		65	body = '\n'.join(lines[1:])
		66	return ['get_ipython().run_cell_magic(%r, %r, %r)' % (magic_name, first_line, body)]
		67
		68	line_transforms = [
		69	leading_indent,
		70	classic_prompt,
		71	ipython_prompt,
		72	cell_magic,
		73	]
		74
		75	# -----
		76
		77	def help_end(tokens_by_line):
		78	pass
		79
		80	def escaped_command(tokens_by_line):
		81	pass
		82
		83	def _find_assign_op(token_line):
		84	# Find the first assignment in the line ('=' not inside brackets)
		85	# We don't try to support multiple special assignment (a = b = %foo)
		86	paren_level = 0
		87	for i, ti in enumerate(token_line):
		88	s = ti.string
		89	if s == '=' and paren_level == 0:
		90	return i
		91	if s in '([{':
		92	paren_level += 1
		93	elif s in ')]}':
		94	paren_level -= 1
		95
		96	class MagicAssign:
		97	@staticmethod
		98	def find(tokens_by_line):
		99	"""Find the first magic assignment (a = %foo) in the cell.
		100
		101	Returns (line, column) of the % if found, or None.
		102	"""
		103	for line in tokens_by_line:
		104	assign_ix = _find_assign_op(line)
		105	if (assign_ix is not None) \
		106	and (len(line) >= assign_ix + 2) \
		107	and (line[assign_ix+1].string == '%') \
		108	and (line[assign_ix+2].type == tokenize2.NAME):
		109	return line[assign_ix+1].start
		110
		111	@staticmethod
		112	def transform(lines: List[str], start: Tuple[int, int]):
		113	"""Transform a magic assignment found by find
		114	"""
		115	start_line = start[0] - 1 # Shift from 1-index to 0-index
		116	start_col = start[1]
		117
		118	print("Start at", start_line, start_col)
		119	print("Line", lines[start_line])
		120
		121	lhs, rhs = lines[start_line][:start_col], lines[start_line][start_col:-1]
		122	assert rhs.startswith('%'), rhs
		123	magic_name, _, args = rhs[1:].partition(' ')
		124	args_parts = [args]
		125	end_line = start_line
		126	# Follow explicit (backslash) line continuations
		127	while end_line < len(lines) and args_parts[-1].endswith('\\'):
		128	end_line += 1
		129	args_parts[-1] = args_parts[-1][:-1] # Trim backslash
		130	args_parts.append(lines[end_line][:-1]) # Trim newline
		131	args = ' '.join(args_parts)
		132
		133	lines_before = lines[:start_line]
		134	call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
		135	new_line = lhs + call + '\n'
		136	lines_after = lines[end_line+1:]
		137
		138	return lines_before + [new_line] + lines_after
		139
		140	def make_tokens_by_line(lines):
		141	tokens_by_line = [[]]
		142	for token in generate_tokens(iter(lines).__next__):
		143	tokens_by_line[-1].append(token)
		144	if token.type == tokenize2.NEWLINE:
		145	tokens_by_line.append([])
		146
		147	return tokens_by_line
		148
		149	class TokenTransformers:
		150	def __init__(self):
		151	self.transformers = [
		152	MagicAssign
		153	]
		154
		155	def do_one_transform(self, lines):
		156	"""Find and run the transform earliest in the code.
		157
		158	Returns (changed, lines).
		159
		160	This method is called repeatedly until changed is False, indicating
		161	that all available transformations are complete.
		162
		163	The tokens following IPython special syntax might not be valid, so
		164	the transformed code is retokenised every time to identify the next
		165	piece of special syntax. Hopefully long code cells are mostly valid
		166	Python, not using lots of IPython special syntax, so this shouldn't be
		167	a performance issue.
		168	"""
		169	tokens_by_line = make_tokens_by_line(lines)
		170	candidates = []
		171	for transformer in self.transformers:
		172	locn = transformer.find(tokens_by_line)
		173	if locn:
		174	candidates.append((locn, transformer))
		175
		176	if not candidates:
		177	# Nothing to transform
		178	return False, lines
		179
		180	first_locn, transformer = min(candidates)
		181	return True, transformer.transform(lines, first_locn)
		182
		183	def __call__(self, lines):
		184	while True:
		185	changed, lines = self.do_one_transform(lines)
		186	if not changed:
		187	return lines
		188
		189	def assign_from_system(tokens_by_line, lines):
		190	pass
		191
		192
		193	def transform_cell(cell):
		194	if not cell.endswith('\n'):
		195	cell += '\n' # Ensure every line has a newline
		196	lines = cell.splitlines(keepends=True)
		197	for transform in line_transforms:
		198	#print(transform, lines)
		199	lines = transform(lines)
		200
		201	lines = TokenTransformers()(lines)
		202	for line in lines:
		203	print('~~', line)