upstream/mercurial-mirror Commit - r38410:f77bbd34

23

return t._replace(start=(t.start[0], t.start[1] + ofs),

23

return t._replace(start=(t.start[0], t.start[1] + ofs),

24

end=(t.end[0], t.end[1] + ofs))

24

end=(t.end[0], t.end[1] + ofs))

25

26

if True:

26

def replacetokens(tokens, opts):

27

def replacetokens(tokens, opts):

27

"""Transform a stream of tokens from raw to Python 3.

28

"""Transform a stream of tokens from raw to Python 3.

28

29

Returns a generator of possibly rewritten tokens.

30

31

The input token list may be mutated as part of processing. However,

32

its changes do not necessarily match the output token stream.

33

"""

34

sysstrtokens = set()

29

35

30

Returns a generator of possibly rewritten tokens.

36

# The following utility functions access the tokens list and i index of

37

# the for i, t enumerate(tokens) loop below

38

def _isop(j, *o):

39

"""Assert that tokens[j] is an OP with one of the given values"""

40

try:

41

return tokens[j].type == token.OP and tokens[j].string in o

42

except IndexError:

43

return False

31

44

32

The input token list may be mutated as part of processing. However,

45

def _findargnofcall(n):

33

its changes do not necessarily match the output token stream.

46

"""Find arg n of a call expression (start at 0)

34

"""

47

35

sysstrtokens = set()

48

Returns index of the first token of that argument, or None if

49

there is not that many arguments.

50

51

Assumes that token[i + 1] is '('.

36

52

37

# The following utility functions access the tokens list and i index of

53

"""

38

# the for i, t enumerate(tokens) loop below

54

nested = 0

39

def _isop(j, *o):

55

for j in range(i + 2, len(tokens)):

40

"""Assert that tokens[j] is an OP with one of the given values"""

56

if _isop(j, ')', ']', '}'):

41

try:

57

# end of call, tuple, subscription or dict / set

42

return tokens[j].type == token.OP and tokens[j].string in o

58

nested -= 1

43

except IndexError:

59

if nested < 0:

44

return False

60

return None

61

elif n == 0:

62

# this is the starting position of arg

63

return j

64

elif _isop(j, '(', '[', '{'):

65

nested += 1

66

elif _isop(j, ',') and nested == 0:

67

n -= 1

45

68

46

def _findargnofcall(n):

69

return None

47

"""Find arg n of a call expression (start at 0)

70

71

def _ensuresysstr(j):

72

"""Make sure the token at j is a system string

48

73

49

Returns index of the first token of that argument, or None if

74

Remember the given token so the string transformer won't add

50

there is not that many arguments.

75

the byte prefix.

51

76

52

Assumes that token[i + 1] is '('.

77

Ignores tokens that are not strings. Assumes bounds checking has

78

already been done.

53

79

54

"""

80

"""

55

nested = 0

81

st = tokens[j]

56

for j in range(i + 2, len(tokens)):

82

if st.type == token.STRING and st.string.startswith(("'", '"')):

57

if _isop(j, ')', ']', '}'):

83

sysstrtokens.add(st)

58

# end of call, tuple, subscription or dict / set

59

nested -= 1

60

if nested < 0:

61

return None

62

elif n == 0:

63

# this is the starting position of arg

64

return j

65

elif _isop(j, '(', '[', '{'):

66

nested += 1

67

elif _isop(j, ',') and nested == 0:

68

n -= 1

69

84

70

return None

85

coldelta = 0 # column increment for new opening parens

71

86

coloffset = -1 # column offset for the current line (-1: TBD)

72

def _ensuresysstr(j):

87

parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)

73

"""Make sure the token at j is a system string

88

for i, t in enumerate(tokens):

74

89

# Compute the column offset for the current line, such that

75

Remember the given token so the string transformer won't add

90

# the current line will be aligned to the last opening paren

76

the byte prefix.

91

# as before.

77

92

if coloffset < 0:

78

Ignores tokens that are not strings. Assumes bounds checking has

93

if t.start[1] == parens[-1][1]:

79

already been done.

94

coloffset = parens[-1][2]

95

elif t.start[1] + 1 == parens[-1][1]:

96

# fix misaligned indent of s/util.Abort/error.Abort/

97

coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])

98

else:

99

coloffset = 0

80

100

81

"""

101

# Reset per-line attributes at EOL.

82

st = tokens[j]

102

if t.type in (token.NEWLINE, tokenize.NL):

83

if st.type == token.STRING and st.string.startswith(("'", '"')):

103

yield adjusttokenpos(t, coloffset)

84

sysstrtokens.add(st)

104

coldelta = 0

105

coloffset = -1

106

continue

107

108

# Remember the last paren position.

109

if _isop(i, '(', '[', '{'):

110

parens.append(t.end + (coloffset + coldelta,))

111

elif _isop(i, ')', ']', '}'):

112

parens.pop()

85

113

86

coldelta = 0 # column increment for new opening parens

114

# Convert most string literals to byte literals. String literals

87

coloffset = -1 # column offset for the current line (-1: TBD)

115

# in Python 2 are bytes. String literals in Python 3 are unicode.

88

parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)

116

# Most strings in Mercurial are bytes and unicode strings are rare.

89

for i, t in enumerate(tokens):

117

# Rather than rewrite all string literals to use ``b''`` to indicate

90

# Compute the column offset for the current line, such that

118

# byte strings, we apply this token transformer to insert the ``b``

91

# the current line will be aligned to the last opening paren

119

# prefix nearly everywhere.

92

# as before.

120

if t.type == token.STRING and t not in sysstrtokens:

93

if coloffset < 0:

121

s = t.string

94

if t.start[1] == parens[-1][1]:

95

coloffset = parens[-1][2]

96

elif t.start[1] + 1 == parens[-1][1]:

97

# fix misaligned indent of s/util.Abort/error.Abort/

98

coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])

99

else:

100

coloffset = 0

101

122

102

# Reset per-line attributes at EOL.

123

# Preserve docstrings as string literals. This is inconsistent

103

if t.type in (token.NEWLINE, tokenize.NL):

124

# with regular unprefixed strings. However, the

125

# "from __future__" parsing (which allows a module docstring to

126

# exist before it) doesn't properly handle the docstring if it

127

# is b''' prefixed, leading to a SyntaxError. We leave all

128

# docstrings as unprefixed to avoid this. This means Mercurial

129

# components touching docstrings need to handle unicode,

130

# unfortunately.

131

if s[0:3] in ("'''", '"""'):

104

yield adjusttokenpos(t, coloffset)

132

yield adjusttokenpos(t, coloffset)

105

coldelta = 0

106

coloffset = -1

107

continue

133

continue

108

134

109

# Remember the last paren position.

135

# If the first character isn't a quote, it is likely a string

110

if _isop(i, '(', '[', '{'):

136

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

111

parens.append(t.end + (coloffset + coldelta,))

137

if s[0] not in ("'", '"'):

112

elif _isop(i, ')', ']', '}'):

138

yield adjusttokenpos(t, coloffset)

113

parens.pop()

114

115

# Convert most string literals to byte literals. String literals

116

# in Python 2 are bytes. String literals in Python 3 are unicode.

117

# Most strings in Mercurial are bytes and unicode strings are rare.

118

# Rather than rewrite all string literals to use ``b''`` to indicate

119

# byte strings, we apply this token transformer to insert the ``b``

120

# prefix nearly everywhere.

121

if t.type == token.STRING and t not in sysstrtokens:

122

s = t.string

123

124

# Preserve docstrings as string literals. This is inconsistent

125

# with regular unprefixed strings. However, the

126

# "from __future__" parsing (which allows a module docstring to

127

# exist before it) doesn't properly handle the docstring if it

128

# is b''' prefixed, leading to a SyntaxError. We leave all

129

# docstrings as unprefixed to avoid this. This means Mercurial

130

# components touching docstrings need to handle unicode,

131

# unfortunately.

132

if s[0:3] in ("'''", '"""'):

133

yield adjusttokenpos(t, coloffset)

134

continue

135

136

# If the first character isn't a quote, it is likely a string

137

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

138

if s[0] not in ("'", '"'):

139

yield adjusttokenpos(t, coloffset)

140

continue

141

142

# String literal. Prefix to make a b'' string.

143

yield adjusttokenpos(t._replace(string='b%s' % t.string),

144

coloffset)

145

coldelta += 1

146

continue

139

continue

147

140

148

# This looks like a function call.

141

# String literal. Prefix to make a b'' string.

149

if t.type == token.NAME and _isop(i + 1, '('):

142

yield adjusttokenpos(t._replace(string='b%s' % t.string),

150

fn = t.string

143

coloffset)

144

coldelta += 1

145

continue

151

146

152

# *attr() builtins don't accept byte strings to 2nd argument.

147

# This looks like a function call.

153

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

148

if t.type == token.NAME and _isop(i + 1, '('):

154

not _isop(i - 1, '.')):

149

fn = t.string

155

arg1idx = _findargnofcall(1)

150

156

if arg1idx is not None:

151

# *attr() builtins don't accept byte strings to 2nd argument.

157

_ensuresysstr(arg1idx)

152

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

153

not _isop(i - 1, '.')):

154

arg1idx = _findargnofcall(1)

155

if arg1idx is not None:

156

_ensuresysstr(arg1idx)

158

157

159

# .encode() and .decode() on str/bytes/unicode don't accept

158

# .encode() and .decode() on str/bytes/unicode don't accept

160

# byte strings on Python 3.

159

# byte strings on Python 3.

161

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

160

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

162

for argn in range(2):

161

for argn in range(2):

163

argidx = _findargnofcall(argn)

162

argidx = _findargnofcall(argn)

164

if argidx is not None:

163

if argidx is not None:

165

_ensuresysstr(argidx)

164

_ensuresysstr(argidx)

166

165

167

# It changes iteritems/values to items/values as they are not

166

# It changes iteritems/values to items/values as they are not

168

# present in Python 3 world.

167

# present in Python 3 world.

169

elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):

168

elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):

170

yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)

169

yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)

171

continue

170

continue

172

171

173

# Emit unmodified token.

172

# Emit unmodified token.

174

yield adjusttokenpos(t, coloffset)

173

yield adjusttokenpos(t, coloffset)

175

174

176

def process(fin, fout, opts):

175

def process(fin, fout, opts):

177

tokens = tokenize.tokenize(fin.readline)

176

tokens = tokenize.tokenize(fin.readline)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

@@ -23,155 +23,154 def adjusttokenpos(t, ofs):
23	return t._replace(start=(t.start[0], t.start[1] + ofs),	23	return t._replace(start=(t.start[0], t.start[1] + ofs),
24	end=(t.end[0], t.end[1] + ofs))	24	end=(t.end[0], t.end[1] + ofs))
25		25
26	if True:	26	def replacetokens(tokens, opts):
27	def replacetokens(tokens, opts):	27	"""Transform a stream of tokens from raw to Python 3.
28	"""Transform a stream of tokens from raw to Python 3.	28
		29	Returns a generator of possibly rewritten tokens.
		30
		31	The input token list may be mutated as part of processing. However,
		32	its changes do not necessarily match the output token stream.
		33	"""
		34	sysstrtokens = set()
29		35
30	Returns a generator of possibly rewritten tokens.	36	# The following utility functions access the tokens list and i index of
		37	# the for i, t enumerate(tokens) loop below
		38	def _isop(j, *o):
		39	"""Assert that tokens[j] is an OP with one of the given values"""
		40	try:
		41	return tokens[j].type == token.OP and tokens[j].string in o
		42	except IndexError:
		43	return False
31		44
32	The input token list may be mutated as part of processing. However,	45	def _findargnofcall(n):
33	its changes do not necessarily match the output token stream.	46	"""Find arg n of a call expression (start at 0)
34	"""	47
35	sysstrtokens = set()	48	Returns index of the first token of that argument, or None if
		49	there is not that many arguments.
		50
		51	Assumes that token[i + 1] is '('.
36		52
37	# The following utility functions access the tokens list and i index of	53	"""
38	# the for i, t enumerate(tokens) loop below	54	nested = 0
39	def _isop(j, *o):	55	for j in range(i + 2, len(tokens)):
40	"""Assert that tokens[j] is an OP with one of the given values"""	56	if _isop(j, ')', ']', '}'):
41	try:	57	# end of call, tuple, subscription or dict / set
42	return tokens[j].type == token.OP and tokens[j].string in o	58	nested -= 1
43	except IndexError:	59	if nested < 0:
44	return False	60	return None
		61	elif n == 0:
		62	# this is the starting position of arg
		63	return j
		64	elif _isop(j, '(', '[', '{'):
		65	nested += 1
		66	elif _isop(j, ',') and nested == 0:
		67	n -= 1
45		68
46	def _findargnofcall(n):	69	return None
47	"""Find arg n of a call expression (start at 0)	70
		71	def _ensuresysstr(j):
		72	"""Make sure the token at j is a system string
48		73
49	Returns index of the first token of that argument, or None if	74	Remember the given token so the string transformer won't add
50	there is not that many arguments.	75	the byte prefix.
51		76
52	Assumes that token[i + 1] is '('.	77	Ignores tokens that are not strings. Assumes bounds checking has
		78	already been done.
53		79
54	"""	80	"""
55	nested = 0	81	st = tokens[j]
56	for j in range(i + 2, len(tokens)):	82	if st.type == token.STRING and st.string.startswith(("'", '"')):
57	if _isop(j, ')', ']', '}'):	83	sysstrtokens.add(st)
58	# end of call, tuple, subscription or dict / set
59	nested -= 1
60	if nested < 0:
61	return None
62	elif n == 0:
63	# this is the starting position of arg
64	return j
65	elif _isop(j, '(', '[', '{'):
66	nested += 1
67	elif _isop(j, ',') and nested == 0:
68	n -= 1
69		84
70	return None	85	coldelta = 0 # column increment for new opening parens
71		86	coloffset = -1 # column offset for the current line (-1: TBD)
72	def _ensuresysstr(j):	87	parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)
73	"""Make sure the token at j is a system string	88	for i, t in enumerate(tokens):
74		89	# Compute the column offset for the current line, such that
75	Remember the given token so the string transformer won't add	90	# the current line will be aligned to the last opening paren
76	the byte prefix.	91	# as before.
77		92	if coloffset < 0:
78	Ignores tokens that are not strings. Assumes bounds checking has	93	if t.start[1] == parens[-1][1]:
79	already been done.	94	coloffset = parens[-1][2]
		95	elif t.start[1] + 1 == parens[-1][1]:
		96	# fix misaligned indent of s/util.Abort/error.Abort/
		97	coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
		98	else:
		99	coloffset = 0
80		100
81	"""	101	# Reset per-line attributes at EOL.
82	st = tokens[j]	102	if t.type in (token.NEWLINE, tokenize.NL):
83	if st.type == token.STRING and st.string.startswith(("'", '"')):	103	yield adjusttokenpos(t, coloffset)
84	sysstrtokens.add(st)	104	coldelta = 0
		105	coloffset = -1
		106	continue
		107
		108	# Remember the last paren position.
		109	if _isop(i, '(', '[', '{'):
		110	parens.append(t.end + (coloffset + coldelta,))
		111	elif _isop(i, ')', ']', '}'):
		112	parens.pop()
85		113
86	coldelta = 0 # column increment for new opening parens	114	# Convert most string literals to byte literals. String literals
87	coloffset = -1 # column offset for the current line (-1: TBD)	115	# in Python 2 are bytes. String literals in Python 3 are unicode.
88	parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)	116	# Most strings in Mercurial are bytes and unicode strings are rare.
89	for i, t in enumerate(tokens):	117	# Rather than rewrite all string literals to use ``b''`` to indicate
90	# Compute the column offset for the current line, such that	118	# byte strings, we apply this token transformer to insert the ``b``
91	# the current line will be aligned to the last opening paren	119	# prefix nearly everywhere.
92	# as before.	120	if t.type == token.STRING and t not in sysstrtokens:
93	if coloffset < 0:	121	s = t.string
94	if t.start[1] == parens[-1][1]:
95	coloffset = parens[-1][2]
96	elif t.start[1] + 1 == parens[-1][1]:
97	# fix misaligned indent of s/util.Abort/error.Abort/
98	coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
99	else:
100	coloffset = 0
101		122
102	# Reset per-line attributes at EOL.	123	# Preserve docstrings as string literals. This is inconsistent
103	if t.type in (token.NEWLINE, tokenize.NL):	124	# with regular unprefixed strings. However, the
		125	# "from __future__" parsing (which allows a module docstring to
		126	# exist before it) doesn't properly handle the docstring if it
		127	# is b''' prefixed, leading to a SyntaxError. We leave all
		128	# docstrings as unprefixed to avoid this. This means Mercurial
		129	# components touching docstrings need to handle unicode,
		130	# unfortunately.
		131	if s[0:3] in ("'''", '"""'):
104	yield adjusttokenpos(t, coloffset)	132	yield adjusttokenpos(t, coloffset)
105	coldelta = 0
106	coloffset = -1
107	continue	133	continue
108		134
109	# Remember the last paren position.	135	# If the first character isn't a quote, it is likely a string
110	if _isop(i, '(', '[', '{'):	136	# prefixing character (such as 'b', 'u', or 'r'. Ignore.
111	parens.append(t.end + (coloffset + coldelta,))	137	if s[0] not in ("'", '"'):
112	elif _isop(i, ')', ']', '}'):	138	yield adjusttokenpos(t, coloffset)
113	parens.pop()
114
115	# Convert most string literals to byte literals. String literals
116	# in Python 2 are bytes. String literals in Python 3 are unicode.
117	# Most strings in Mercurial are bytes and unicode strings are rare.
118	# Rather than rewrite all string literals to use ``b''`` to indicate
119	# byte strings, we apply this token transformer to insert the ``b``
120	# prefix nearly everywhere.
121	if t.type == token.STRING and t not in sysstrtokens:
122	s = t.string
123
124	# Preserve docstrings as string literals. This is inconsistent
125	# with regular unprefixed strings. However, the
126	# "from __future__" parsing (which allows a module docstring to
127	# exist before it) doesn't properly handle the docstring if it
128	# is b''' prefixed, leading to a SyntaxError. We leave all
129	# docstrings as unprefixed to avoid this. This means Mercurial
130	# components touching docstrings need to handle unicode,
131	# unfortunately.
132	if s[0:3] in ("'''", '"""'):
133	yield adjusttokenpos(t, coloffset)
134	continue
135
136	# If the first character isn't a quote, it is likely a string
137	# prefixing character (such as 'b', 'u', or 'r'. Ignore.
138	if s[0] not in ("'", '"'):
139	yield adjusttokenpos(t, coloffset)
140	continue
141
142	# String literal. Prefix to make a b'' string.
143	yield adjusttokenpos(t._replace(string='b%s' % t.string),
144	coloffset)
145	coldelta += 1
146	continue	139	continue
147		140
148	# This looks like a function call.	141	# String literal. Prefix to make a b'' string.
149	if t.type == token.NAME and _isop(i + 1, '('):	142	yield adjusttokenpos(t._replace(string='b%s' % t.string),
150	fn = t.string	143	coloffset)
		144	coldelta += 1
		145	continue
151		146
152	# *attr() builtins don't accept byte strings to 2nd argument.	147	# This looks like a function call.
153	if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and	148	if t.type == token.NAME and _isop(i + 1, '('):
154	not _isop(i - 1, '.')):	149	fn = t.string
155	arg1idx = _findargnofcall(1)	150
156	if arg1idx is not None:	151	# *attr() builtins don't accept byte strings to 2nd argument.
157	_ensuresysstr(arg1idx)	152	if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
		153	not _isop(i - 1, '.')):
		154	arg1idx = _findargnofcall(1)
		155	if arg1idx is not None:
		156	_ensuresysstr(arg1idx)
158		157
159	# .encode() and .decode() on str/bytes/unicode don't accept	158	# .encode() and .decode() on str/bytes/unicode don't accept
160	# byte strings on Python 3.	159	# byte strings on Python 3.
161	elif fn in ('encode', 'decode') and _isop(i - 1, '.'):	160	elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
162	for argn in range(2):	161	for argn in range(2):
163	argidx = _findargnofcall(argn)	162	argidx = _findargnofcall(argn)
164	if argidx is not None:	163	if argidx is not None:
165	_ensuresysstr(argidx)	164	_ensuresysstr(argidx)
166		165
167	# It changes iteritems/values to items/values as they are not	166	# It changes iteritems/values to items/values as they are not
168	# present in Python 3 world.	167	# present in Python 3 world.
169	elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):	168	elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
170	yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)	169	yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
171	continue	170	continue
172		171
173	# Emit unmodified token.	172	# Emit unmodified token.
174	yield adjusttokenpos(t, coloffset)	173	yield adjusttokenpos(t, coloffset)
175		174
176	def process(fin, fout, opts):	175	def process(fin, fout, opts):
177	tokens = tokenize.tokenize(fin.readline)	176	tokens = tokenize.tokenize(fin.readline)