upstream/mercurial-mirror Commit - r38410:f77bbd34

1

#!/usr/bin/env python3

1

#!/usr/bin/env python3

2

#

2

#

3

# byteify-strings.py - transform string literals to be Python 3 safe

3

# byteify-strings.py - transform string literals to be Python 3 safe

4

#

4

#

5

6

#

6

#

7

# This software may be used and distributed according to the terms of the

7

# This software may be used and distributed according to the terms of the

8

# GNU General Public License version 2 or any later version.

8

# GNU General Public License version 2 or any later version.

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import argparse

12

import argparse

13

import contextlib

13

import contextlib

14

import errno

14

import errno

15

import os

15

import os

16

import sys

16

import sys

17

import tempfile

17

import tempfile

18

import token

18

import token

19

import tokenize

19

import tokenize

20

21

def adjusttokenpos(t, ofs):

21

def adjusttokenpos(t, ofs):

22

"""Adjust start/end column of the given token"""

22

"""Adjust start/end column of the given token"""

23

return t._replace(start=(t.start[0], t.start[1] + ofs),

23

return t._replace(start=(t.start[0], t.start[1] + ofs),

24

end=(t.end[0], t.end[1] + ofs))

24

end=(t.end[0], t.end[1] + ofs))

25

26

if True:

26

def replacetokens(tokens, opts):

27

def replacetokens(tokens, opts):

27

"""Transform a stream of tokens from raw to Python 3.

28

"""Transform a stream of tokens from raw to Python 3.

28

29

Returns a generator of possibly rewritten tokens.

30

31

The input token list may be mutated as part of processing. However,

32

its changes do not necessarily match the output token stream.

33

"""

34

sysstrtokens = set()

29

35

30

Returns a generator of possibly rewritten tokens.

36

# The following utility functions access the tokens list and i index of

37

# the for i, t enumerate(tokens) loop below

38

def _isop(j, *o):

39

"""Assert that tokens[j] is an OP with one of the given values"""

40

try:

41

return tokens[j].type == token.OP and tokens[j].string in o

42

except IndexError:

43

return False

31

44

32

The input token list may be mutated as part of processing. However,

45

def _findargnofcall(n):

33

its changes do not necessarily match the output token stream.

46

"""Find arg n of a call expression (start at 0)

34

"""

47

35

sysstrtokens = set()

48

Returns index of the first token of that argument, or None if

49

there is not that many arguments.

50

51

Assumes that token[i + 1] is '('.

36

52

37

# The following utility functions access the tokens list and i index of

53

"""

38

# the for i, t enumerate(tokens) loop below

54

nested = 0

39

def _isop(j, *o):

55

for j in range(i + 2, len(tokens)):

40

"""Assert that tokens[j] is an OP with one of the given values"""

56

if _isop(j, ')', ']', '}'):

41

try:

57

# end of call, tuple, subscription or dict / set

42

return tokens[j].type == token.OP and tokens[j].string in o

58

nested -= 1

43

except IndexError:

59

if nested < 0:

44

return False

60

return None

61

elif n == 0:

62

# this is the starting position of arg

63

return j

64

elif _isop(j, '(', '[', '{'):

65

nested += 1

66

elif _isop(j, ',') and nested == 0:

67

n -= 1

45

68

46

def _findargnofcall(n):

69

return None

47

"""Find arg n of a call expression (start at 0)

70

71

def _ensuresysstr(j):

72

"""Make sure the token at j is a system string

48

73

49

Returns index of the first token of that argument, or None if

74

Remember the given token so the string transformer won't add

50

there is not that many arguments.

75

the byte prefix.

51

76

52

Assumes that token[i + 1] is '('.

77

Ignores tokens that are not strings. Assumes bounds checking has

78

already been done.

53

79

54

"""

80

"""

55

nested = 0

81

st = tokens[j]

56

for j in range(i + 2, len(tokens)):

82

if st.type == token.STRING and st.string.startswith(("'", '"')):

57

if _isop(j, ')', ']', '}'):

83

sysstrtokens.add(st)

58

# end of call, tuple, subscription or dict / set

59

nested -= 1

60

if nested < 0:

61

return None

62

elif n == 0:

63

# this is the starting position of arg

64

return j

65

elif _isop(j, '(', '[', '{'):

66

nested += 1

67

elif _isop(j, ',') and nested == 0:

68

n -= 1

69

84

70

return None

85

coldelta = 0 # column increment for new opening parens

71

86

coloffset = -1 # column offset for the current line (-1: TBD)

72

def _ensuresysstr(j):

87

parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)

73

"""Make sure the token at j is a system string

88

for i, t in enumerate(tokens):

74

89

# Compute the column offset for the current line, such that

75

Remember the given token so the string transformer won't add

90

# the current line will be aligned to the last opening paren

76

the byte prefix.

91

# as before.

77

92

if coloffset < 0:

78

Ignores tokens that are not strings. Assumes bounds checking has

93

if t.start[1] == parens[-1][1]:

79

already been done.

94

coloffset = parens[-1][2]

95

elif t.start[1] + 1 == parens[-1][1]:

96

# fix misaligned indent of s/util.Abort/error.Abort/

97

coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])

98

else:

99

coloffset = 0

80

100

81

"""

101

# Reset per-line attributes at EOL.

82

st = tokens[j]

102

if t.type in (token.NEWLINE, tokenize.NL):

83

if st.type == token.STRING and st.string.startswith(("'", '"')):

103

yield adjusttokenpos(t, coloffset)

84

sysstrtokens.add(st)

104

coldelta = 0

105

coloffset = -1

106

continue

107

108

# Remember the last paren position.

109

if _isop(i, '(', '[', '{'):

110

parens.append(t.end + (coloffset + coldelta,))

111

elif _isop(i, ')', ']', '}'):

112

parens.pop()

85

113

86

coldelta = 0 # column increment for new opening parens

114

# Convert most string literals to byte literals. String literals

87

coloffset = -1 # column offset for the current line (-1: TBD)

115

# in Python 2 are bytes. String literals in Python 3 are unicode.

88

parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)

116

# Most strings in Mercurial are bytes and unicode strings are rare.

89

for i, t in enumerate(tokens):

117

# Rather than rewrite all string literals to use ``b''`` to indicate

90

# Compute the column offset for the current line, such that

118

# byte strings, we apply this token transformer to insert the ``b``

91

# the current line will be aligned to the last opening paren

119

# prefix nearly everywhere.

92

# as before.

120

if t.type == token.STRING and t not in sysstrtokens:

93

if coloffset < 0:

121

s = t.string

94

if t.start[1] == parens[-1][1]:

95

coloffset = parens[-1][2]

96

elif t.start[1] + 1 == parens[-1][1]:

97

# fix misaligned indent of s/util.Abort/error.Abort/

98

coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])

99

else:

100

coloffset = 0

101

122

102

# Reset per-line attributes at EOL.

123

# Preserve docstrings as string literals. This is inconsistent

103

if t.type in (token.NEWLINE, tokenize.NL):

124

# with regular unprefixed strings. However, the

125

# "from __future__" parsing (which allows a module docstring to

126

# exist before it) doesn't properly handle the docstring if it

127

# is b''' prefixed, leading to a SyntaxError. We leave all

128

# docstrings as unprefixed to avoid this. This means Mercurial

129

# components touching docstrings need to handle unicode,

130

# unfortunately.

131

if s[0:3] in ("'''", '"""'):

104

yield adjusttokenpos(t, coloffset)

132

yield adjusttokenpos(t, coloffset)

105

coldelta = 0

106

coloffset = -1

107

continue

133

continue

108

134

109

# Remember the last paren position.

135

# If the first character isn't a quote, it is likely a string

110

if _isop(i, '(', '[', '{'):

136

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

111

parens.append(t.end + (coloffset + coldelta,))

137

if s[0] not in ("'", '"'):

112

elif _isop(i, ')', ']', '}'):

138

yield adjusttokenpos(t, coloffset)

113

parens.pop()

114

115

# Convert most string literals to byte literals. String literals

116

# in Python 2 are bytes. String literals in Python 3 are unicode.

117

# Most strings in Mercurial are bytes and unicode strings are rare.

118

# Rather than rewrite all string literals to use ``b''`` to indicate

119

# byte strings, we apply this token transformer to insert the ``b``

120

# prefix nearly everywhere.

121

if t.type == token.STRING and t not in sysstrtokens:

122

s = t.string

123

124

# Preserve docstrings as string literals. This is inconsistent

125

# with regular unprefixed strings. However, the

126

# "from __future__" parsing (which allows a module docstring to

127

# exist before it) doesn't properly handle the docstring if it

128

# is b''' prefixed, leading to a SyntaxError. We leave all

129

# docstrings as unprefixed to avoid this. This means Mercurial

130

# components touching docstrings need to handle unicode,

131

# unfortunately.

132

if s[0:3] in ("'''", '"""'):

133

yield adjusttokenpos(t, coloffset)

134

continue

135

136

# If the first character isn't a quote, it is likely a string

137

# prefixing character (such as 'b', 'u', or 'r'. Ignore.

138

if s[0] not in ("'", '"'):

139

yield adjusttokenpos(t, coloffset)

140

continue

141

142

# String literal. Prefix to make a b'' string.

143

yield adjusttokenpos(t._replace(string='b%s' % t.string),

144

coloffset)

145

coldelta += 1

146

continue

139

continue

147

140

148

# This looks like a function call.

141

# String literal. Prefix to make a b'' string.

149

if t.type == token.NAME and _isop(i + 1, '('):

142

yield adjusttokenpos(t._replace(string='b%s' % t.string),

150

fn = t.string

143

coloffset)

144

coldelta += 1

145

continue

151

146

152

# *attr() builtins don't accept byte strings to 2nd argument.

147

# This looks like a function call.

153

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

148

if t.type == token.NAME and _isop(i + 1, '('):

154

not _isop(i - 1, '.')):

149

fn = t.string

155

arg1idx = _findargnofcall(1)

150

156

if arg1idx is not None:

151

# *attr() builtins don't accept byte strings to 2nd argument.

157

_ensuresysstr(arg1idx)

152

if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and

153

not _isop(i - 1, '.')):

154

arg1idx = _findargnofcall(1)

155

if arg1idx is not None:

156

_ensuresysstr(arg1idx)

158

157

159

# .encode() and .decode() on str/bytes/unicode don't accept

158

# .encode() and .decode() on str/bytes/unicode don't accept

160

# byte strings on Python 3.

159

# byte strings on Python 3.

161

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

160

elif fn in ('encode', 'decode') and _isop(i - 1, '.'):

162

for argn in range(2):

161

for argn in range(2):

163

argidx = _findargnofcall(argn)

162

argidx = _findargnofcall(argn)

164

if argidx is not None:

163

if argidx is not None:

165

_ensuresysstr(argidx)

164

_ensuresysstr(argidx)

166

165

167

# It changes iteritems/values to items/values as they are not

166

# It changes iteritems/values to items/values as they are not

168

# present in Python 3 world.

167

# present in Python 3 world.

169

elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):

168

elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):

170

yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)

169

yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)

171

continue

170

continue

172

171

173

# Emit unmodified token.

172

# Emit unmodified token.

174

yield adjusttokenpos(t, coloffset)

173

yield adjusttokenpos(t, coloffset)

175

174

176

def process(fin, fout, opts):

175

def process(fin, fout, opts):

177

tokens = tokenize.tokenize(fin.readline)

176

tokens = tokenize.tokenize(fin.readline)

178

tokens = replacetokens(list(tokens), opts)

177

tokens = replacetokens(list(tokens), opts)

179

fout.write(tokenize.untokenize(tokens))

178

fout.write(tokenize.untokenize(tokens))

180

179

181

def tryunlink(fname):

180

def tryunlink(fname):

182

try:

181

try:

183

os.unlink(fname)

182

os.unlink(fname)

184

except OSError as err:

183

except OSError as err:

185

if err.errno != errno.ENOENT:

184

if err.errno != errno.ENOENT:

186

raise

185

raise

187

186

188

@contextlib.contextmanager

187

@contextlib.contextmanager

189

def editinplace(fname):

188

def editinplace(fname):

190

n = os.path.basename(fname)

189

n = os.path.basename(fname)

191

d = os.path.dirname(fname)

190

d = os.path.dirname(fname)

192

fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,

191

fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,

193

delete=False)

192

delete=False)

194

try:

193

try:

195

yield fp

194

yield fp

196

fp.close()

195

fp.close()

197

if os.name == 'nt':

196

if os.name == 'nt':

198

tryunlink(fname)

197

tryunlink(fname)

199

os.rename(fp.name, fname)

198

os.rename(fp.name, fname)

200

finally:

199

finally:

201

fp.close()

200

fp.close()

202

tryunlink(fp.name)

201

tryunlink(fp.name)

203

202

204

def main():

203

def main():

205

ap = argparse.ArgumentParser()

204

ap = argparse.ArgumentParser()

206

ap.add_argument('-i', '--inplace', action='store_true', default=False,

205

ap.add_argument('-i', '--inplace', action='store_true', default=False,

207

help='edit files in place')

206

help='edit files in place')

208

ap.add_argument('--dictiter', action='store_true', default=False,

207

ap.add_argument('--dictiter', action='store_true', default=False,

209

help='rewrite iteritems() and itervalues()'),

208

help='rewrite iteritems() and itervalues()'),

210

ap.add_argument('files', metavar='FILE', nargs='+', help='source file')

209

ap.add_argument('files', metavar='FILE', nargs='+', help='source file')

211

args = ap.parse_args()

210

args = ap.parse_args()

212

opts = {

211

opts = {

213

'dictiter': args.dictiter,

212

'dictiter': args.dictiter,

214

}

213

}

215

for fname in args.files:

214

for fname in args.files:

216

if args.inplace:

215

if args.inplace:

217

with editinplace(fname) as fout:

216

with editinplace(fname) as fout:

218

with open(fname, 'rb') as fin:

217

with open(fname, 'rb') as fin:

219

process(fin, fout, opts)

218

process(fin, fout, opts)

220

else:

219

else:

221

with open(fname, 'rb') as fin:

220

with open(fname, 'rb') as fin:

222

fout = sys.stdout.buffer

221

fout = sys.stdout.buffer

223

process(fin, fout, opts)

222

process(fin, fout, opts)

224

223

225

if __name__ == '__main__':

224

if __name__ == '__main__':

226

main()

225

main()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

@@ -1,226 +1,225 b''
1	#!/usr/bin/env python3	1	#!/usr/bin/env python3
2	#	2	#
3	# byteify-strings.py - transform string literals to be Python 3 safe	3	# byteify-strings.py - transform string literals to be Python 3 safe
4	#	4	#
5	# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>	5	# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6	#	6	#
7	# This software may be used and distributed according to the terms of the	7	# This software may be used and distributed according to the terms of the
8	# GNU General Public License version 2 or any later version.	8	# GNU General Public License version 2 or any later version.
9		9
10	from __future__ import absolute_import	10	from __future__ import absolute_import
11		11
12	import argparse	12	import argparse
13	import contextlib	13	import contextlib
14	import errno	14	import errno
15	import os	15	import os
16	import sys	16	import sys
17	import tempfile	17	import tempfile
18	import token	18	import token
19	import tokenize	19	import tokenize
20		20
21	def adjusttokenpos(t, ofs):	21	def adjusttokenpos(t, ofs):
22	"""Adjust start/end column of the given token"""	22	"""Adjust start/end column of the given token"""
23	return t._replace(start=(t.start[0], t.start[1] + ofs),	23	return t._replace(start=(t.start[0], t.start[1] + ofs),
24	end=(t.end[0], t.end[1] + ofs))	24	end=(t.end[0], t.end[1] + ofs))
25		25
26	if True:	26	def replacetokens(tokens, opts):
27	def replacetokens(tokens, opts):	27	"""Transform a stream of tokens from raw to Python 3.
28	"""Transform a stream of tokens from raw to Python 3.	28
		29	Returns a generator of possibly rewritten tokens.
		30
		31	The input token list may be mutated as part of processing. However,
		32	its changes do not necessarily match the output token stream.
		33	"""
		34	sysstrtokens = set()
29		35
30	Returns a generator of possibly rewritten tokens.	36	# The following utility functions access the tokens list and i index of
		37	# the for i, t enumerate(tokens) loop below
		38	def _isop(j, *o):
		39	"""Assert that tokens[j] is an OP with one of the given values"""
		40	try:
		41	return tokens[j].type == token.OP and tokens[j].string in o
		42	except IndexError:
		43	return False
31		44
32	The input token list may be mutated as part of processing. However,	45	def _findargnofcall(n):
33	its changes do not necessarily match the output token stream.	46	"""Find arg n of a call expression (start at 0)
34	"""	47
35	sysstrtokens = set()	48	Returns index of the first token of that argument, or None if
		49	there is not that many arguments.
		50
		51	Assumes that token[i + 1] is '('.
36		52
37	# The following utility functions access the tokens list and i index of	53	"""
38	# the for i, t enumerate(tokens) loop below	54	nested = 0
39	def _isop(j, *o):	55	for j in range(i + 2, len(tokens)):
40	"""Assert that tokens[j] is an OP with one of the given values"""	56	if _isop(j, ')', ']', '}'):
41	try:	57	# end of call, tuple, subscription or dict / set
42	return tokens[j].type == token.OP and tokens[j].string in o	58	nested -= 1
43	except IndexError:	59	if nested < 0:
44	return False	60	return None
		61	elif n == 0:
		62	# this is the starting position of arg
		63	return j
		64	elif _isop(j, '(', '[', '{'):
		65	nested += 1
		66	elif _isop(j, ',') and nested == 0:
		67	n -= 1
45		68
46	def _findargnofcall(n):	69	return None
47	"""Find arg n of a call expression (start at 0)	70
		71	def _ensuresysstr(j):
		72	"""Make sure the token at j is a system string
48		73
49	Returns index of the first token of that argument, or None if	74	Remember the given token so the string transformer won't add
50	there is not that many arguments.	75	the byte prefix.
51		76
52	Assumes that token[i + 1] is '('.	77	Ignores tokens that are not strings. Assumes bounds checking has
		78	already been done.
53		79
54	"""	80	"""
55	nested = 0	81	st = tokens[j]
56	for j in range(i + 2, len(tokens)):	82	if st.type == token.STRING and st.string.startswith(("'", '"')):
57	if _isop(j, ')', ']', '}'):	83	sysstrtokens.add(st)
58	# end of call, tuple, subscription or dict / set
59	nested -= 1
60	if nested < 0:
61	return None
62	elif n == 0:
63	# this is the starting position of arg
64	return j
65	elif _isop(j, '(', '[', '{'):
66	nested += 1
67	elif _isop(j, ',') and nested == 0:
68	n -= 1
69		84
70	return None	85	coldelta = 0 # column increment for new opening parens
71		86	coloffset = -1 # column offset for the current line (-1: TBD)
72	def _ensuresysstr(j):	87	parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)
73	"""Make sure the token at j is a system string	88	for i, t in enumerate(tokens):
74		89	# Compute the column offset for the current line, such that
75	Remember the given token so the string transformer won't add	90	# the current line will be aligned to the last opening paren
76	the byte prefix.	91	# as before.
77		92	if coloffset < 0:
78	Ignores tokens that are not strings. Assumes bounds checking has	93	if t.start[1] == parens[-1][1]:
79	already been done.	94	coloffset = parens[-1][2]
		95	elif t.start[1] + 1 == parens[-1][1]:
		96	# fix misaligned indent of s/util.Abort/error.Abort/
		97	coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
		98	else:
		99	coloffset = 0
80		100
81	"""	101	# Reset per-line attributes at EOL.
82	st = tokens[j]	102	if t.type in (token.NEWLINE, tokenize.NL):
83	if st.type == token.STRING and st.string.startswith(("'", '"')):	103	yield adjusttokenpos(t, coloffset)
84	sysstrtokens.add(st)	104	coldelta = 0
		105	coloffset = -1
		106	continue
		107
		108	# Remember the last paren position.
		109	if _isop(i, '(', '[', '{'):
		110	parens.append(t.end + (coloffset + coldelta,))
		111	elif _isop(i, ')', ']', '}'):
		112	parens.pop()
85		113
86	coldelta = 0 # column increment for new opening parens	114	# Convert most string literals to byte literals. String literals
87	coloffset = -1 # column offset for the current line (-1: TBD)	115	# in Python 2 are bytes. String literals in Python 3 are unicode.
88	parens = [(0, 0, 0)] # stack of (line, end-column, column-offset)	116	# Most strings in Mercurial are bytes and unicode strings are rare.
89	for i, t in enumerate(tokens):	117	# Rather than rewrite all string literals to use ``b''`` to indicate
90	# Compute the column offset for the current line, such that	118	# byte strings, we apply this token transformer to insert the ``b``
91	# the current line will be aligned to the last opening paren	119	# prefix nearly everywhere.
92	# as before.	120	if t.type == token.STRING and t not in sysstrtokens:
93	if coloffset < 0:	121	s = t.string
94	if t.start[1] == parens[-1][1]:
95	coloffset = parens[-1][2]
96	elif t.start[1] + 1 == parens[-1][1]:
97	# fix misaligned indent of s/util.Abort/error.Abort/
98	coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
99	else:
100	coloffset = 0
101		122
102	# Reset per-line attributes at EOL.	123	# Preserve docstrings as string literals. This is inconsistent
103	if t.type in (token.NEWLINE, tokenize.NL):	124	# with regular unprefixed strings. However, the
		125	# "from __future__" parsing (which allows a module docstring to
		126	# exist before it) doesn't properly handle the docstring if it
		127	# is b''' prefixed, leading to a SyntaxError. We leave all
		128	# docstrings as unprefixed to avoid this. This means Mercurial
		129	# components touching docstrings need to handle unicode,
		130	# unfortunately.
		131	if s[0:3] in ("'''", '"""'):
104	yield adjusttokenpos(t, coloffset)	132	yield adjusttokenpos(t, coloffset)
105	coldelta = 0
106	coloffset = -1
107	continue	133	continue
108		134
109	# Remember the last paren position.	135	# If the first character isn't a quote, it is likely a string
110	if _isop(i, '(', '[', '{'):	136	# prefixing character (such as 'b', 'u', or 'r'. Ignore.
111	parens.append(t.end + (coloffset + coldelta,))	137	if s[0] not in ("'", '"'):
112	elif _isop(i, ')', ']', '}'):	138	yield adjusttokenpos(t, coloffset)
113	parens.pop()
114
115	# Convert most string literals to byte literals. String literals
116	# in Python 2 are bytes. String literals in Python 3 are unicode.
117	# Most strings in Mercurial are bytes and unicode strings are rare.
118	# Rather than rewrite all string literals to use ``b''`` to indicate
119	# byte strings, we apply this token transformer to insert the ``b``
120	# prefix nearly everywhere.
121	if t.type == token.STRING and t not in sysstrtokens:
122	s = t.string
123
124	# Preserve docstrings as string literals. This is inconsistent
125	# with regular unprefixed strings. However, the
126	# "from __future__" parsing (which allows a module docstring to
127	# exist before it) doesn't properly handle the docstring if it
128	# is b''' prefixed, leading to a SyntaxError. We leave all
129	# docstrings as unprefixed to avoid this. This means Mercurial
130	# components touching docstrings need to handle unicode,
131	# unfortunately.
132	if s[0:3] in ("'''", '"""'):
133	yield adjusttokenpos(t, coloffset)
134	continue
135
136	# If the first character isn't a quote, it is likely a string
137	# prefixing character (such as 'b', 'u', or 'r'. Ignore.
138	if s[0] not in ("'", '"'):
139	yield adjusttokenpos(t, coloffset)
140	continue
141
142	# String literal. Prefix to make a b'' string.
143	yield adjusttokenpos(t._replace(string='b%s' % t.string),
144	coloffset)
145	coldelta += 1
146	continue	139	continue
147		140
148	# This looks like a function call.	141	# String literal. Prefix to make a b'' string.
149	if t.type == token.NAME and _isop(i + 1, '('):	142	yield adjusttokenpos(t._replace(string='b%s' % t.string),
150	fn = t.string	143	coloffset)
		144	coldelta += 1
		145	continue
151		146
152	# *attr() builtins don't accept byte strings to 2nd argument.	147	# This looks like a function call.
153	if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and	148	if t.type == token.NAME and _isop(i + 1, '('):
154	not _isop(i - 1, '.')):	149	fn = t.string
155	arg1idx = _findargnofcall(1)	150
156	if arg1idx is not None:	151	# *attr() builtins don't accept byte strings to 2nd argument.
157	_ensuresysstr(arg1idx)	152	if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
		153	not _isop(i - 1, '.')):
		154	arg1idx = _findargnofcall(1)
		155	if arg1idx is not None:
		156	_ensuresysstr(arg1idx)
158		157
159	# .encode() and .decode() on str/bytes/unicode don't accept	158	# .encode() and .decode() on str/bytes/unicode don't accept
160	# byte strings on Python 3.	159	# byte strings on Python 3.
161	elif fn in ('encode', 'decode') and _isop(i - 1, '.'):	160	elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
162	for argn in range(2):	161	for argn in range(2):
163	argidx = _findargnofcall(argn)	162	argidx = _findargnofcall(argn)
164	if argidx is not None:	163	if argidx is not None:
165	_ensuresysstr(argidx)	164	_ensuresysstr(argidx)
166		165
167	# It changes iteritems/values to items/values as they are not	166	# It changes iteritems/values to items/values as they are not
168	# present in Python 3 world.	167	# present in Python 3 world.
169	elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):	168	elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
170	yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)	169	yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
171	continue	170	continue
172		171
173	# Emit unmodified token.	172	# Emit unmodified token.
174	yield adjusttokenpos(t, coloffset)	173	yield adjusttokenpos(t, coloffset)
175		174
176	def process(fin, fout, opts):	175	def process(fin, fout, opts):
177	tokens = tokenize.tokenize(fin.readline)	176	tokens = tokenize.tokenize(fin.readline)
178	tokens = replacetokens(list(tokens), opts)	177	tokens = replacetokens(list(tokens), opts)
179	fout.write(tokenize.untokenize(tokens))	178	fout.write(tokenize.untokenize(tokens))
180		179
181	def tryunlink(fname):	180	def tryunlink(fname):
182	try:	181	try:
183	os.unlink(fname)	182	os.unlink(fname)
184	except OSError as err:	183	except OSError as err:
185	if err.errno != errno.ENOENT:	184	if err.errno != errno.ENOENT:
186	raise	185	raise
187		186
188	@contextlib.contextmanager	187	@contextlib.contextmanager
189	def editinplace(fname):	188	def editinplace(fname):
190	n = os.path.basename(fname)	189	n = os.path.basename(fname)
191	d = os.path.dirname(fname)	190	d = os.path.dirname(fname)
192	fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,	191	fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,
193	delete=False)	192	delete=False)
194	try:	193	try:
195	yield fp	194	yield fp
196	fp.close()	195	fp.close()
197	if os.name == 'nt':	196	if os.name == 'nt':
198	tryunlink(fname)	197	tryunlink(fname)
199	os.rename(fp.name, fname)	198	os.rename(fp.name, fname)
200	finally:	199	finally:
201	fp.close()	200	fp.close()
202	tryunlink(fp.name)	201	tryunlink(fp.name)
203		202
204	def main():	203	def main():
205	ap = argparse.ArgumentParser()	204	ap = argparse.ArgumentParser()
206	ap.add_argument('-i', '--inplace', action='store_true', default=False,	205	ap.add_argument('-i', '--inplace', action='store_true', default=False,
207	help='edit files in place')	206	help='edit files in place')
208	ap.add_argument('--dictiter', action='store_true', default=False,	207	ap.add_argument('--dictiter', action='store_true', default=False,
209	help='rewrite iteritems() and itervalues()'),	208	help='rewrite iteritems() and itervalues()'),
210	ap.add_argument('files', metavar='FILE', nargs='+', help='source file')	209	ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
211	args = ap.parse_args()	210	args = ap.parse_args()
212	opts = {	211	opts = {
213	'dictiter': args.dictiter,	212	'dictiter': args.dictiter,
214	}	213	}
215	for fname in args.files:	214	for fname in args.files:
216	if args.inplace:	215	if args.inplace:
217	with editinplace(fname) as fout:	216	with editinplace(fname) as fout:
218	with open(fname, 'rb') as fin:	217	with open(fname, 'rb') as fin:
219	process(fin, fout, opts)	218	process(fin, fout, opts)
220	else:	219	else:
221	with open(fname, 'rb') as fin:	220	with open(fname, 'rb') as fin:
222	fout = sys.stdout.buffer	221	fout = sys.stdout.buffer
223	process(fin, fout, opts)	222	process(fin, fout, opts)
224		223
225	if __name__ == '__main__':	224	if __name__ == '__main__':
226	main()	225	main()