##// END OF EJS Templates
contrib: require Python 3.6 for byteify-strings.py...
Gregory Szorc -
r43696:bb509f39 stable
parent child Browse files
Show More
@@ -1,345 +1,345
1 1 #!/usr/bin/env python3
2 2 #
3 3 # byteify-strings.py - transform string literals to be Python 3 safe
4 4 #
5 5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import, print_function
11 11
12 12 import argparse
13 13 import contextlib
14 14 import errno
15 15 import os
16 16 import sys
17 17 import tempfile
18 18 import token
19 19 import tokenize
20 20
21 21
def adjusttokenpos(t, ofs):
    """Return a copy of token ``t`` moved ``ofs`` columns to the right.

    Only the column part of the ``start`` and ``end`` positions changes;
    the row numbers and every other field of the token are kept as they are.
    A negative ``ofs`` moves the token to the left.
    """
    startrow, startcol = t.start
    endrow, endcol = t.end
    shiftedstart = (startrow, startcol + ofs)
    shiftedend = (endrow, endcol + ofs)
    return t._replace(start=shiftedstart, end=shiftedend)
27 27
28 28
def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    ``tokens`` is an indexable sequence of ``tokenize`` tokens (the helpers
    below look ahead and behind by index). ``opts`` is a mapping that must
    provide the keys ``'dictiter'``, ``'allow-attr-methods'`` and
    ``'treat-as-kwargs'``.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.
    """
    sysstrtokens = set()

    # The following utility functions access the tokens list and i index of
    # the "for i, t in enumerate(tokens)" loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there are not that many arguments.

        Assumes that tokens[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensuresysstr(j):
        """Make sure the token at j is a system string

        Remember the given token so the string transformer won't add
        the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        k = j
        currtoken = tokens[k]
        # Walk over implicitly concatenated literals, which may be spread
        # over several physical lines.
        while currtoken.type in (token.STRING, token.NEWLINE, tokenize.NL):
            k += 1
            if currtoken.type == token.STRING and currtoken.string.startswith(
                ("'", '"')
            ):
                sysstrtokens.add(currtoken)
            try:
                currtoken = tokens[k]
            except IndexError:
                break

    def _isitemaccess(j):
        """Assert the next tokens form an item access on `tokens[j]` and that
        `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '[')
                and tokens[j + 2].type == token.STRING
                and _isop(j + 3, ']')
            )
        except IndexError:
            return False

    def _ismethodcall(j, *methodnames):
        """Assert the next tokens form a call to one of `methodnames` with a
        string as first argument on `tokens[j]` and that `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '.')
                and tokens[j + 2].type == token.NAME
                and tokens[j + 2].string in methodnames
                and _isop(j + 3, '(')
                and tokens[j + 4].type == token.STRING
            )
        except IndexError:
            return False

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
    ignorenextline = False  # don't transform the next line
    insideignoreblock = False  # don't transform until turned off
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            lastparen = parens[-1]
            if t.start[1] == lastparen[1]:
                coloffset = lastparen[2]
            elif t.start[1] + 1 == lastparen[1] and lastparen[3] not in (
                token.NEWLINE,
                tokenize.NL,
            ):
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = lastparen[2] + (lastparen[1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in (token.NEWLINE, tokenize.NL):
            yield adjusttokenpos(t, coloffset)
            coldelta = 0
            coloffset = -1
            if not insideignoreblock:
                ignorenextline = (
                    tokens[i - 1].type == token.COMMENT
                    and tokens[i - 1].string == "# no-py3-transform"
                )
            continue

        if t.type == token.COMMENT:
            if t.string == "# py3-transform: off":
                insideignoreblock = True
            if t.string == "# py3-transform: on":
                insideignoreblock = False

        skipping = ignorenextline or insideignoreblock

        # Remember the last paren position. This has to happen for skipped
        # lines too: a bracket may open on a skipped line and close on a
        # transformed one (or the other way round), and the stack must stay
        # balanced across both or later lines get a bogus column offset.
        if _isop(i, '(', '[', '{'):
            # For a skipped opener record NL as the follower type, which
            # disables the one-column misalignment fix-up above so that
            # continuation lines of untransformed code are not re-indented.
            followtype = tokenize.NL if skipping else tokens[i + 1].type
            parens.append(t.end + (coloffset + coldelta, followtype))
        elif _isop(i, ')', ']', '}'):
            # Never pop the sentinel entry; parens[-1] must always exist.
            if len(parens) > 1:
                parens.pop()

        if skipping:
            yield adjusttokenpos(t, coloffset)
            continue

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                # If it's assigned to something, it's not a docstring
                if not _isop(i - 1, '='):
                    yield adjusttokenpos(t, coloffset)
                    continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'). Ignore.
            if s[0] not in ("'", '"'):
                yield adjusttokenpos(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield adjusttokenpos(t._replace(string='b%s' % t.string), coloffset)
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if fn in (
                'getattr',
                'setattr',
                'hasattr',
                'safehasattr',
                'wrapfunction',
                'wrapclass',
                'addattr',
            ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # Rewrite iteritems()/itervalues() to items()/values() as the
            # former are not present in the Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                continue

        if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
            if _isitemaccess(i):
                _ensuresysstr(i + 2)
            if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                _ensuresysstr(i + 4)

        # Looks like "if __name__ == '__main__'".
        if (
            t.type == token.NAME
            and t.string == '__name__'
            and _isop(i + 1, '==')
        ):
            _ensuresysstr(i + 2)

        # Emit unmodified token.
        yield adjusttokenpos(t, coloffset)
259 259
260 260
def process(fin, fout, opts):
    """Byteify the Python source read from ``fin`` and write it to ``fout``.

    ``fin`` is read line by line through ``tokenize.tokenize``; the complete
    token list is handed to ``replacetokens`` together with ``opts``, and the
    rewritten stream is turned back into source with ``tokenize.untokenize``
    and written to ``fout`` in a single call.
    """
    rawtokens = list(tokenize.tokenize(fin.readline))
    rewritten = replacetokens(rawtokens, opts)
    source = tokenize.untokenize(rewritten)
    fout.write(source)
265 265
266 266
def tryunlink(fname):
    """Remove ``fname``, treating an already missing file as success.

    Any other ``OSError`` (for example a permission problem) is propagated
    to the caller.
    """
    try:
        os.unlink(fname)
    except FileNotFoundError:
        # Same condition as errno.ENOENT: there is nothing left to remove.
        pass
273 273
274 274
@contextlib.contextmanager
def editinplace(fname):
    """Context manager that yields a temporary file destined to replace ``fname``.

    The temporary file is created in the same directory as ``fname`` and is
    opened by ``tempfile.NamedTemporaryFile`` with its default (binary) mode.
    When the ``with`` body finishes normally the file is closed and moved
    over ``fname``. If the body raises, ``fname`` is left untouched. In both
    cases no temporary file is left behind.
    """
    n = os.path.basename(fname)
    d = os.path.dirname(fname)
    fp = tempfile.NamedTemporaryFile(
        prefix='.%s-' % n, suffix='~', dir=d, delete=False
    )
    try:
        yield fp
        fp.close()
        # os.replace() overwrites an existing destination on every platform,
        # so the target never has to be unlinked first. Unlinking first meant
        # that a failing rename on Windows lost both the old and new content.
        # NOTE(review): the replacement keeps the temporary file's permission
        # bits, not those of the original file - confirm this is acceptable.
        os.replace(fp.name, fname)
    finally:
        # Closing twice is harmless; this covers the path where the body raised.
        fp.close()
        # After a successful replace the name is already gone, which
        # tryunlink() tolerates.
        tryunlink(fp.name)
291 291
292 292
def main():
    """Command line entry point: byteify every file named on the command line.

    With ``-i``/``--inplace`` each file is rewritten in place through
    ``editinplace``; otherwise the transformed source is written to the
    binary buffer of standard output.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--version', action='version', version='Byteify strings 1.0'
    )
    ap.add_argument(
        '-i',
        '--inplace',
        action='store_true',
        default=False,
        help='edit files in place',
    )
    ap.add_argument(
        '--dictiter',
        action='store_true',
        default=False,
        help='rewrite iteritems() and itervalues()',
    )
    ap.add_argument(
        '--allow-attr-methods',
        action='store_true',
        default=False,
        help='also handle attr*() when they are methods',
    )
    ap.add_argument(
        '--treat-as-kwargs',
        nargs="+",
        default=[],
        help="ignore kwargs-like objects",
    )
    ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
    args = ap.parse_args()
    # replacetokens() looks these options up by their command line spelling.
    opts = {
        'dictiter': args.dictiter,
        'treat-as-kwargs': set(args.treat_as_kwargs),
        'allow-attr-methods': args.allow_attr_methods,
    }
    for fname in args.files:
        if args.inplace:
            with editinplace(fname) as fout:
                with open(fname, 'rb') as fin:
                    process(fin, fout, opts)
        else:
            with open(fname, 'rb') as fin:
                # The token stream is written back as bytes, hence the
                # binary buffer rather than the text-mode sys.stdout.
                fout = sys.stdout.buffer
                process(fin, fout, opts)
339 339
340 340
if __name__ == '__main__':
    # Refuse to run on interpreters older than Python 3.6; exit status 3
    # signals the unsupported interpreter to the caller.
    if sys.version_info[0:2] < (3, 6):
        print('This script must be run under Python 3.6+')
        sys.exit(3)
    main()
@@ -1,266 +1,266
1 #require py3
1 #require py36
2 2
3 3 $ byteify_strings () {
4 4 > $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@"
5 5 > }
6 6
7 7 Test version
8 8
9 9 $ byteify_strings --version
10 10 Byteify strings * (glob)
11 11
12 12 Test in-place
13 13
14 14 $ cat > testfile.py <<EOF
15 15 > obj['test'] = b"1234"
16 16 > mydict.iteritems()
17 17 > EOF
18 18 $ byteify_strings testfile.py -i
19 19 $ cat testfile.py
20 20 obj[b'test'] = b"1234"
21 21 mydict.iteritems()
22 22
23 23 Test with dictiter
24 24
25 25 $ cat > testfile.py <<EOF
26 26 > obj['test'] = b"1234"
27 27 > mydict.iteritems()
28 28 > EOF
29 29 $ byteify_strings testfile.py --dictiter
30 30 obj[b'test'] = b"1234"
31 31 mydict.items()
32 32
33 33 Test kwargs-like objects
34 34
35 35 $ cat > testfile.py <<EOF
36 36 > kwargs['test'] = "123"
37 37 > kwargs[test['testing']]
38 38 > kwargs[test[[['testing']]]]
39 39 > kwargs[kwargs['testing']]
40 40 > kwargs.get('test')
41 41 > kwargs.pop('test')
42 42 > kwargs.get('test', 'testing')
43 43 > kwargs.pop('test', 'testing')
44 44 > kwargs.setdefault('test', 'testing')
45 45 >
46 46 > opts['test'] = "123"
47 47 > opts[test['testing']]
48 48 > opts[test[[['testing']]]]
49 49 > opts[opts['testing']]
50 50 > opts.get('test')
51 51 > opts.pop('test')
52 52 > opts.get('test', 'testing')
53 53 > opts.pop('test', 'testing')
54 54 > opts.setdefault('test', 'testing')
55 55 >
56 56 > commitopts['test'] = "123"
57 57 > commitopts[test['testing']]
58 58 > commitopts[test[[['testing']]]]
59 59 > commitopts[commitopts['testing']]
60 60 > commitopts.get('test')
61 61 > commitopts.pop('test')
62 62 > commitopts.get('test', 'testing')
63 63 > commitopts.pop('test', 'testing')
64 64 > commitopts.setdefault('test', 'testing')
65 65 > EOF
66 66 $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts
67 67 kwargs['test'] = b"123"
68 68 kwargs[test[b'testing']]
69 69 kwargs[test[[[b'testing']]]]
70 70 kwargs[kwargs['testing']]
71 71 kwargs.get('test')
72 72 kwargs.pop('test')
73 73 kwargs.get('test', b'testing')
74 74 kwargs.pop('test', b'testing')
75 75 kwargs.setdefault('test', b'testing')
76 76
77 77 opts['test'] = b"123"
78 78 opts[test[b'testing']]
79 79 opts[test[[[b'testing']]]]
80 80 opts[opts['testing']]
81 81 opts.get('test')
82 82 opts.pop('test')
83 83 opts.get('test', b'testing')
84 84 opts.pop('test', b'testing')
85 85 opts.setdefault('test', b'testing')
86 86
87 87 commitopts['test'] = b"123"
88 88 commitopts[test[b'testing']]
89 89 commitopts[test[[[b'testing']]]]
90 90 commitopts[commitopts['testing']]
91 91 commitopts.get('test')
92 92 commitopts.pop('test')
93 93 commitopts.get('test', b'testing')
94 94 commitopts.pop('test', b'testing')
95 95 commitopts.setdefault('test', b'testing')
96 96
97 97 Test attr*() as methods
98 98
99 99 $ cat > testfile.py <<EOF
100 100 > setattr(o, 'a', 1)
101 101 > util.setattr(o, 'ae', 1)
102 102 > util.getattr(o, 'alksjdf', 'default')
103 103 > util.addattr(o, 'asdf')
104 104 > util.hasattr(o, 'lksjdf', 'default')
105 105 > util.safehasattr(o, 'lksjdf', 'default')
106 106 > @eh.wrapfunction(func, 'lksjdf')
107 107 > def f():
108 108 > pass
109 109 > @eh.wrapclass(klass, 'lksjdf')
110 110 > def f():
111 111 > pass
112 112 > EOF
113 113 $ byteify_strings testfile.py --allow-attr-methods
114 114 setattr(o, 'a', 1)
115 115 util.setattr(o, 'ae', 1)
116 116 util.getattr(o, 'alksjdf', b'default')
117 117 util.addattr(o, 'asdf')
118 118 util.hasattr(o, 'lksjdf', b'default')
119 119 util.safehasattr(o, 'lksjdf', b'default')
120 120 @eh.wrapfunction(func, 'lksjdf')
121 121 def f():
122 122 pass
123 123 @eh.wrapclass(klass, 'lksjdf')
124 124 def f():
125 125 pass
126 126
127 127 Test without attr*() as methods
128 128
129 129 $ cat > testfile.py <<EOF
130 130 > setattr(o, 'a', 1)
131 131 > util.setattr(o, 'ae', 1)
132 132 > util.getattr(o, 'alksjdf', 'default')
133 133 > util.addattr(o, 'asdf')
134 134 > util.hasattr(o, 'lksjdf', 'default')
135 135 > util.safehasattr(o, 'lksjdf', 'default')
136 136 > @eh.wrapfunction(func, 'lksjdf')
137 137 > def f():
138 138 > pass
139 139 > @eh.wrapclass(klass, 'lksjdf')
140 140 > def f():
141 141 > pass
142 142 > EOF
143 143 $ byteify_strings testfile.py
144 144 setattr(o, 'a', 1)
145 145 util.setattr(o, b'ae', 1)
146 146 util.getattr(o, b'alksjdf', b'default')
147 147 util.addattr(o, b'asdf')
148 148 util.hasattr(o, b'lksjdf', b'default')
149 149 util.safehasattr(o, b'lksjdf', b'default')
150 150 @eh.wrapfunction(func, b'lksjdf')
151 151 def f():
152 152 pass
153 153 @eh.wrapclass(klass, b'lksjdf')
154 154 def f():
155 155 pass
156 156
157 157 Test ignore comments
158 158
159 159 $ cat > testfile.py <<EOF
160 160 > # py3-transform: off
161 161 > "none"
162 162 > "of"
163 163 > 'these'
164 164 > s = """should"""
165 165 > d = '''be'''
166 166 > # py3-transform: on
167 167 > "this should"
168 168 > 'and this also'
169 169 >
170 170 > # no-py3-transform
171 171 > l = "this should be ignored"
172 172 > l2 = "this shouldn't"
173 173 >
174 174 > EOF
175 175 $ byteify_strings testfile.py
176 176 # py3-transform: off
177 177 "none"
178 178 "of"
179 179 'these'
180 180 s = """should"""
181 181 d = '''be'''
182 182 # py3-transform: on
183 183 b"this should"
184 184 b'and this also'
185 185
186 186 # no-py3-transform
187 187 l = "this should be ignored"
188 188 l2 = b"this shouldn't"
189 189
190 190 Test triple-quoted strings
191 191
192 192 $ cat > testfile.py <<EOF
193 193 > """This is ignored
194 194 > """
195 195 >
196 196 > line = """
197 197 > This should not be
198 198 > """
199 199 > line = '''
200 200 > Neither should this
201 201 > '''
202 202 > EOF
203 203 $ byteify_strings testfile.py
204 204 """This is ignored
205 205 """
206 206
207 207 line = b"""
208 208 This should not be
209 209 """
210 210 line = b'''
211 211 Neither should this
212 212 '''
213 213
214 214 Test prefixed strings
215 215
216 216 $ cat > testfile.py <<EOF
217 217 > obj['test'] = b"1234"
218 218 > obj[r'test'] = u"1234"
219 219 > EOF
220 220 $ byteify_strings testfile.py
221 221 obj[b'test'] = b"1234"
222 222 obj[r'test'] = u"1234"
223 223
224 224 Test multi-line alignment
225 225
226 226 $ cat > testfile.py <<'EOF'
227 227 > def foo():
228 228 > error.Abort(_("foo"
229 229 > "bar"
230 230 > "%s")
231 231 > % parameter)
232 232 > {
233 233 > 'test': dict,
234 234 > 'test2': dict,
235 235 > }
236 236 > [
237 237 > "thing",
238 238 > "thing2"
239 239 > ]
240 240 > (
241 241 > "tuple",
242 242 > "tuple2",
243 243 > )
244 244 > {"thing",
245 245 > }
246 246 > EOF
247 247 $ byteify_strings testfile.py
248 248 def foo():
249 249 error.Abort(_(b"foo"
250 250 b"bar"
251 251 b"%s")
252 252 % parameter)
253 253 {
254 254 b'test': dict,
255 255 b'test2': dict,
256 256 }
257 257 [
258 258 b"thing",
259 259 b"thing2"
260 260 ]
261 261 (
262 262 b"tuple",
263 263 b"tuple2",
264 264 )
265 265 {b"thing",
266 266 }
General Comments 0
You need to be logged in to leave comments. Login now