##// END OF EJS Templates
byteify-strings: add space in special comments to silence flake8 error...
Raphaël Gomès -
r42928:f9b64ff9 default
parent child Browse files
Show More
@@ -1,311 +1,311 b''
1 1 #!/usr/bin/env python3
2 2 #
3 3 # byteify-strings.py - transform string literals to be Python 3 safe
4 4 #
5 5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import, print_function
11 11
12 12 import argparse
13 13 import contextlib
14 14 import errno
15 15 import os
16 16 import sys
17 17 import tempfile
18 18 import token
19 19 import tokenize
20 20
def adjusttokenpos(t, ofs):
    """Return a copy of token *t* with its start/end columns shifted by *ofs*."""
    startrow, startcol = t.start
    endrow, endcol = t.end
    return t._replace(start=(startrow, startcol + ofs),
                      end=(endrow, endcol + ofs))
25 25
def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.
    """
    # Tokens recorded by _ensuresysstr() below; the string transformer
    # leaves these unprefixed (native str) instead of adding b''.
    sysstrtokens = set()

    # The following utility functions access the tokens list and i index of
    # the for i, t enumerate(tokens) loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there is not that many arguments.

        Assumes that token[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensuresysstr(j):
        """Make sure the token at j is a system string

        Remember the given token so the string transformer won't add
        the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        k = j
        currtoken = tokens[k]
        while currtoken.type in (token.STRING, token.NEWLINE, tokenize.NL):
            k += 1
            if (
                currtoken.type == token.STRING
                and currtoken.string.startswith(("'", '"'))
            ):
                sysstrtokens.add(currtoken)
            try:
                currtoken = tokens[k]
            except IndexError:
                break

    def _isitemaccess(j):
        """Assert the next tokens form an item access on `tokens[j]` and that
        `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '[')
                and tokens[j + 2].type == token.STRING
                and _isop(j + 3, ']')
            )
        except IndexError:
            return False

    def _ismethodcall(j, *methodnames):
        """Assert the next tokens form a call to `methodname` with a string
        as first argument on `tokens[j]` and that `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '.')
                and tokens[j + 2].type == token.NAME
                and tokens[j + 2].string in methodnames
                and _isop(j + 3, '(')
                and tokens[j + 4].type == token.STRING
            )
        except IndexError:
            return False

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
    ignorenextline = False  # don't transform the next line
    insideignoreblock = False  # don't transform until turned off
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            lastparen = parens[-1]
            if t.start[1] == lastparen[1]:
                coloffset = lastparen[2]
            elif (
                t.start[1] + 1 == lastparen[1]
                and lastparen[3] not in (token.NEWLINE, tokenize.NL)
            ):
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = lastparen[2] + (lastparen[1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in (token.NEWLINE, tokenize.NL):
            yield adjusttokenpos(t, coloffset)
            coldelta = 0
            coloffset = -1
            if not insideignoreblock:
                # The special comment includes a space ("# no-py3-transform")
                # so the source files stay flake8-clean.
                ignorenextline = (
                    tokens[i - 1].type == token.COMMENT
                    and tokens[i - 1].string == "# no-py3-transform"
                )
            continue

        if t.type == token.COMMENT:
            if t.string == "# py3-transform: off":
                insideignoreblock = True
            if t.string == "# py3-transform: on":
                insideignoreblock = False

        if ignorenextline or insideignoreblock:
            yield adjusttokenpos(t, coloffset)
            continue

        # Remember the last paren position.
        if _isop(i, '(', '[', '{'):
            parens.append(t.end + (coloffset + coldelta, tokens[i + 1].type))
        elif _isop(i, ')', ']', '}'):
            parens.pop()

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                # If it's assigned to something, it's not a docstring
                if not _isop(i - 1, '='):
                    yield adjusttokenpos(t, coloffset)
                    continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
            if s[0] not in ("'", '"'):
                yield adjusttokenpos(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield adjusttokenpos(t._replace(string='b%s' % t.string),
                                 coloffset)
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if fn in (
                'getattr', 'setattr', 'hasattr', 'safehasattr', 'wrapfunction',
                'wrapclass', 'addattr'
            ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                continue

        if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
            if _isitemaccess(i):
                _ensuresysstr(i + 2)
            if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                _ensuresysstr(i + 4)

        # Looks like "if __name__ == '__main__'".
        if (t.type == token.NAME and t.string == '__name__'
            and _isop(i + 1, '==')):
            _ensuresysstr(i + 2)

        # Emit unmodified token.
        yield adjusttokenpos(t, coloffset)
250 250
def process(fin, fout, opts):
    """Byteify the Python source read from *fin* and write it to *fout*."""
    intokens = list(tokenize.tokenize(fin.readline))
    outtokens = replacetokens(intokens, opts)
    fout.write(tokenize.untokenize(outtokens))
255 255
def tryunlink(fname):
    """Remove *fname*, silently ignoring the case where it does not exist."""
    try:
        os.unlink(fname)
    except OSError as err:
        # A missing file is fine; anything else is a real error.
        if err.errno == errno.ENOENT:
            return
        raise
262 262
@contextlib.contextmanager
def editinplace(fname):
    """Yield a temp file that atomically replaces *fname* on success.

    The temporary file is created in the same directory as *fname* so the
    final os.rename() stays on one filesystem. On any error the temp file
    is removed and *fname* is left untouched.
    """
    basename = os.path.basename(fname)
    dirname = os.path.dirname(fname)
    tmp = tempfile.NamedTemporaryFile(prefix='.%s-' % basename, suffix='~',
                                      dir=dirname, delete=False)
    try:
        yield tmp
        tmp.close()
        # Windows cannot rename over an existing file.
        if os.name == 'nt':
            tryunlink(fname)
        os.rename(tmp.name, fname)
    finally:
        tmp.close()
        tryunlink(tmp.name)
278 278
def main():
    """Command line entry point: byteify each FILE in place or to stdout."""
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--inplace', action='store_true', default=False,
                    help='edit files in place')
    # NOTE: the original had stray trailing commas after the next three
    # add_argument() calls, building throwaway one-element tuples; removed.
    ap.add_argument('--dictiter', action='store_true', default=False,
                    help='rewrite iteritems() and itervalues()')
    ap.add_argument('--allow-attr-methods', action='store_true',
                    default=False,
                    help='also handle attr*() when they are methods')
    ap.add_argument('--treat-as-kwargs', nargs="+", default=[],
                    help="ignore kwargs-like objects")
    ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
    args = ap.parse_args()
    opts = {
        'dictiter': args.dictiter,
        'treat-as-kwargs': set(args.treat_as_kwargs),
        'allow-attr-methods': args.allow_attr_methods,
    }
    for fname in args.files:
        if args.inplace:
            with editinplace(fname) as fout:
                with open(fname, 'rb') as fin:
                    process(fin, fout, opts)
        else:
            with open(fname, 'rb') as fin:
                fout = sys.stdout.buffer
                process(fin, fout, opts)
306 306
if __name__ == '__main__':
    # The transformer emits Python-3-only syntax handling; refuse Python 2.
    if sys.version_info[0] < 3:
        print('This script must be run under Python 3.')
        sys.exit(3)
    main()
@@ -1,261 +1,261 b''
1 1 #require py3
2 2
3 3 $ byteify_strings () {
4 4 > $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@"
5 5 > }
6 6
7 7 Test in-place
8 8
9 9 $ cat > testfile.py <<EOF
10 10 > obj['test'] = b"1234"
11 11 > mydict.iteritems()
12 12 > EOF
13 13 $ byteify_strings testfile.py -i
14 14 $ cat testfile.py
15 15 obj[b'test'] = b"1234"
16 16 mydict.iteritems()
17 17
18 18 Test with dictiter
19 19
20 20 $ cat > testfile.py <<EOF
21 21 > obj['test'] = b"1234"
22 22 > mydict.iteritems()
23 23 > EOF
24 24 $ byteify_strings testfile.py --dictiter
25 25 obj[b'test'] = b"1234"
26 26 mydict.items()
27 27
28 28 Test kwargs-like objects
29 29
30 30 $ cat > testfile.py <<EOF
31 31 > kwargs['test'] = "123"
32 32 > kwargs[test['testing']]
33 33 > kwargs[test[[['testing']]]]
34 34 > kwargs[kwargs['testing']]
35 35 > kwargs.get('test')
36 36 > kwargs.pop('test')
37 37 > kwargs.get('test', 'testing')
38 38 > kwargs.pop('test', 'testing')
39 39 > kwargs.setdefault('test', 'testing')
40 40 >
41 41 > opts['test'] = "123"
42 42 > opts[test['testing']]
43 43 > opts[test[[['testing']]]]
44 44 > opts[opts['testing']]
45 45 > opts.get('test')
46 46 > opts.pop('test')
47 47 > opts.get('test', 'testing')
48 48 > opts.pop('test', 'testing')
49 49 > opts.setdefault('test', 'testing')
50 50 >
51 51 > commitopts['test'] = "123"
52 52 > commitopts[test['testing']]
53 53 > commitopts[test[[['testing']]]]
54 54 > commitopts[commitopts['testing']]
55 55 > commitopts.get('test')
56 56 > commitopts.pop('test')
57 57 > commitopts.get('test', 'testing')
58 58 > commitopts.pop('test', 'testing')
59 59 > commitopts.setdefault('test', 'testing')
60 60 > EOF
61 61 $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts
62 62 kwargs['test'] = b"123"
63 63 kwargs[test[b'testing']]
64 64 kwargs[test[[[b'testing']]]]
65 65 kwargs[kwargs['testing']]
66 66 kwargs.get('test')
67 67 kwargs.pop('test')
68 68 kwargs.get('test', b'testing')
69 69 kwargs.pop('test', b'testing')
70 70 kwargs.setdefault('test', b'testing')
71 71
72 72 opts['test'] = b"123"
73 73 opts[test[b'testing']]
74 74 opts[test[[[b'testing']]]]
75 75 opts[opts['testing']]
76 76 opts.get('test')
77 77 opts.pop('test')
78 78 opts.get('test', b'testing')
79 79 opts.pop('test', b'testing')
80 80 opts.setdefault('test', b'testing')
81 81
82 82 commitopts['test'] = b"123"
83 83 commitopts[test[b'testing']]
84 84 commitopts[test[[[b'testing']]]]
85 85 commitopts[commitopts['testing']]
86 86 commitopts.get('test')
87 87 commitopts.pop('test')
88 88 commitopts.get('test', b'testing')
89 89 commitopts.pop('test', b'testing')
90 90 commitopts.setdefault('test', b'testing')
91 91
92 92 Test attr*() as methods
93 93
94 94 $ cat > testfile.py <<EOF
95 95 > setattr(o, 'a', 1)
96 96 > util.setattr(o, 'ae', 1)
97 97 > util.getattr(o, 'alksjdf', 'default')
98 98 > util.addattr(o, 'asdf')
99 99 > util.hasattr(o, 'lksjdf', 'default')
100 100 > util.safehasattr(o, 'lksjdf', 'default')
101 101 > @eh.wrapfunction(func, 'lksjdf')
102 102 > def f():
103 103 > pass
104 104 > @eh.wrapclass(klass, 'lksjdf')
105 105 > def f():
106 106 > pass
107 107 > EOF
108 108 $ byteify_strings testfile.py --allow-attr-methods
109 109 setattr(o, 'a', 1)
110 110 util.setattr(o, 'ae', 1)
111 111 util.getattr(o, 'alksjdf', b'default')
112 112 util.addattr(o, 'asdf')
113 113 util.hasattr(o, 'lksjdf', b'default')
114 114 util.safehasattr(o, 'lksjdf', b'default')
115 115 @eh.wrapfunction(func, 'lksjdf')
116 116 def f():
117 117 pass
118 118 @eh.wrapclass(klass, 'lksjdf')
119 119 def f():
120 120 pass
121 121
122 122 Test without attr*() as methods
123 123
124 124 $ cat > testfile.py <<EOF
125 125 > setattr(o, 'a', 1)
126 126 > util.setattr(o, 'ae', 1)
127 127 > util.getattr(o, 'alksjdf', 'default')
128 128 > util.addattr(o, 'asdf')
129 129 > util.hasattr(o, 'lksjdf', 'default')
130 130 > util.safehasattr(o, 'lksjdf', 'default')
131 131 > @eh.wrapfunction(func, 'lksjdf')
132 132 > def f():
133 133 > pass
134 134 > @eh.wrapclass(klass, 'lksjdf')
135 135 > def f():
136 136 > pass
137 137 > EOF
138 138 $ byteify_strings testfile.py
139 139 setattr(o, 'a', 1)
140 140 util.setattr(o, b'ae', 1)
141 141 util.getattr(o, b'alksjdf', b'default')
142 142 util.addattr(o, b'asdf')
143 143 util.hasattr(o, b'lksjdf', b'default')
144 144 util.safehasattr(o, b'lksjdf', b'default')
145 145 @eh.wrapfunction(func, b'lksjdf')
146 146 def f():
147 147 pass
148 148 @eh.wrapclass(klass, b'lksjdf')
149 149 def f():
150 150 pass
151 151
152 152 Test ignore comments
153 153
154 154 $ cat > testfile.py <<EOF
155 > # py3-transform: off
156 156 > "none"
157 157 > "of"
158 158 > 'these'
159 159 > s = """should"""
160 160 > d = '''be'''
161 > # py3-transform: on
162 162 > "this should"
163 163 > 'and this also'
164 164 >
165 > # no-py3-transform
166 166 > l = "this should be ignored"
167 167 > l2 = "this shouldn't"
168 168 >
169 169 > EOF
170 170 $ byteify_strings testfile.py
171 # py3-transform: off
172 172 "none"
173 173 "of"
174 174 'these'
175 175 s = """should"""
176 176 d = '''be'''
177 # py3-transform: on
178 178 b"this should"
179 179 b'and this also'
180 180
181 # no-py3-transform
182 182 l = "this should be ignored"
183 183 l2 = b"this shouldn't"
184 184
185 185 Test triple-quoted strings
186 186
187 187 $ cat > testfile.py <<EOF
188 188 > """This is ignored
189 189 > """
190 190 >
191 191 > line = """
192 192 > This should not be
193 193 > """
194 194 > line = '''
195 195 > Neither should this
196 196 > '''
197 197 > EOF
198 198 $ byteify_strings testfile.py
199 199 """This is ignored
200 200 """
201 201
202 202 line = b"""
203 203 This should not be
204 204 """
205 205 line = b'''
206 206 Neither should this
207 207 '''
208 208
209 209 Test prefixed strings
210 210
211 211 $ cat > testfile.py <<EOF
212 212 > obj['test'] = b"1234"
213 213 > obj[r'test'] = u"1234"
214 214 > EOF
215 215 $ byteify_strings testfile.py
216 216 obj[b'test'] = b"1234"
217 217 obj[r'test'] = u"1234"
218 218
219 219 Test multi-line alignment
220 220
221 221 $ cat > testfile.py <<'EOF'
222 222 > def foo():
223 223 > error.Abort(_("foo"
224 224 > "bar"
225 225 > "%s")
226 226 > % parameter)
227 227 > {
228 228 > 'test': dict,
229 229 > 'test2': dict,
230 230 > }
231 231 > [
232 232 > "thing",
233 233 > "thing2"
234 234 > ]
235 235 > (
236 236 > "tuple",
237 237 > "tuple2",
238 238 > )
239 239 > {"thing",
240 240 > }
241 241 > EOF
242 242 $ byteify_strings testfile.py
243 243 def foo():
244 244 error.Abort(_(b"foo"
245 245 b"bar"
246 246 b"%s")
247 247 % parameter)
248 248 {
249 249 b'test': dict,
250 250 b'test2': dict,
251 251 }
252 252 [
253 253 b"thing",
254 254 b"thing2"
255 255 ]
256 256 (
257 257 b"tuple",
258 258 b"tuple2",
259 259 )
260 260 {b"thing",
261 261 }
General Comments 0
You need to be logged in to leave comments. Login now