Show More
@@ -1,345 +1,345 | |||||
1 | #!/usr/bin/env python3 |
|
1 | #!/usr/bin/env python3 | |
2 | # |
|
2 | # | |
3 | # byteify-strings.py - transform string literals to be Python 3 safe |
|
3 | # byteify-strings.py - transform string literals to be Python 3 safe | |
4 | # |
|
4 | # | |
5 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
5 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> | |
6 | # |
|
6 | # | |
7 | # This software may be used and distributed according to the terms of the |
|
7 | # This software may be used and distributed according to the terms of the | |
8 | # GNU General Public License version 2 or any later version. |
|
8 | # GNU General Public License version 2 or any later version. | |
9 |
|
9 | |||
10 | from __future__ import absolute_import, print_function |
|
10 | from __future__ import absolute_import, print_function | |
11 |
|
11 | |||
12 | import argparse |
|
12 | import argparse | |
13 | import contextlib |
|
13 | import contextlib | |
14 | import errno |
|
14 | import errno | |
15 | import os |
|
15 | import os | |
16 | import sys |
|
16 | import sys | |
17 | import tempfile |
|
17 | import tempfile | |
18 | import token |
|
18 | import token | |
19 | import tokenize |
|
19 | import tokenize | |
20 |
|
20 | |||
21 |
|
21 | |||
def adjusttokenpos(t, ofs):
    """Return a copy of token ``t`` with its columns shifted by ``ofs``.

    Only the column part of ``start`` and ``end`` is changed; the row
    numbers and all other fields of the token are carried over as-is.
    ``t`` itself is not modified (``_replace`` builds a new tuple).
    """
    return t._replace(
        start=(t.start[0], t.start[1] + ofs), end=(t.end[0], t.end[1] + ofs)
    )
27 |
|
27 | |||
28 |
|
28 | |||
def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    ``tokens`` is an indexable sequence of ``tokenize.TokenInfo`` items
    (the helpers below look ahead and behind by index).

    ``opts`` is a dict with the following keys:

    ``allow-attr-methods``
        if true, the ``*attr()`` handling also applies when the name is
        preceded by a ``.`` (i.e. it is called as a method)
    ``dictiter``
        if true, rewrite ``iteritems``/``itervalues`` calls to
        ``items``/``values``
    ``treat-as-kwargs``
        container of names whose string keys must stay system strings

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.
    """
    # Tokens that must be emitted without a b'' prefix.
    sysstrtokens = set()

    # The following utility functions access the tokens list and the i index
    # of the "for i, t in enumerate(tokens)" loop below.
    def _isop(j, *o):
        """Return True if tokens[j] is an OP with one of the given values

        An out-of-range j yields False instead of raising.
        """
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there are not that many arguments.

        Assumes that tokens[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    # closed the call itself before reaching arg n
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                # a top-level comma moves on to the next argument
                n -= 1

        return None

    def _ensuresysstr(j):
        """Make sure the token at j is a system string

        Remember the given token so the string transformer won't add
        the byte prefix. Consecutive string tokens following j (possibly
        separated by line breaks) are remembered as well.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        k = j
        currtoken = tokens[k]
        while currtoken.type in (token.STRING, token.NEWLINE, tokenize.NL):
            k += 1
            # Only unprefixed literals are recorded; prefixed ones are
            # never rewritten by the main loop anyway.
            if currtoken.type == token.STRING and currtoken.string.startswith(
                ("'", '"')
            ):
                sysstrtokens.add(currtoken)
            try:
                currtoken = tokens[k]
            except IndexError:
                break

    def _isitemaccess(j):
        """Return True if the next tokens form an item access with a string
        key on `tokens[j]` and `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '[')
                and tokens[j + 2].type == token.STRING
                and _isop(j + 3, ']')
            )
        except IndexError:
            return False

    def _ismethodcall(j, *methodnames):
        """Return True if the next tokens form a call to one of `methodnames`
        with a string as first argument on `tokens[j]` and `tokens[j]` is
        a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '.')
                and tokens[j + 2].type == token.NAME
                and tokens[j + 2].string in methodnames
                and _isop(j + 3, '(')
                and tokens[j + 4].type == token.STRING
            )
        except IndexError:
            return False

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
    ignorenextline = False  # don't transform the next line
    insideignoreblock = False  # don't transform until turned off
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            lastparen = parens[-1]
            if t.start[1] == lastparen[1]:
                coloffset = lastparen[2]
            elif t.start[1] + 1 == lastparen[1] and lastparen[3] not in (
                token.NEWLINE,
                tokenize.NL,
            ):
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = lastparen[2] + (lastparen[1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in (token.NEWLINE, tokenize.NL):
            yield adjusttokenpos(t, coloffset)
            coldelta = 0
            coloffset = -1
            if not insideignoreblock:
                # A "# no-py3-transform" comment ending this line protects
                # the line that follows it.
                ignorenextline = (
                    tokens[i - 1].type == token.COMMENT
                    and tokens[i - 1].string == "# no-py3-transform"
                )
            continue

        # Block-level switches: everything between the "off" and "on"
        # comments is passed through untouched.
        if t.type == token.COMMENT:
            if t.string == "# py3-transform: off":
                insideignoreblock = True
            if t.string == "# py3-transform: on":
                insideignoreblock = False

        if ignorenextline or insideignoreblock:
            yield adjusttokenpos(t, coloffset)
            continue

        # Remember the last paren position.
        if _isop(i, '(', '[', '{'):
            parens.append(t.end + (coloffset + coldelta, tokens[i + 1].type))
        elif _isop(i, ')', ']', '}'):
            parens.pop()

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                # If it's assigned to something, it's not a docstring
                if not _isop(i - 1, '='):
                    yield adjusttokenpos(t, coloffset)
                    continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'). Ignore.
            if s[0] not in ("'", '"'):
                yield adjusttokenpos(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield adjusttokenpos(t._replace(string='b%s' % t.string), coloffset)
            # The inserted "b" pushes the rest of the line one column right.
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if fn in (
                'getattr',
                'setattr',
                'hasattr',
                'safehasattr',
                'wrapfunction',
                'wrapclass',
                'addattr',
            ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # Rewrite iteritems/itervalues to items/values as the former
            # are not present in the Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                # fn[4:] strips the leading "iter"
                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                continue

        # String keys used with kwargs-like objects stay system strings.
        if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
            if _isitemaccess(i):
                _ensuresysstr(i + 2)
            if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                _ensuresysstr(i + 4)

        # Looks like "if __name__ == '__main__'".
        if (
            t.type == token.NAME
            and t.string == '__name__'
            and _isop(i + 1, '==')
        ):
            _ensuresysstr(i + 2)

        # Emit unmodified token.
        yield adjusttokenpos(t, coloffset)
259 |
|
259 | |||
260 |
|
260 | |||
def process(fin, fout, opts):
    """Rewrite the Python source read from ``fin`` and write it to ``fout``.

    ``fin`` must provide a ``readline`` usable by ``tokenize.tokenize``
    and ``fout`` a ``write`` accepting what ``tokenize.untokenize``
    returns; the callers in this script pass a file opened in ``'rb'``
    mode and a binary output stream. ``opts`` is handed to
    ``replacetokens`` unchanged.
    """
    tokens = tokenize.tokenize(fin.readline)
    # replacetokens() looks ahead and behind by index, so it needs the
    # whole token stream materialized as a list.
    tokens = replacetokens(list(tokens), opts)
    fout.write(tokenize.untokenize(tokens))
265 |
|
265 | |||
266 |
|
266 | |||
def tryunlink(fname):
    """Remove ``fname``, treating an already missing file as success.

    Any ``OSError`` other than ``ENOENT`` is re-raised to the caller.
    """
    try:
        os.unlink(fname)
    except OSError as err:
        # A file that is already gone is exactly what was asked for.
        if err.errno != errno.ENOENT:
            raise
273 |
|
273 | |||
274 |
|
274 | |||
@contextlib.contextmanager
def editinplace(fname):
    """Context manager yielding a temporary file that replaces ``fname``.

    The temporary file is created next to ``fname`` (same directory, name
    derived from its basename) and is yielded to the ``with`` body for
    writing. If the body completes normally the temporary file is closed
    and moved over ``fname``. If the body raises, ``fname`` is left
    untouched. In both cases the temporary file is closed and no
    temporary file is left behind.
    """
    n = os.path.basename(fname)
    d = os.path.dirname(fname)
    # delete=False: the file must survive close() so it can be renamed.
    fp = tempfile.NamedTemporaryFile(
        prefix='.%s-' % n, suffix='~', dir=d, delete=False
    )
    try:
        yield fp
        # Close (and thereby flush) before moving the file into place.
        fp.close()
        # os.replace() overwrites an existing destination on every
        # platform, so there is no need to unlink fname first on Windows
        # and no moment at which fname does not exist.
        os.replace(fp.name, fname)
    finally:
        # Error path: the file may still be open. Closing twice is harmless.
        fp.close()
        # After a successful replace the temporary name is already gone;
        # tryunlink() ignores that case.
        tryunlink(fp.name)
291 |
|
291 | |||
292 |
|
292 | |||
def main():
    """Parse the command line and rewrite each given source file.

    Options:

    ``-i``/``--inplace``
        replace each file with its rewritten content instead of writing
        the result to standard output
    ``--dictiter``
        rewrite ``iteritems()`` and ``itervalues()``
    ``--allow-attr-methods``
        also handle the ``*attr()`` functions when called as methods
    ``--treat-as-kwargs NAME [NAME ...]``
        names of kwargs-like objects whose string keys are left alone
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--version', action='version', version='Byteify strings 1.0'
    )
    ap.add_argument(
        '-i',
        '--inplace',
        action='store_true',
        default=False,
        help='edit files in place',
    )
    ap.add_argument(
        '--dictiter',
        action='store_true',
        default=False,
        help='rewrite iteritems() and itervalues()',
    )
    ap.add_argument(
        '--allow-attr-methods',
        action='store_true',
        default=False,
        help='also handle attr*() when they are methods',
    )
    ap.add_argument(
        '--treat-as-kwargs',
        nargs="+",
        default=[],
        help="ignore kwargs-like objects",
    )
    ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
    args = ap.parse_args()
    # Keys match what replacetokens() looks up.
    opts = {
        'dictiter': args.dictiter,
        'treat-as-kwargs': set(args.treat_as_kwargs),
        'allow-attr-methods': args.allow_attr_methods,
    }
    for fname in args.files:
        if args.inplace:
            with editinplace(fname) as fout:
                with open(fname, 'rb') as fin:
                    process(fin, fout, opts)
        else:
            with open(fname, 'rb') as fin:
                # process() writes bytes, hence the binary stdout stream.
                fout = sys.stdout.buffer
                process(fin, fout, opts)
339 |
|
339 | |||
340 |
|
340 | |||
if __name__ == '__main__':
    # Refuse to run on interpreters older than 3.7; exit status 3 signals
    # the unsupported interpreter to the caller.
    if sys.version_info[0:2] < (3, 7):
        print('This script must be run under Python 3.7+')
        sys.exit(3)
    main()
@@ -1,266 +1,266 | |||||
1 |
#require py3 |
|
1 | #require py37 | |
2 |
|
2 | |||
3 | $ byteify_strings () { |
|
3 | $ byteify_strings () { | |
4 | > $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@" |
|
4 | > $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@" | |
5 | > } |
|
5 | > } | |
6 |
|
6 | |||
7 | Test version |
|
7 | Test version | |
8 |
|
8 | |||
9 | $ byteify_strings --version |
|
9 | $ byteify_strings --version | |
10 | Byteify strings * (glob) |
|
10 | Byteify strings * (glob) | |
11 |
|
11 | |||
12 | Test in-place |
|
12 | Test in-place | |
13 |
|
13 | |||
14 | $ cat > testfile.py <<EOF |
|
14 | $ cat > testfile.py <<EOF | |
15 | > obj['test'] = b"1234" |
|
15 | > obj['test'] = b"1234" | |
16 | > mydict.iteritems() |
|
16 | > mydict.iteritems() | |
17 | > EOF |
|
17 | > EOF | |
18 | $ byteify_strings testfile.py -i |
|
18 | $ byteify_strings testfile.py -i | |
19 | $ cat testfile.py |
|
19 | $ cat testfile.py | |
20 | obj[b'test'] = b"1234" |
|
20 | obj[b'test'] = b"1234" | |
21 | mydict.iteritems() |
|
21 | mydict.iteritems() | |
22 |
|
22 | |||
23 | Test with dictiter |
|
23 | Test with dictiter | |
24 |
|
24 | |||
25 | $ cat > testfile.py <<EOF |
|
25 | $ cat > testfile.py <<EOF | |
26 | > obj['test'] = b"1234" |
|
26 | > obj['test'] = b"1234" | |
27 | > mydict.iteritems() |
|
27 | > mydict.iteritems() | |
28 | > EOF |
|
28 | > EOF | |
29 | $ byteify_strings testfile.py --dictiter |
|
29 | $ byteify_strings testfile.py --dictiter | |
30 | obj[b'test'] = b"1234" |
|
30 | obj[b'test'] = b"1234" | |
31 | mydict.items() |
|
31 | mydict.items() | |
32 |
|
32 | |||
33 | Test kwargs-like objects |
|
33 | Test kwargs-like objects | |
34 |
|
34 | |||
35 | $ cat > testfile.py <<EOF |
|
35 | $ cat > testfile.py <<EOF | |
36 | > kwargs['test'] = "123" |
|
36 | > kwargs['test'] = "123" | |
37 | > kwargs[test['testing']] |
|
37 | > kwargs[test['testing']] | |
38 | > kwargs[test[[['testing']]]] |
|
38 | > kwargs[test[[['testing']]]] | |
39 | > kwargs[kwargs['testing']] |
|
39 | > kwargs[kwargs['testing']] | |
40 | > kwargs.get('test') |
|
40 | > kwargs.get('test') | |
41 | > kwargs.pop('test') |
|
41 | > kwargs.pop('test') | |
42 | > kwargs.get('test', 'testing') |
|
42 | > kwargs.get('test', 'testing') | |
43 | > kwargs.pop('test', 'testing') |
|
43 | > kwargs.pop('test', 'testing') | |
44 | > kwargs.setdefault('test', 'testing') |
|
44 | > kwargs.setdefault('test', 'testing') | |
45 | > |
|
45 | > | |
46 | > opts['test'] = "123" |
|
46 | > opts['test'] = "123" | |
47 | > opts[test['testing']] |
|
47 | > opts[test['testing']] | |
48 | > opts[test[[['testing']]]] |
|
48 | > opts[test[[['testing']]]] | |
49 | > opts[opts['testing']] |
|
49 | > opts[opts['testing']] | |
50 | > opts.get('test') |
|
50 | > opts.get('test') | |
51 | > opts.pop('test') |
|
51 | > opts.pop('test') | |
52 | > opts.get('test', 'testing') |
|
52 | > opts.get('test', 'testing') | |
53 | > opts.pop('test', 'testing') |
|
53 | > opts.pop('test', 'testing') | |
54 | > opts.setdefault('test', 'testing') |
|
54 | > opts.setdefault('test', 'testing') | |
55 | > |
|
55 | > | |
56 | > commitopts['test'] = "123" |
|
56 | > commitopts['test'] = "123" | |
57 | > commitopts[test['testing']] |
|
57 | > commitopts[test['testing']] | |
58 | > commitopts[test[[['testing']]]] |
|
58 | > commitopts[test[[['testing']]]] | |
59 | > commitopts[commitopts['testing']] |
|
59 | > commitopts[commitopts['testing']] | |
60 | > commitopts.get('test') |
|
60 | > commitopts.get('test') | |
61 | > commitopts.pop('test') |
|
61 | > commitopts.pop('test') | |
62 | > commitopts.get('test', 'testing') |
|
62 | > commitopts.get('test', 'testing') | |
63 | > commitopts.pop('test', 'testing') |
|
63 | > commitopts.pop('test', 'testing') | |
64 | > commitopts.setdefault('test', 'testing') |
|
64 | > commitopts.setdefault('test', 'testing') | |
65 | > EOF |
|
65 | > EOF | |
66 | $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts |
|
66 | $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts | |
67 | kwargs['test'] = b"123" |
|
67 | kwargs['test'] = b"123" | |
68 | kwargs[test[b'testing']] |
|
68 | kwargs[test[b'testing']] | |
69 | kwargs[test[[[b'testing']]]] |
|
69 | kwargs[test[[[b'testing']]]] | |
70 | kwargs[kwargs['testing']] |
|
70 | kwargs[kwargs['testing']] | |
71 | kwargs.get('test') |
|
71 | kwargs.get('test') | |
72 | kwargs.pop('test') |
|
72 | kwargs.pop('test') | |
73 | kwargs.get('test', b'testing') |
|
73 | kwargs.get('test', b'testing') | |
74 | kwargs.pop('test', b'testing') |
|
74 | kwargs.pop('test', b'testing') | |
75 | kwargs.setdefault('test', b'testing') |
|
75 | kwargs.setdefault('test', b'testing') | |
76 |
|
76 | |||
77 | opts['test'] = b"123" |
|
77 | opts['test'] = b"123" | |
78 | opts[test[b'testing']] |
|
78 | opts[test[b'testing']] | |
79 | opts[test[[[b'testing']]]] |
|
79 | opts[test[[[b'testing']]]] | |
80 | opts[opts['testing']] |
|
80 | opts[opts['testing']] | |
81 | opts.get('test') |
|
81 | opts.get('test') | |
82 | opts.pop('test') |
|
82 | opts.pop('test') | |
83 | opts.get('test', b'testing') |
|
83 | opts.get('test', b'testing') | |
84 | opts.pop('test', b'testing') |
|
84 | opts.pop('test', b'testing') | |
85 | opts.setdefault('test', b'testing') |
|
85 | opts.setdefault('test', b'testing') | |
86 |
|
86 | |||
87 | commitopts['test'] = b"123" |
|
87 | commitopts['test'] = b"123" | |
88 | commitopts[test[b'testing']] |
|
88 | commitopts[test[b'testing']] | |
89 | commitopts[test[[[b'testing']]]] |
|
89 | commitopts[test[[[b'testing']]]] | |
90 | commitopts[commitopts['testing']] |
|
90 | commitopts[commitopts['testing']] | |
91 | commitopts.get('test') |
|
91 | commitopts.get('test') | |
92 | commitopts.pop('test') |
|
92 | commitopts.pop('test') | |
93 | commitopts.get('test', b'testing') |
|
93 | commitopts.get('test', b'testing') | |
94 | commitopts.pop('test', b'testing') |
|
94 | commitopts.pop('test', b'testing') | |
95 | commitopts.setdefault('test', b'testing') |
|
95 | commitopts.setdefault('test', b'testing') | |
96 |
|
96 | |||
97 | Test attr*() as methods |
|
97 | Test attr*() as methods | |
98 |
|
98 | |||
99 | $ cat > testfile.py <<EOF |
|
99 | $ cat > testfile.py <<EOF | |
100 | > setattr(o, 'a', 1) |
|
100 | > setattr(o, 'a', 1) | |
101 | > util.setattr(o, 'ae', 1) |
|
101 | > util.setattr(o, 'ae', 1) | |
102 | > util.getattr(o, 'alksjdf', 'default') |
|
102 | > util.getattr(o, 'alksjdf', 'default') | |
103 | > util.addattr(o, 'asdf') |
|
103 | > util.addattr(o, 'asdf') | |
104 | > util.hasattr(o, 'lksjdf', 'default') |
|
104 | > util.hasattr(o, 'lksjdf', 'default') | |
105 | > util.safehasattr(o, 'lksjdf', 'default') |
|
105 | > util.safehasattr(o, 'lksjdf', 'default') | |
106 | > @eh.wrapfunction(func, 'lksjdf') |
|
106 | > @eh.wrapfunction(func, 'lksjdf') | |
107 | > def f(): |
|
107 | > def f(): | |
108 | > pass |
|
108 | > pass | |
109 | > @eh.wrapclass(klass, 'lksjdf') |
|
109 | > @eh.wrapclass(klass, 'lksjdf') | |
110 | > def f(): |
|
110 | > def f(): | |
111 | > pass |
|
111 | > pass | |
112 | > EOF |
|
112 | > EOF | |
113 | $ byteify_strings testfile.py --allow-attr-methods |
|
113 | $ byteify_strings testfile.py --allow-attr-methods | |
114 | setattr(o, 'a', 1) |
|
114 | setattr(o, 'a', 1) | |
115 | util.setattr(o, 'ae', 1) |
|
115 | util.setattr(o, 'ae', 1) | |
116 | util.getattr(o, 'alksjdf', b'default') |
|
116 | util.getattr(o, 'alksjdf', b'default') | |
117 | util.addattr(o, 'asdf') |
|
117 | util.addattr(o, 'asdf') | |
118 | util.hasattr(o, 'lksjdf', b'default') |
|
118 | util.hasattr(o, 'lksjdf', b'default') | |
119 | util.safehasattr(o, 'lksjdf', b'default') |
|
119 | util.safehasattr(o, 'lksjdf', b'default') | |
120 | @eh.wrapfunction(func, 'lksjdf') |
|
120 | @eh.wrapfunction(func, 'lksjdf') | |
121 | def f(): |
|
121 | def f(): | |
122 | pass |
|
122 | pass | |
123 | @eh.wrapclass(klass, 'lksjdf') |
|
123 | @eh.wrapclass(klass, 'lksjdf') | |
124 | def f(): |
|
124 | def f(): | |
125 | pass |
|
125 | pass | |
126 |
|
126 | |||
127 | Test without attr*() as methods |
|
127 | Test without attr*() as methods | |
128 |
|
128 | |||
129 | $ cat > testfile.py <<EOF |
|
129 | $ cat > testfile.py <<EOF | |
130 | > setattr(o, 'a', 1) |
|
130 | > setattr(o, 'a', 1) | |
131 | > util.setattr(o, 'ae', 1) |
|
131 | > util.setattr(o, 'ae', 1) | |
132 | > util.getattr(o, 'alksjdf', 'default') |
|
132 | > util.getattr(o, 'alksjdf', 'default') | |
133 | > util.addattr(o, 'asdf') |
|
133 | > util.addattr(o, 'asdf') | |
134 | > util.hasattr(o, 'lksjdf', 'default') |
|
134 | > util.hasattr(o, 'lksjdf', 'default') | |
135 | > util.safehasattr(o, 'lksjdf', 'default') |
|
135 | > util.safehasattr(o, 'lksjdf', 'default') | |
136 | > @eh.wrapfunction(func, 'lksjdf') |
|
136 | > @eh.wrapfunction(func, 'lksjdf') | |
137 | > def f(): |
|
137 | > def f(): | |
138 | > pass |
|
138 | > pass | |
139 | > @eh.wrapclass(klass, 'lksjdf') |
|
139 | > @eh.wrapclass(klass, 'lksjdf') | |
140 | > def f(): |
|
140 | > def f(): | |
141 | > pass |
|
141 | > pass | |
142 | > EOF |
|
142 | > EOF | |
143 | $ byteify_strings testfile.py |
|
143 | $ byteify_strings testfile.py | |
144 | setattr(o, 'a', 1) |
|
144 | setattr(o, 'a', 1) | |
145 | util.setattr(o, b'ae', 1) |
|
145 | util.setattr(o, b'ae', 1) | |
146 | util.getattr(o, b'alksjdf', b'default') |
|
146 | util.getattr(o, b'alksjdf', b'default') | |
147 | util.addattr(o, b'asdf') |
|
147 | util.addattr(o, b'asdf') | |
148 | util.hasattr(o, b'lksjdf', b'default') |
|
148 | util.hasattr(o, b'lksjdf', b'default') | |
149 | util.safehasattr(o, b'lksjdf', b'default') |
|
149 | util.safehasattr(o, b'lksjdf', b'default') | |
150 | @eh.wrapfunction(func, b'lksjdf') |
|
150 | @eh.wrapfunction(func, b'lksjdf') | |
151 | def f(): |
|
151 | def f(): | |
152 | pass |
|
152 | pass | |
153 | @eh.wrapclass(klass, b'lksjdf') |
|
153 | @eh.wrapclass(klass, b'lksjdf') | |
154 | def f(): |
|
154 | def f(): | |
155 | pass |
|
155 | pass | |
156 |
|
156 | |||
157 | Test ignore comments |
|
157 | Test ignore comments | |
158 |
|
158 | |||
159 | $ cat > testfile.py <<EOF |
|
159 | $ cat > testfile.py <<EOF | |
160 | > # py3-transform: off |
|
160 | > # py3-transform: off | |
161 | > "none" |
|
161 | > "none" | |
162 | > "of" |
|
162 | > "of" | |
163 | > 'these' |
|
163 | > 'these' | |
164 | > s = """should""" |
|
164 | > s = """should""" | |
165 | > d = '''be''' |
|
165 | > d = '''be''' | |
166 | > # py3-transform: on |
|
166 | > # py3-transform: on | |
167 | > "this should" |
|
167 | > "this should" | |
168 | > 'and this also' |
|
168 | > 'and this also' | |
169 | > |
|
169 | > | |
170 | > # no-py3-transform |
|
170 | > # no-py3-transform | |
171 | > l = "this should be ignored" |
|
171 | > l = "this should be ignored" | |
172 | > l2 = "this shouldn't" |
|
172 | > l2 = "this shouldn't" | |
173 | > |
|
173 | > | |
174 | > EOF |
|
174 | > EOF | |
175 | $ byteify_strings testfile.py |
|
175 | $ byteify_strings testfile.py | |
176 | # py3-transform: off |
|
176 | # py3-transform: off | |
177 | "none" |
|
177 | "none" | |
178 | "of" |
|
178 | "of" | |
179 | 'these' |
|
179 | 'these' | |
180 | s = """should""" |
|
180 | s = """should""" | |
181 | d = '''be''' |
|
181 | d = '''be''' | |
182 | # py3-transform: on |
|
182 | # py3-transform: on | |
183 | b"this should" |
|
183 | b"this should" | |
184 | b'and this also' |
|
184 | b'and this also' | |
185 |
|
185 | |||
186 | # no-py3-transform |
|
186 | # no-py3-transform | |
187 | l = "this should be ignored" |
|
187 | l = "this should be ignored" | |
188 | l2 = b"this shouldn't" |
|
188 | l2 = b"this shouldn't" | |
189 |
|
189 | |||
190 | Test triple-quoted strings |
|
190 | Test triple-quoted strings | |
191 |
|
191 | |||
192 | $ cat > testfile.py <<EOF |
|
192 | $ cat > testfile.py <<EOF | |
193 | > """This is ignored |
|
193 | > """This is ignored | |
194 | > """ |
|
194 | > """ | |
195 | > |
|
195 | > | |
196 | > line = """ |
|
196 | > line = """ | |
197 | > This should not be |
|
197 | > This should not be | |
198 | > """ |
|
198 | > """ | |
199 | > line = ''' |
|
199 | > line = ''' | |
200 | > Neither should this |
|
200 | > Neither should this | |
201 | > ''' |
|
201 | > ''' | |
202 | > EOF |
|
202 | > EOF | |
203 | $ byteify_strings testfile.py |
|
203 | $ byteify_strings testfile.py | |
204 | """This is ignored |
|
204 | """This is ignored | |
205 | """ |
|
205 | """ | |
206 |
|
206 | |||
207 | line = b""" |
|
207 | line = b""" | |
208 | This should not be |
|
208 | This should not be | |
209 | """ |
|
209 | """ | |
210 | line = b''' |
|
210 | line = b''' | |
211 | Neither should this |
|
211 | Neither should this | |
212 | ''' |
|
212 | ''' | |
213 |
|
213 | |||
214 | Test prefixed strings |
|
214 | Test prefixed strings | |
215 |
|
215 | |||
216 | $ cat > testfile.py <<EOF |
|
216 | $ cat > testfile.py <<EOF | |
217 | > obj['test'] = b"1234" |
|
217 | > obj['test'] = b"1234" | |
218 | > obj[r'test'] = u"1234" |
|
218 | > obj[r'test'] = u"1234" | |
219 | > EOF |
|
219 | > EOF | |
220 | $ byteify_strings testfile.py |
|
220 | $ byteify_strings testfile.py | |
221 | obj[b'test'] = b"1234" |
|
221 | obj[b'test'] = b"1234" | |
222 | obj[r'test'] = u"1234" |
|
222 | obj[r'test'] = u"1234" | |
223 |
|
223 | |||
224 | Test multi-line alignment |
|
224 | Test multi-line alignment | |
225 |
|
225 | |||
226 | $ cat > testfile.py <<'EOF' |
|
226 | $ cat > testfile.py <<'EOF' | |
227 | > def foo(): |
|
227 | > def foo(): | |
228 | > error.Abort(_("foo" |
|
228 | > error.Abort(_("foo" | |
229 | > "bar" |
|
229 | > "bar" | |
230 | > "%s") |
|
230 | > "%s") | |
231 | > % parameter) |
|
231 | > % parameter) | |
232 | > { |
|
232 | > { | |
233 | > 'test': dict, |
|
233 | > 'test': dict, | |
234 | > 'test2': dict, |
|
234 | > 'test2': dict, | |
235 | > } |
|
235 | > } | |
236 | > [ |
|
236 | > [ | |
237 | > "thing", |
|
237 | > "thing", | |
238 | > "thing2" |
|
238 | > "thing2" | |
239 | > ] |
|
239 | > ] | |
240 | > ( |
|
240 | > ( | |
241 | > "tuple", |
|
241 | > "tuple", | |
242 | > "tuple2", |
|
242 | > "tuple2", | |
243 | > ) |
|
243 | > ) | |
244 | > {"thing", |
|
244 | > {"thing", | |
245 | > } |
|
245 | > } | |
246 | > EOF |
|
246 | > EOF | |
247 | $ byteify_strings testfile.py |
|
247 | $ byteify_strings testfile.py | |
248 | def foo(): |
|
248 | def foo(): | |
249 | error.Abort(_(b"foo" |
|
249 | error.Abort(_(b"foo" | |
250 | b"bar" |
|
250 | b"bar" | |
251 | b"%s") |
|
251 | b"%s") | |
252 | % parameter) |
|
252 | % parameter) | |
253 | { |
|
253 | { | |
254 | b'test': dict, |
|
254 | b'test': dict, | |
255 | b'test2': dict, |
|
255 | b'test2': dict, | |
256 | } |
|
256 | } | |
257 | [ |
|
257 | [ | |
258 | b"thing", |
|
258 | b"thing", | |
259 | b"thing2" |
|
259 | b"thing2" | |
260 | ] |
|
260 | ] | |
261 | ( |
|
261 | ( | |
262 | b"tuple", |
|
262 | b"tuple", | |
263 | b"tuple2", |
|
263 | b"tuple2", | |
264 | ) |
|
264 | ) | |
265 | {b"thing", |
|
265 | {b"thing", | |
266 | } |
|
266 | } |
General Comments 0
You need to be logged in to leave comments.
Login now