Show More
@@ -23,155 +23,154 b' def adjusttokenpos(t, ofs):' | |||||
23 | return t._replace(start=(t.start[0], t.start[1] + ofs), |
|
23 | return t._replace(start=(t.start[0], t.start[1] + ofs), | |
24 | end=(t.end[0], t.end[1] + ofs)) |
|
24 | end=(t.end[0], t.end[1] + ofs)) | |
25 |
|
25 | |||
26 | if True: |
|
26 | def replacetokens(tokens, opts): | |
27 | def replacetokens(tokens, opts): |
|
27 | """Transform a stream of tokens from raw to Python 3. | |
28 | """Transform a stream of tokens from raw to Python 3. |
|
28 | ||
|
29 | Returns a generator of possibly rewritten tokens. | |||
|
30 | ||||
|
31 | The input token list may be mutated as part of processing. However, | |||
|
32 | its changes do not necessarily match the output token stream. | |||
|
33 | """ | |||
|
34 | sysstrtokens = set() | |||
29 |
|
35 | |||
30 | Returns a generator of possibly rewritten tokens. |
|
36 | # The following utility functions access the tokens list and i index of | |
|
37 | # the for i, t in enumerate(tokens) loop below | |||
|
38 | def _isop(j, *o): | |||
|
39 | """Assert that tokens[j] is an OP with one of the given values""" | |||
|
40 | try: | |||
|
41 | return tokens[j].type == token.OP and tokens[j].string in o | |||
|
42 | except IndexError: | |||
|
43 | return False | |||
31 |
|
44 | |||
32 | The input token list may be mutated as part of processing. However, |
|
45 | def _findargnofcall(n): | |
33 | its changes do not necessarily match the output token stream. |
|
46 | """Find arg n of a call expression (start at 0) | |
34 | """ |
|
47 | ||
35 | sysstrtokens = set() |
|
48 | Returns index of the first token of that argument, or None if | |
|
49 | there are not that many arguments. | |||
|
50 | ||||
|
51 | Assumes that tokens[i + 1] is '('. | |||
36 |
|
|
52 | ||
37 | # The following utility functions access the tokens list and i index of |
|
53 | """ | |
38 | # the for i, t in enumerate(tokens) loop below |
|
54 | nested = 0 | |
39 | def _isop(j, *o): |
|
55 | for j in range(i + 2, len(tokens)): | |
40 | """Assert that tokens[j] is an OP with one of the given values""" |
|
56 | if _isop(j, ')', ']', '}'): | |
41 | try: |
|
57 | # end of call, tuple, subscription or dict / set | |
42 | return tokens[j].type == token.OP and tokens[j].string in o |
|
58 | nested -= 1 | |
43 | except IndexError: |
|
59 | if nested < 0: | |
44 |
return |
|
60 | return None | |
|
61 | elif n == 0: | |||
|
62 | # this is the starting position of arg | |||
|
63 | return j | |||
|
64 | elif _isop(j, '(', '[', '{'): | |||
|
65 | nested += 1 | |||
|
66 | elif _isop(j, ',') and nested == 0: | |||
|
67 | n -= 1 | |||
45 |
|
68 | |||
46 | def _findargnofcall(n): |
|
69 | return None | |
47 | """Find arg n of a call expression (start at 0) |
|
70 | ||
|
71 | def _ensuresysstr(j): | |||
|
72 | """Make sure the token at j is a system string | |||
48 |
|
73 | |||
49 | Returns index of the first token of that argument, or None if |
|
74 | Remember the given token so the string transformer won't add | |
50 | there are not that many arguments. |
|
75 | the byte prefix. | |
51 |
|
76 | |||
52 | Assumes that tokens[i + 1] is '('. |
|
77 | Ignores tokens that are not strings. Assumes bounds checking has | |
|
78 | already been done. | |||
53 |
|
79 | |||
54 |
|
|
80 | """ | |
55 | nested = 0 |
|
81 | st = tokens[j] | |
56 | for j in range(i + 2, len(tokens)): |
|
82 | if st.type == token.STRING and st.string.startswith(("'", '"')): | |
57 | if _isop(j, ')', ']', '}'): |
|
83 | sysstrtokens.add(st) | |
58 | # end of call, tuple, subscription or dict / set |
|
|||
59 | nested -= 1 |
|
|||
60 | if nested < 0: |
|
|||
61 | return None |
|
|||
62 | elif n == 0: |
|
|||
63 | # this is the starting position of arg |
|
|||
64 | return j |
|
|||
65 | elif _isop(j, '(', '[', '{'): |
|
|||
66 | nested += 1 |
|
|||
67 | elif _isop(j, ',') and nested == 0: |
|
|||
68 | n -= 1 |
|
|||
69 |
|
84 | |||
70 | return None |
|
85 | coldelta = 0 # column increment for new opening parens | |
71 |
|
86 | coloffset = -1 # column offset for the current line (-1: TBD) | ||
72 | def _ensuresysstr(j): |
|
87 | parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) | |
73 | """Make sure the token at j is a system string |
|
88 | for i, t in enumerate(tokens): | |
74 |
|
89 | # Compute the column offset for the current line, such that | ||
75 | Remember the given token so the string transformer won't add |
|
90 | # the current line will be aligned to the last opening paren | |
76 |
|
|
91 | # as before. | |
77 |
|
92 | if coloffset < 0: | ||
78 | Ignores tokens that are not strings. Assumes bounds checking has |
|
93 | if t.start[1] == parens[-1][1]: | |
79 | already been done. |
|
94 | coloffset = parens[-1][2] | |
|
95 | elif t.start[1] + 1 == parens[-1][1]: | |||
|
96 | # fix misaligned indent of s/util.Abort/error.Abort/ | |||
|
97 | coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) | |||
|
98 | else: | |||
|
99 | coloffset = 0 | |||
80 |
|
100 | |||
81 | """ |
|
101 | # Reset per-line attributes at EOL. | |
82 | st = tokens[j] |
|
102 | if t.type in (token.NEWLINE, tokenize.NL): | |
83 | if st.type == token.STRING and st.string.startswith(("'", '"')): |
|
103 | yield adjusttokenpos(t, coloffset) | |
84 | sysstrtokens.add(st) |
|
104 | coldelta = 0 | |
|
105 | coloffset = -1 | |||
|
106 | continue | |||
|
107 | ||||
|
108 | # Remember the last paren position. | |||
|
109 | if _isop(i, '(', '[', '{'): | |||
|
110 | parens.append(t.end + (coloffset + coldelta,)) | |||
|
111 | elif _isop(i, ')', ']', '}'): | |||
|
112 | parens.pop() | |||
85 |
|
113 | |||
86 | coldelta = 0 # column increment for new opening parens |
|
114 | # Convert most string literals to byte literals. String literals | |
87 | coloffset = -1 # column offset for the current line (-1: TBD) |
|
115 | # in Python 2 are bytes. String literals in Python 3 are unicode. | |
88 | parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) |
|
116 | # Most strings in Mercurial are bytes and unicode strings are rare. | |
89 | for i, t in enumerate(tokens): |
|
117 | # Rather than rewrite all string literals to use ``b''`` to indicate | |
90 | # Compute the column offset for the current line, such that |
|
118 | # byte strings, we apply this token transformer to insert the ``b`` | |
91 | # the current line will be aligned to the last opening paren |
|
119 | # prefix nearly everywhere. | |
92 | # as before. |
|
120 | if t.type == token.STRING and t not in sysstrtokens: | |
93 | if coloffset < 0: |
|
121 | s = t.string | |
94 | if t.start[1] == parens[-1][1]: |
|
|||
95 | coloffset = parens[-1][2] |
|
|||
96 | elif t.start[1] + 1 == parens[-1][1]: |
|
|||
97 | # fix misaligned indent of s/util.Abort/error.Abort/ |
|
|||
98 | coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) |
|
|||
99 | else: |
|
|||
100 | coloffset = 0 |
|
|||
101 |
|
122 | |||
102 | # Reset per-line attributes at EOL. |
|
123 | # Preserve docstrings as string literals. This is inconsistent | |
103 | if t.type in (token.NEWLINE, tokenize.NL): |
|
124 | # with regular unprefixed strings. However, the | |
|
125 | # "from __future__" parsing (which allows a module docstring to | |||
|
126 | # exist before it) doesn't properly handle the docstring if it | |||
|
127 | # is b''' prefixed, leading to a SyntaxError. We leave all | |||
|
128 | # docstrings as unprefixed to avoid this. This means Mercurial | |||
|
129 | # components touching docstrings need to handle unicode, | |||
|
130 | # unfortunately. | |||
|
131 | if s[0:3] in ("'''", '"""'): | |||
104 | yield adjusttokenpos(t, coloffset) |
|
132 | yield adjusttokenpos(t, coloffset) | |
105 | coldelta = 0 |
|
|||
106 | coloffset = -1 |
|
|||
107 | continue |
|
133 | continue | |
108 |
|
134 | |||
109 | # Remember the last paren position. |
|
135 | # If the first character isn't a quote, it is likely a string | |
110 | if _isop(i, '(', '[', '{'): |
|
136 | # prefixing character (such as 'b', 'u', or 'r'). Ignore. | |
111 | parens.append(t.end + (coloffset + coldelta,)) |
|
137 | if s[0] not in ("'", '"'): | |
112 | elif _isop(i, ')', ']', '}'): |
|
138 | yield adjusttokenpos(t, coloffset) | |
113 | parens.pop() |
|
|||
114 |
|
||||
115 | # Convert most string literals to byte literals. String literals |
|
|||
116 | # in Python 2 are bytes. String literals in Python 3 are unicode. |
|
|||
117 | # Most strings in Mercurial are bytes and unicode strings are rare. |
|
|||
118 | # Rather than rewrite all string literals to use ``b''`` to indicate |
|
|||
119 | # byte strings, we apply this token transformer to insert the ``b`` |
|
|||
120 | # prefix nearly everywhere. |
|
|||
121 | if t.type == token.STRING and t not in sysstrtokens: |
|
|||
122 | s = t.string |
|
|||
123 |
|
||||
124 | # Preserve docstrings as string literals. This is inconsistent |
|
|||
125 | # with regular unprefixed strings. However, the |
|
|||
126 | # "from __future__" parsing (which allows a module docstring to |
|
|||
127 | # exist before it) doesn't properly handle the docstring if it |
|
|||
128 | # is b''' prefixed, leading to a SyntaxError. We leave all |
|
|||
129 | # docstrings as unprefixed to avoid this. This means Mercurial |
|
|||
130 | # components touching docstrings need to handle unicode, |
|
|||
131 | # unfortunately. |
|
|||
132 | if s[0:3] in ("'''", '"""'): |
|
|||
133 | yield adjusttokenpos(t, coloffset) |
|
|||
134 | continue |
|
|||
135 |
|
||||
136 | # If the first character isn't a quote, it is likely a string |
|
|||
137 | # prefixing character (such as 'b', 'u', or 'r'). Ignore. |
|
|||
138 | if s[0] not in ("'", '"'): |
|
|||
139 | yield adjusttokenpos(t, coloffset) |
|
|||
140 | continue |
|
|||
141 |
|
||||
142 | # String literal. Prefix to make a b'' string. |
|
|||
143 | yield adjusttokenpos(t._replace(string='b%s' % t.string), |
|
|||
144 | coloffset) |
|
|||
145 | coldelta += 1 |
|
|||
146 | continue |
|
139 | continue | |
147 |
|
140 | |||
148 | # This looks like a function call. |
|
141 | # String literal. Prefix to make a b'' string. | |
149 | if t.type == token.NAME and _isop(i + 1, '('): |
|
142 | yield adjusttokenpos(t._replace(string='b%s' % t.string), | |
150 | fn = t.string |
|
143 | coloffset) | |
|
144 | coldelta += 1 | |||
|
145 | continue | |||
151 |
|
146 | |||
152 | # *attr() builtins don't accept byte strings to 2nd argument. |
|
147 | # This looks like a function call. | |
153 | if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and |
|
148 | if t.type == token.NAME and _isop(i + 1, '('): | |
154 | not _isop(i - 1, '.')): |
|
149 | fn = t.string | |
155 | arg1idx = _findargnofcall(1) |
|
150 | ||
156 | if arg1idx is not None: |
|
151 | # *attr() builtins don't accept byte strings to 2nd argument. | |
157 | _ensuresysstr(arg1idx) |
|
152 | if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and | |
|
153 | not _isop(i - 1, '.')): | |||
|
154 | arg1idx = _findargnofcall(1) | |||
|
155 | if arg1idx is not None: | |||
|
156 | _ensuresysstr(arg1idx) | |||
158 |
|
157 | |||
159 |
|
|
158 | # .encode() and .decode() on str/bytes/unicode don't accept | |
160 |
|
|
159 | # byte strings on Python 3. | |
161 |
|
|
160 | elif fn in ('encode', 'decode') and _isop(i - 1, '.'): | |
162 |
|
|
161 | for argn in range(2): | |
163 |
|
|
162 | argidx = _findargnofcall(argn) | |
164 |
|
|
163 | if argidx is not None: | |
165 |
|
|
164 | _ensuresysstr(argidx) | |
166 |
|
165 | |||
167 |
|
|
166 | # It changes iteritems/itervalues to items/values as they are not | |
168 |
|
|
167 | # present in Python 3 world. | |
169 |
|
|
168 | elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): | |
170 |
|
|
169 | yield adjusttokenpos(t._replace(string=fn[4:]), coloffset) | |
171 |
|
|
170 | continue | |
172 |
|
171 | |||
173 |
|
|
172 | # Emit unmodified token. | |
174 |
|
|
173 | yield adjusttokenpos(t, coloffset) | |
175 |
|
174 | |||
176 | def process(fin, fout, opts): |
|
175 | def process(fin, fout, opts): | |
177 | tokens = tokenize.tokenize(fin.readline) |
|
176 | tokens = tokenize.tokenize(fin.readline) |
General Comments 0
You need to be logged in to leave comments.
Login now