@@ -185,6 +185,58 @@ if sys.version_info[0] >= 3:
         OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
         """
         futureimpline = False
+
+        # The following utility functions access the tokens list and the i
+        # index of the "for i, t in enumerate(tokens)" loop below.
+        def _isop(j, *o):
+            """Assert that tokens[j] is an OP with one of the given values"""
+            try:
+                return tokens[j].type == token.OP and tokens[j].string in o
+            except IndexError:
+                return False
+
+        def _findargnofcall(n):
+            """Find arg n of a call expression (start at 0)
+
+            Returns the index of the first token of that argument, or None
+            if there are not that many arguments.
+
+            Assumes that tokens[i + 1] is '('.
+            """
+            nested = 0
+            for j in range(i + 2, len(tokens)):
+                if _isop(j, ')', ']', '}'):
+                    # end of call, tuple, subscription or dict / set
+                    nested -= 1
+                    if nested < 0:
+                        return None
+                elif n == 0:
+                    # this is the starting position of arg n
+                    return j
+                elif _isop(j, '(', '[', '{'):
+                    nested += 1
+                elif _isop(j, ',') and nested == 0:
+                    n -= 1
+
+            return None
+
+        def _ensureunicode(j):
+            """Make sure the token at j is a unicode string
+
+            This rewrites a string token to include the unicode literal
+            prefix so the string transformer won't add the byte prefix.
+
+            Ignores tokens that are not strings. Assumes bounds checking
+            has already been done.
+            """
+            st = tokens[j]
+            if st.type == token.STRING and st.string.startswith(("'", '"')):
+                rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
+                                        st.start, st.end, st.line)
+                tokens[j] = rt
+
         for i, t in enumerate(tokens):
             # Convert most string literals to byte literals. String literals
             # in Python 2 are bytes. String literals in Python 3 are unicode.
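Reviewer note (not part of the patch): the subtle helper here is
_findargnofcall, which has to ignore commas inside nested (), [] and {}
while counting top-level arguments. A standalone sketch of the same scan,
runnable as-is, using a hypothetical name findargtoken and only the stdlib
tokenize/token modules:

    import io
    import token
    import tokenize

    def findargtoken(source, n):
        """Return the first token of argument n of the first call in source."""
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        # Locate the opening '(' of the call.
        start = next(j for j, t in enumerate(toks)
                     if t.type == token.OP and t.string == '(')
        nested = 0
        for j in range(start + 1, len(toks)):
            t = toks[j]
            if t.type == token.OP and t.string in (')', ']', '}'):
                nested -= 1
                if nested < 0:
                    return None    # walked past the call's closing ')'
            elif n == 0:
                return t           # first token of the requested argument
            elif t.type == token.OP and t.string in ('(', '[', '{'):
                nested += 1
            elif t.type == token.OP and t.string == ',' and nested == 0:
                n -= 1             # a top-level comma ends one argument
        return None

    print(findargtoken("open(name, 'rb')\n", 1).string)  # -> 'rb'
    print(findargtoken("f(g(a, b), c)\n", 1).string)     # -> c

The nested counter is why f(g(a, b), c) resolves argument 1 to c rather
than to b.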
@@ -241,91 +293,35 @@ if sys.version_info[0] >= 3:
                                          '')
                 continue
 
-            try:
-                nexttoken = tokens[i + 1]
-            except IndexError:
-                nexttoken = None
-
-            try:
-                prevtoken = tokens[i - 1]
-            except IndexError:
-                prevtoken = None
-
             # This looks like a function call.
-            if (t.type == token.NAME and nexttoken and
-                nexttoken.type == token.OP and nexttoken.string == '('):
+            if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
 
                 # *attr() builtins don't accept byte strings as their 2nd argument.
-                # Rewrite the token to include the unicode literal prefix so
-                # the string transformer above doesn't add the byte prefix.
-                if fn in ('getattr', 'setattr', 'hasattr', 'safehasattr'):
-                    try:
-                        # (NAME, 'getattr')
-                        # (OP, '(')
-                        # (NAME, 'foo')
-                        # (OP, ',')
-                        # (NAME|STRING, foo)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
+                        not _isop(i - 1, '.')):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # .encode() and .decode() on str/bytes/unicode don't accept
-                # byte strings on Python 3. Rewrite the token to include the
-                # unicode literal prefix so the string transformer above doesn't
-                # add the byte prefix. The loop helps in handling multiple
-                # arguments.
-                if (fn in ('encode', 'decode') and
-                    prevtoken.type == token.OP and prevtoken.string == '.'):
-                    # (OP, '.')
-                    # (NAME, 'encode')
-                    # (OP, '(')
-                    # [(VARIABLE, encoding)]
-                    # [(OP, '.')]
-                    # [(VARIABLE, encoding)]
-                    # [(OP, ',')]
-                    # (STRING, 'utf-8')
-                    # (OP, ')')
-                    j = i
-                    try:
-                        while (tokens[j + 1].string in ('(', ',', '.')):
-                            st = tokens[j + 2]
-                            if (st.type == token.STRING and
-                                st.string[0] in ("'", '"')):
-                                rt = tokenize.TokenInfo(st.type,
-                                                        'u%s' % st.string,
-                                                        st.start, st.end, st.line)
-                                tokens[j + 2] = rt
-                            j = j + 2
-                    except IndexError:
-                        pass
+                # byte strings on Python 3.
+                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
+                    for argn in range(2):
+                        argidx = _findargnofcall(argn)
+                        if argidx is not None:
+                            _ensureunicode(argidx)
 
-                # Bare open call (not an attribute on something else)
-                if (fn == 'open' and not (prevtoken.type == token.OP and
-                                          prevtoken.string == '.')):
-                    try:
-                        # (NAME, 'open')
-                        # (OP, '(')
-                        # (NAME|STRING, 'filename')
-                        # (OP, ',')
-                        # (NAME|STRING, mode)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                # Bare open call (not an attribute on something else); the
+                # second argument (mode) must be a string, not bytes.
+                elif fn == 'open' and not _isop(i - 1, '.'):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # It changes iteritems to items, as iteritems is not
                 # present in the Python 3 world.
-                if fn == 'iteritems':
+                elif fn == 'iteritems':
                     yield tokenize.TokenInfo(t.type, 'items',
                                              t.start, t.end, t.line)
                     continue
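Reviewer note (not part of the patch): every branch above now reduces to
one primitive: locate an argument with _findargnofcall, then let
_ensureunicode swap in a u-prefixed tokenize.TokenInfo so the
byte-prefixing pass earlier in the loop leaves that argument alone. A
minimal round trip of that primitive, with a hypothetical helper name:

    import io
    import token
    import tokenize

    def uprefix_after_comma(source):
        """u-prefix the string token following the first comma."""
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        for j, t in enumerate(toks):
            if t.type == token.OP and t.string == ',':
                st = toks[j + 1]
                if st.type == token.STRING and st.string.startswith(("'", '"')):
                    # The same rewrite _ensureunicode applies to the tokens list.
                    toks[j + 1] = tokenize.TokenInfo(st.type, 'u%s' % st.string,
                                                     st.start, st.end, st.line)
                break
        return tokenize.untokenize(toks)

    print(uprefix_after_comma("getattr(obj, 'name')\n"))
    # -> getattr(obj, u'name')

In the full transformer the remaining string literals gain a b prefix
instead, so only these marked call arguments stay unicode on Python 3.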
@@ -337,7 +333,7 @@ if sys.version_info[0] >= 3:
     # ``replacetoken`` or any mechanism that changes semantics of module
     # loading is changed. Otherwise cached bytecode may get loaded without
     # the new transformation mechanisms applied.
-    BYTECODEHEADER = b'HG\x00\x05'
+    BYTECODEHEADER = b'HG\x00\x06'
 
     class hgloader(importlib.machinery.SourceFileLoader):
         """Custom module loader that transforms source code.