##// END OF EJS Templates
byteify-strings: do not rewrite system string literals to u''...
Yuya Nishihara -
r38408:1d68fd5f default
parent child Browse files
Show More
@@ -27,6 +27,8 b' if True:'
27 The input token list may be mutated as part of processing. However,
27 The input token list may be mutated as part of processing. However,
28 its changes do not necessarily match the output token stream.
28 its changes do not necessarily match the output token stream.
29 """
29 """
30 sysstrtokens = set()
31
30 # The following utility functions access the tokens list and i index of
32 # The following utility functions access the tokens list and i index of
31 # the ``for i, t in enumerate(tokens)`` loop below
33 # the ``for i, t in enumerate(tokens)`` loop below
32 def _isop(j, *o):
34 def _isop(j, *o):
@@ -62,11 +64,11 b' if True:'
62
64
63 return None
65 return None
64
66
65 def _ensureunicode(j):
67 def _ensuresysstr(j):
66 """Make sure the token at j is a unicode string
68 """Make sure the token at j is a system string
67
69
68 This rewrites a string token to include the unicode literal prefix
70 Remember the given token so the string transformer won't add
69 so the string transformer won't add the byte prefix.
71 the byte prefix.
70
72
71 Ignores tokens that are not strings. Assumes bounds checking has
73 Ignores tokens that are not strings. Assumes bounds checking has
72 already been done.
74 already been done.
@@ -74,7 +76,7 b' if True:'
74 """
76 """
75 st = tokens[j]
77 st = tokens[j]
76 if st.type == token.STRING and st.string.startswith(("'", '"')):
78 if st.type == token.STRING and st.string.startswith(("'", '"')):
77 tokens[j] = st._replace(string='u%s' % st.string)
79 sysstrtokens.add(st)
78
80
79 for i, t in enumerate(tokens):
81 for i, t in enumerate(tokens):
80 # Convert most string literals to byte literals. String literals
82 # Convert most string literals to byte literals. String literals
@@ -83,7 +85,7 b' if True:'
83 # Rather than rewrite all string literals to use ``b''`` to indicate
85 # Rather than rewrite all string literals to use ``b''`` to indicate
84 # byte strings, we apply this token transformer to insert the ``b``
86 # byte strings, we apply this token transformer to insert the ``b``
85 # prefix nearly everywhere.
87 # prefix nearly everywhere.
86 if t.type == token.STRING:
88 if t.type == token.STRING and t not in sysstrtokens:
87 s = t.string
89 s = t.string
88
90
89 # Preserve docstrings as string literals. This is inconsistent
91 # Preserve docstrings as string literals. This is inconsistent
@@ -117,7 +119,7 b' if True:'
117 not _isop(i - 1, '.')):
119 not _isop(i - 1, '.')):
118 arg1idx = _findargnofcall(1)
120 arg1idx = _findargnofcall(1)
119 if arg1idx is not None:
121 if arg1idx is not None:
120 _ensureunicode(arg1idx)
122 _ensuresysstr(arg1idx)
121
123
122 # .encode() and .decode() on str/bytes/unicode don't accept
124 # .encode() and .decode() on str/bytes/unicode don't accept
123 # byte strings on Python 3.
125 # byte strings on Python 3.
@@ -125,7 +127,7 b' if True:'
125 for argn in range(2):
127 for argn in range(2):
126 argidx = _findargnofcall(argn)
128 argidx = _findargnofcall(argn)
127 if argidx is not None:
129 if argidx is not None:
128 _ensureunicode(argidx)
130 _ensuresysstr(argidx)
129
131
130 # It changes iteritems/itervalues to items/values as they are not
132 # It changes iteritems/itervalues to items/values as they are not
131 # present in the Python 3 world.
133 # present in the Python 3 world.
General Comments 0
You need to be logged in to leave comments. Login now