byteify-strings: remove superfluous "if True" block
Yuya Nishihara
r38410:f77bbd34 default
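
The guard being removed is a no-op: a module-level "if True:" always executes, so dropping it and dedenting the body changes nothing at runtime, it only removes one gratuitous indentation level. A tiny sketch of the equivalence (greet_wrapped and greet_flat are hypothetical names, not from the script):

if True:
    def greet_wrapped():
        return 'hi'

def greet_flat():
    return 'hi'

assert greet_wrapped() == greet_flat() == 'hi'
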
@@ -23,155 +23,154 @@ def adjusttokenpos(t, ofs):

The hunk removes the "if True:" wrapper around replacetokens() and dedents its body by one level; after the change the affected region reads:

    return t._replace(start=(t.start[0], t.start[1] + ofs),
                      end=(t.end[0], t.end[1] + ofs))

def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.
    """
    sysstrtokens = set()

    # The following utility functions access the tokens list and i index of
    # the for i, t enumerate(tokens) loop below
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there is not that many arguments.

        Assumes that token[i + 1] is '('.

        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensuresysstr(j):
        """Make sure the token at j is a system string

        Remember the given token so the string transformer won't add
        the byte prefix.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.

        """
        st = tokens[j]
        if st.type == token.STRING and st.string.startswith(("'", '"')):
            sysstrtokens.add(st)

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            if t.start[1] == parens[-1][1]:
                coloffset = parens[-1][2]
            elif t.start[1] + 1 == parens[-1][1]:
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in (token.NEWLINE, tokenize.NL):
            yield adjusttokenpos(t, coloffset)
            coldelta = 0
            coloffset = -1
            continue

        # Remember the last paren position.
        if _isop(i, '(', '[', '{'):
            parens.append(t.end + (coloffset + coldelta,))
        elif _isop(i, ')', ']', '}'):
            parens.pop()

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                yield adjusttokenpos(t, coloffset)
                continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
            if s[0] not in ("'", '"'):
                yield adjusttokenpos(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield adjusttokenpos(t._replace(string='b%s' % t.string),
                                 coloffset)
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
                not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                continue

        # Emit unmodified token.
        yield adjusttokenpos(t, coloffset)

def process(fin, fout, opts):
    tokens = tokenize.tokenize(fin.readline)
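
For context, the transformer rides on the standard tokenize module: process() tokenizes the input, replacetokens() rewrites the stream, and the result is written back out. Below is a minimal, self-contained sketch of that round trip, not the script itself: byteify_sketch is a hypothetical name, it only adds the b prefix, and it hands untokenize bare (type, string) pairs, whereas the real code keeps full token positions and shifts columns (adjusttokenpos, coldelta, the parens stack) so the original layout survives.

import io
import token
import tokenize

def byteify_sketch(source):
    """Prefix bare string literals with b'' -- a stripped-down sketch of the
    token-stream approach replacetokens() uses, with none of its special
    cases or column bookkeeping."""
    readline = io.BytesIO(source).readline
    out = []
    for t in tokenize.tokenize(readline):
        s = t.string
        if t.type == token.STRING and s[:1] in ("'", '"'):
            s = 'b%s' % s
        # Hand untokenize bare (type, string) pairs; the real script keeps the
        # full token positions and shifts columns so layout is preserved.
        out.append((t.type, s))
    return tokenize.untokenize(out)

print(byteify_sketch(b"x = 'foo'\n").decode('utf-8'))  # prints roughly: x =b'foo'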
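
The getattr/setattr/hasattr/safehasattr special case exists because Python 3 requires attribute names to be native str; a blindly byteified second argument raises TypeError, which is why _ensuresysstr() keeps it unprefixed (safehasattr is Mercurial's own helper; the Repo class and ui attribute below are made up for illustration):

class Repo(object):
    ui = 'stub user interface'

repo = Repo()
print(getattr(repo, 'ui'))    # str attribute name: works
try:
    getattr(repo, b'ui')      # what naive byteification would produce
except TypeError as exc:
    print('TypeError:', exc)  # attribute names must be str on Python 3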
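
The encode/decode and dictiter branches compensate for two more Python 3 differences: encoding names passed to .encode()/.decode() must be str, and dict.iteritems()/itervalues() are gone in favor of items()/values(). A small illustration with throwaway values:

text = 'caf\xe9'
print(text.encode('utf-8'))        # b'caf\xc3\xa9' -- str encoding name works
try:
    text.encode(b'utf-8')          # byteified encoding name fails on Python 3
except TypeError as exc:
    print('TypeError:', exc)

d = {b'key': b'value'}
print(list(d.items()))             # items()/values() replace iteritems()/itervalues()
print(hasattr(d, 'iteritems'))     # False on Python 3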