byteify-strings: try to preserve column alignment
Yuya Nishihara
r38409:47dd23e6 default
@@ -18,6 +18,11 @@ import tempfile
 import token
 import tokenize

+def adjusttokenpos(t, ofs):
+    """Adjust start/end column of the given token"""
+    return t._replace(start=(t.start[0], t.start[1] + ofs),
+                      end=(t.end[0], t.end[1] + ofs))
+
 if True:
     def replacetokens(tokens, opts):
         """Transform a stream of tokens from raw to Python 3.
@@ -78,7 +83,35 @@ if True:
             if st.type == token.STRING and st.string.startswith(("'", '"')):
                 sysstrtokens.add(st)

+        coldelta = 0  # column increment for new opening parens
+        coloffset = -1  # column offset for the current line (-1: TBD)
+        parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
         for i, t in enumerate(tokens):
+            # Compute the column offset for the current line, such that
+            # the current line will be aligned to the last opening paren
+            # as before.
+            if coloffset < 0:
+                if t.start[1] == parens[-1][1]:
+                    coloffset = parens[-1][2]
+                elif t.start[1] + 1 == parens[-1][1]:
+                    # fix misaligned indent of s/util.Abort/error.Abort/
+                    coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
+                else:
+                    coloffset = 0
+
+            # Reset per-line attributes at EOL.
+            if t.type in (token.NEWLINE, tokenize.NL):
+                yield adjusttokenpos(t, coloffset)
+                coldelta = 0
+                coloffset = -1
+                continue
+
+            # Remember the last paren position.
+            if _isop(i, '(', '[', '{'):
+                parens.append(t.end + (coloffset + coldelta,))
+            elif _isop(i, ')', ']', '}'):
+                parens.pop()
+
             # Convert most string literals to byte literals. String literals
             # in Python 2 are bytes. String literals in Python 3 are unicode.
             # Most strings in Mercurial are bytes and unicode strings are rare.
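
This bookkeeping exists because the b'' prefixes inserted further down make each emitted line longer than its recorded token spans, so continuation lines aligned to an opening paren would drift. A small standalone sketch of the problem (the input snippet and names are illustrative only), using naive prefixing with no column adjustment:

import io
import tokenize

# Prefix every string literal but leave all token positions untouched.
src = b"f('foo', g('bar',\n           'baz'))\n"
toks = [t._replace(string='b%s' % t.string) if t.type == tokenize.STRING else t
        for t in tokenize.tokenize(io.BytesIO(src).readline)]
print(tokenize.untokenize(toks).decode('ascii'))
# f(b'foo', g(b'bar',
#            b'baz'))
# 'baz' now sits one column short of 'bar'; the coloffset/coldelta tracking
# above shifts the continuation line so it stays aligned with the argument
# that follows the remembered paren.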
@@ -97,17 +130,19 @@ if True:
                 # components touching docstrings need to handle unicode,
                 # unfortunately.
                 if s[0:3] in ("'''", '"""'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue

                 # If the first character isn't a quote, it is likely a string
                 # prefixing character (such as 'b', 'u', or 'r'. Ignore.
                 if s[0] not in ("'", '"'):
-                    yield t
+                    yield adjusttokenpos(t, coloffset)
                     continue

                 # String literal. Prefix to make a b'' string.
-                yield t._replace(string='b%s' % t.string)
+                yield adjusttokenpos(t._replace(string='b%s' % t.string),
+                                     coloffset)
+                coldelta += 1
                 continue

             # This looks like a function call.
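
The coldelta increment above records that the emitted line has grown by one character relative to the token positions, so any paren opened later on the same line is remembered together with that extra width. A hypothetical walk-through on the first line of the earlier sketch (variable names here are illustrative only):

# After 'foo' has been rewritten to b'foo' on the line:  f('foo', g('bar',
coldelta = 1           # the emitted line is one column wider than recorded
paren_end_col = 11     # recorded end column of the '(' that follows g
parens_entry = (1, paren_end_col, coldelta)  # (line, end-column, column-offset)
# A continuation line that starts at the recorded column 11 then receives
# coloffset = 1, keeping it aligned under b'bar', which now prints at column 12.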
@@ -132,11 +167,11 @@ if True:
                 # It changes iteritems/values to items/values as they are not
                 # present in Python 3 world.
                 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
-                    yield t._replace(string=fn[4:])
+                    yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                     continue

             # Emit unmodified token.
-            yield t
+            yield adjusttokenpos(t, coloffset)

     def process(fin, fout, opts):
         tokens = tokenize.tokenize(fin.readline)
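
Every branch now routes its output through adjusttokenpos, including otherwise unmodified tokens, because process() presumably hands the rewritten stream back to tokenize.untokenize, which regenerates whitespace purely from the recorded columns. A tiny round trip illustrating that dependency (a sketch, not taken from the script):

import io
import tokenize

# untokenize rebuilds the text, spaces included, from token positions, so
# consistent column adjustment is what keeps the regenerated source aligned.
src = b"x = 1\n"
assert tokenize.untokenize(tokenize.tokenize(io.BytesIO(src).readline)) == src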