@@ -185,6 +185,58 @@ if sys.version_info[0] >= 3:
         OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
         """
         futureimpline = False
+
+        # The following utility functions access the tokens list and the i
+        # index of the "for i, t in enumerate(tokens)" loop below.
+        def _isop(j, *o):
+            """Assert that tokens[j] is an OP with one of the given values"""
+            try:
+                return tokens[j].type == token.OP and tokens[j].string in o
+            except IndexError:
+                return False
+
+        def _findargnofcall(n):
+            """Find arg n of a call expression (start at 0)
+
+            Returns the index of the first token of that argument, or None
+            if there are not that many arguments.
+
+            Assumes that tokens[i + 1] is '('.
+            """
+            nested = 0
+            for j in range(i + 2, len(tokens)):
+                if _isop(j, ')', ']', '}'):
+                    # end of call, tuple, subscription or dict / set
+                    nested -= 1
+                    if nested < 0:
+                        return None
+                elif n == 0:
+                    # this is the starting position of arg n
+                    return j
+                elif _isop(j, '(', '[', '{'):
+                    nested += 1
+                elif _isop(j, ',') and nested == 0:
+                    n -= 1
+
+            return None
+
+        def _ensureunicode(j):
+            """Make sure the token at j is a unicode string
+
+            This rewrites a string token to include the unicode literal
+            prefix so the string transformer won't add the byte prefix.
+
+            Ignores tokens that are not strings. Assumes bounds checking
+            has already been done.
+            """
+            st = tokens[j]
+            if st.type == token.STRING and st.string.startswith(("'", '"')):
+                rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
+                                        st.start, st.end, st.line)
+                tokens[j] = rt
+
         for i, t in enumerate(tokens):
             # Convert most string literals to byte literals. String literals
             # in Python 2 are bytes. String literals in Python 3 are unicode.
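Reviewer note (not part of the patch): the subtle helper here is
_findargnofcall, which has to ignore commas inside nested (), [] and {}
while counting top-level arguments. A standalone sketch of the same scan,
runnable as-is, using a hypothetical name findargtoken and only the stdlib
tokenize/token modules:

    import io
    import token
    import tokenize

    def findargtoken(source, n):
        """Return the first token of argument n of the first call in source."""
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        # Locate the opening '(' of the call.
        start = next(j for j, t in enumerate(toks)
                     if t.type == token.OP and t.string == '(')
        nested = 0
        for j in range(start + 1, len(toks)):
            t = toks[j]
            if t.type == token.OP and t.string in (')', ']', '}'):
                nested -= 1
                if nested < 0:
                    return None    # walked past the call's closing ')'
            elif n == 0:
                return t           # first token of the requested argument
            elif t.type == token.OP and t.string in ('(', '[', '{'):
                nested += 1
            elif t.type == token.OP and t.string == ',' and nested == 0:
                n -= 1             # a top-level comma ends one argument
        return None

    print(findargtoken("open(name, 'rb')\n", 1).string)  # -> 'rb'
    print(findargtoken("f(g(a, b), c)\n", 1).string)     # -> c

The nested counter is why f(g(a, b), c) resolves argument 1 to c rather
than to b.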
@@ -241,91 +293,35 @@ if sys.version_info[0] >= 3:
                                          '')
                 continue
 
-            try:
-                nexttoken = tokens[i + 1]
-            except IndexError:
-                nexttoken = None
-
-            try:
-                prevtoken = tokens[i - 1]
-            except IndexError:
-                prevtoken = None
-
             # This looks like a function call.
-            if (t.type == token.NAME and nexttoken and
-                nexttoken.type == token.OP and nexttoken.string == '('):
+            if t.type == token.NAME and _isop(i + 1, '('):
                 fn = t.string
 
                 # *attr() builtins don't accept byte strings as their 2nd argument.
-                # Rewrite the token to include the unicode literal prefix so
-                # the string transformer above doesn't add the byte prefix.
-                if fn in ('getattr', 'setattr', 'hasattr', 'safehasattr'):
-                    try:
-                        # (NAME, 'getattr')
-                        # (OP, '(')
-                        # (NAME, 'foo')
-                        # (OP, ',')
-                        # (NAME|STRING, foo)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and
+                        not _isop(i - 1, '.')):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # .encode() and .decode() on str/bytes/unicode don't accept
-                # byte strings on Python 3. Rewrite the token to include the
-                # unicode literal prefix so the string transformer above doesn't
-                # add the byte prefix. The loop helps in handling multiple
-                # arguments.
-                if (fn in ('encode', 'decode') and
-                    prevtoken.type == token.OP and prevtoken.string == '.'):
-                    # (OP, '.')
-                    # (NAME, 'encode')
-                    # (OP, '(')
-                    # [(VARIABLE, encoding)]
-                    # [(OP, '.')]
-                    # [(VARIABLE, encoding)]
-                    # [(OP, ',')]
-                    # (STRING, 'utf-8')
-                    # (OP, ')')
-                    j = i
-                    try:
-                        while (tokens[j + 1].string in ('(', ',', '.')):
-                            st = tokens[j + 2]
-                            if (st.type == token.STRING and
-                                st.string[0] in ("'", '"')):
-                                rt = tokenize.TokenInfo(st.type,
-                                                        'u%s' % st.string,
-                                                        st.start, st.end, st.line)
-                                tokens[j + 2] = rt
-                            j = j + 2
-                    except IndexError:
-                        pass
+                # byte strings on Python 3.
+                elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
+                    for argn in range(2):
+                        argidx = _findargnofcall(argn)
+                        if argidx is not None:
+                            _ensureunicode(argidx)
 
-                # Bare open call (not an attribute on something else)
-                if (fn == 'open' and not (prevtoken.type == token.OP and
-                                          prevtoken.string == '.')):
-                    try:
-                        # (NAME, 'open')
-                        # (OP, '(')
-                        # (NAME|STRING, 'filename')
-                        # (OP, ',')
-                        # (NAME|STRING, mode)
-                        st = tokens[i + 4]
-                        if (st.type == token.STRING and
-                            st.string[0] in ("'", '"')):
-                            rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
-                                                    st.start, st.end, st.line)
-                            tokens[i + 4] = rt
-                    except IndexError:
-                        pass
+                # Bare open call (not an attribute on something else); the
+                # second argument (mode) must be a string, not bytes.
+                elif fn == 'open' and not _isop(i - 1, '.'):
+                    arg1idx = _findargnofcall(1)
+                    if arg1idx is not None:
+                        _ensureunicode(arg1idx)
 
                 # It changes iteritems to items, as iteritems is not
                 # present in the Python 3 world.
-                if fn == 'iteritems':
+                elif fn == 'iteritems':
                     yield tokenize.TokenInfo(t.type, 'items',
                                              t.start, t.end, t.line)
                     continue
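Reviewer note (not part of the patch): every branch above now reduces to
one primitive: locate an argument with _findargnofcall, then let
_ensureunicode swap in a u-prefixed tokenize.TokenInfo so the
byte-prefixing pass earlier in the loop leaves that argument alone. A
minimal round trip of that primitive, with a hypothetical helper name:

    import io
    import token
    import tokenize

    def uprefix_after_comma(source):
        """u-prefix the string token following the first comma."""
        toks = list(tokenize.generate_tokens(io.StringIO(source).readline))
        for j, t in enumerate(toks):
            if t.type == token.OP and t.string == ',':
                st = toks[j + 1]
                if st.type == token.STRING and st.string.startswith(("'", '"')):
                    # The same rewrite _ensureunicode applies to the tokens list.
                    toks[j + 1] = tokenize.TokenInfo(st.type, 'u%s' % st.string,
                                                     st.start, st.end, st.line)
                break
        return tokenize.untokenize(toks)

    print(uprefix_after_comma("getattr(obj, 'name')\n"))
    # -> getattr(obj, u'name')

In the full transformer the remaining string literals gain a b prefix
instead, so only these marked call arguments stay unicode on Python 3.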
@@ -337,7 +333,7 @@ if sys.version_info[0] >= 3:
     # ``replacetoken`` or any mechanism that changes semantics of module
     # loading is changed. Otherwise cached bytecode may get loaded without
     # the new transformation mechanisms applied.
-    BYTECODEHEADER = b'HG\x00\x05'
+    BYTECODEHEADER = b'HG\x00\x06'
 
     class hgloader(importlib.machinery.SourceFileLoader):
         """Custom module loader that transforms source code.