##// END OF EJS Templates
fuzz: try and ensure fuzzer tests run against the right python-config...
fuzz: try and ensure fuzzer tests run against the right python-config Also only under python 3. Differential Revision: https://phab.mercurial-scm.org/D9752

File last commit:

r46554:89a2afe3 default
r46875:11735eaa default
Show More
byteify-strings.py
350 lines | 11.5 KiB | text/x-python | PythonLexer
/ contrib / byteify-strings.py
Yuya Nishihara
byteify-strings: add basic command interface
r38404 #!/usr/bin/env python3
#
Yuya Nishihara
byteify-strings: fork py3 code transformer to make it a standalone command...
r38403 # byteify-strings.py - transform string literals to be Python 3 safe
#
# Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Augie Fackler
contrib: have byteify-strings explode if run in Python 2...
r42433 from __future__ import absolute_import, print_function
Yuya Nishihara
byteify-strings: fork py3 code transformer to make it a standalone command...
r38403
Yuya Nishihara
byteify-strings: add basic command interface
r38404 import argparse
Yuya Nishihara
byteify-strings: add --inplace option to write back result
r38405 import contextlib
import errno
import os
Yuya Nishihara
byteify-strings: add basic command interface
r38404 import sys
Yuya Nishihara
byteify-strings: add --inplace option to write back result
r38405 import tempfile
Yuya Nishihara
byteify-strings: fork py3 code transformer to make it a standalone command...
r38403 import token
import tokenize
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: try to preserve column alignment
def adjusttokenpos(t, ofs):
    """Return a copy of token ``t`` with its columns shifted by ``ofs``.

    Only the column component of the ``start`` and ``end`` positions is
    changed; the row component and every other field are kept as they are.
    """
    newstart = (t.start[0], t.start[1] + ofs)
    newend = (t.end[0], t.end[1] + ofs)
    return t._replace(start=newstart, end=newend)
Yuya Nishihara
byteify-strings: try to preserve column alignment
r38409
Yuya Nishihara
byteify-strings: remove superfluous "if True" block
def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    Unprefixed string literals are rewritten with a ``b`` prefix, except:

    - triple-quoted strings that do not directly follow ``=`` (they are
      treated as docstrings);
    - strings recorded as "system strings" by the rules in the main loop
      (arguments of ``getattr()``-like calls, of ``.encode()`` /
      ``.decode()``, the right-hand side of ``__name__ ==``, and item or
      method keys of names listed in ``opts['treat-as-kwargs']``);
    - anything on a line following a ``# no-py3-transform`` comment, or
      between ``# py3-transform: off`` and ``# py3-transform: on``.

    ``tokens`` must be an indexable sequence of tokenize tokens (the helpers
    look ahead and behind by index). ``opts`` is a mapping providing:

    - ``'dictiter'``: when true, ``iteritems(`` / ``itervalues(`` are renamed
      to ``items(`` / ``values(``;
    - ``'allow-attr-methods'``: when true, the ``getattr()``-like rule also
      applies when the name is preceded by ``.``;
    - ``'treat-as-kwargs'``: container of names whose string keys must stay
      system strings.

    Returns a generator of possibly rewritten tokens, with columns adjusted
    to try to preserve alignment after the inserted prefixes.
    """
    sysstrtokens = set()
    eoltypes = (token.NEWLINE, tokenize.NL)
    openers = ('(', '[', '{')
    closers = (')', ']', '}')
    # Callables whose second argument must remain a system string.
    attrfuncs = (
        'getattr',
        'setattr',
        'hasattr',
        'safehasattr',
        'wrapfunction',
        'wrapclass',
        'addattr',
    )

    # The following utility functions access the tokens list and the i index
    # of the "for i, t in enumerate(tokens)" loop below.
    def _isop(j, *o):
        """Assert that tokens[j] is an OP with one of the given values"""
        if j < 0:
            # A negative index would silently wrap around to the end of the
            # token list instead of meaning "there is no such token".
            return False
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0)

        Returns index of the first token of that argument, or None if
        there are not that many arguments.

        Assumes that tokens[i + 1] is '('.
        """
        nested = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, *closers):
                # end of call, tuple, subscription or dict / set
                nested -= 1
                if nested < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, *openers):
                nested += 1
            elif _isop(j, ',') and nested == 0:
                n -= 1

        return None

    def _ensuresysstr(j):
        """Make sure the token at j is a system string

        Remember the given token so the string transformer won't add
        the byte prefix. Consecutive STRING / NEWLINE / NL tokens starting
        at j are walked so that every piece of a multi-line string is
        remembered.

        Ignores tokens that are not strings. An index past the end of the
        token list is a no-op.
        """
        k = j
        while k < len(tokens):
            currtoken = tokens[k]
            if currtoken.type not in (token.STRING,) + eoltypes:
                break
            if currtoken.type == token.STRING and currtoken.string.startswith(
                ("'", '"')
            ):
                sysstrtokens.add(currtoken)
            k += 1

    def _isitemaccess(j):
        """Assert the next tokens form an item access on `tokens[j]` and that
        `tokens[j]` is a name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '[')
                and tokens[j + 2].type == token.STRING
                and _isop(j + 3, ']')
            )
        except IndexError:
            return False

    def _ismethodcall(j, *methodnames):
        """Assert the next tokens form a call to one of `methodnames` with a
        string as first argument on `tokens[j]` and that `tokens[j]` is a
        name.
        """
        try:
            return (
                tokens[j].type == token.NAME
                and _isop(j + 1, '.')
                and tokens[j + 2].type == token.NAME
                and tokens[j + 2].string in methodnames
                and _isop(j + 3, '(')
                and tokens[j + 4].type == token.STRING
            )
        except IndexError:
            return False

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
    ignorenextline = False  # don't transform the next line
    insideignoreblock = False  # don't transform until turned off
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            lastparen = parens[-1]
            if t.start[1] == lastparen[1]:
                coloffset = lastparen[2]
            elif (
                t.start[1] + 1 == lastparen[1]
                and lastparen[3] not in eoltypes
            ):
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = lastparen[2] + (lastparen[1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in eoltypes:
            yield adjusttokenpos(t, coloffset)
            coldelta = 0
            coloffset = -1
            if not insideignoreblock:
                # "i > 0" keeps tokens[i - 1] from wrapping around to the
                # last token when the very first token is an EOL.
                ignorenextline = (
                    i > 0
                    and tokens[i - 1].type == token.COMMENT
                    and tokens[i - 1].string == "# no-py3-transform"
                )
            continue

        if t.type == token.COMMENT:
            if t.string == "# py3-transform: off":
                insideignoreblock = True
            if t.string == "# py3-transform: on":
                insideignoreblock = False

        # Remember the last paren position. This is done for ignored tokens
        # as well: otherwise a bracket opened on an ignored line and closed
        # on a transformed one would unbalance the stack, pop its sentinel
        # and make the parens[-1] lookup above fail on the following line.
        if _isop(i, *openers):
            nexttype = tokens[i + 1].type if i + 1 < len(tokens) else -1
            parens.append(t.end + (coloffset + coldelta, nexttype))
        elif _isop(i, *closers) and len(parens) > 1:
            # never pop the bottom sentinel, even on unbalanced input
            parens.pop()

        if ignorenextline or insideignoreblock:
            yield adjusttokenpos(t, coloffset)
            continue

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                # If it's assigned to something, it's not a docstring
                if not _isop(i - 1, '='):
                    yield adjusttokenpos(t, coloffset)
                    continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'). Ignore.
            if s[0] not in ("'", '"'):
                yield adjusttokenpos(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield adjusttokenpos(t._replace(string='b%s' % t.string), coloffset)
            # Everything after this token on the line moved one column right.
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if fn in attrfuncs and (
                opts['allow-attr-methods'] or not _isop(i - 1, '.')
            ):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # It changes iteritems/itervalues to items/values as they are
            # not present in Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                continue

        # Keys used on kwargs-like objects must stay system strings.
        if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
            if _isitemaccess(i):
                _ensuresysstr(i + 2)
            if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                _ensuresysstr(i + 4)

        # Looks like "if __name__ == '__main__'".
        if (
            t.type == token.NAME
            and t.string == '__name__'
            and _isop(i + 1, '==')
        ):
            _ensuresysstr(i + 2)

        # Emit unmodified token.
        yield adjusttokenpos(t, coloffset)
Yuya Nishihara
byteify-strings: add basic command interface
r38404
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: do not rewrite iteritems() and itervalues() by default...
def process(fin, fout, opts):
    """Rewrite the Python source read from ``fin`` and write it to ``fout``.

    ``fin.readline`` is handed to ``tokenize.tokenize``; the resulting
    tokens are materialized into a list, passed through ``replacetokens``
    together with ``opts``, and the untokenized result is given to
    ``fout.write``.
    """
    rawtokens = list(tokenize.tokenize(fin.readline))
    rewritten = replacetokens(rawtokens, opts)
    fout.write(tokenize.untokenize(rewritten))
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: add --inplace option to write back result
def tryunlink(fname):
    """Remove ``fname``, tolerating the case where it does not exist.

    An ``OSError`` whose errno is ``ENOENT`` is swallowed; any other
    ``OSError`` propagates to the caller.
    """
    try:
        os.unlink(fname)
    except OSError as exc:
        if exc.errno == errno.ENOENT:
            return
        raise
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: add --inplace option to write back result
@contextlib.contextmanager
def editinplace(fname):
    """Context manager that replaces ``fname`` with a freshly written file.

    A named temporary file is created in the same directory as ``fname``
    and yielded to the caller for writing. If the ``with`` body completes
    without raising, the temporary file is closed, given the permission
    bits of the existing ``fname`` (when it exists) and moved over
    ``fname``. If the body raises, ``fname`` is left untouched. In both
    cases the temporary file is closed and removed if it is still there.
    """
    n = os.path.basename(fname)
    d = os.path.dirname(fname)
    fp = tempfile.NamedTemporaryFile(
        prefix='.%s-' % n, suffix='~', dir=d, delete=False
    )
    try:
        yield fp
        fp.close()
        # The temporary file is created with restrictive permissions; carry
        # over the mode of the file being replaced so that e.g. executable
        # scripts stay executable after an in-place edit.
        try:
            mode = os.stat(fname).st_mode & 0o7777
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            mode = None  # nothing to replace: keep the temp file's mode
        if mode is not None:
            os.chmod(fp.name, mode)
        # os.replace() overwrites an existing destination on every
        # platform, so no unlink-then-rename dance is needed on Windows.
        os.replace(fp.name, fname)
    finally:
        # Closing twice is harmless; after a successful replace the
        # temporary name no longer exists and tryunlink() ignores that.
        fp.close()
        tryunlink(fp.name)
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: add basic command interface
def main(argv=None):
    """Command-line entry point: byteify each file named in the arguments.

    ``argv`` is the list of command-line arguments to parse (without the
    program name). When it is None, argparse falls back to
    ``sys.argv[1:]``, which is what the script entry point relies on.

    Each file is resolved with ``os.path.realpath`` first, so that a symlink
    is followed to its target. With ``--inplace`` the file is rewritten
    through ``editinplace``; otherwise the result is written to
    ``sys.stdout.buffer``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--version', action='version', version='Byteify strings 1.0'
    )
    ap.add_argument(
        '-i',
        '--inplace',
        action='store_true',
        default=False,
        help='edit files in place',
    )
    ap.add_argument(
        '--dictiter',
        action='store_true',
        default=False,
        help='rewrite iteritems() and itervalues()',
    )
    ap.add_argument(
        '--allow-attr-methods',
        action='store_true',
        default=False,
        help='also handle attr*() when they are methods',
    )
    ap.add_argument(
        '--treat-as-kwargs',
        nargs="+",
        default=[],
        help="ignore kwargs-like objects",
    )
    ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
    args = ap.parse_args(argv)
    opts = {
        'dictiter': args.dictiter,
        'treat-as-kwargs': set(args.treat_as_kwargs),
        'allow-attr-methods': args.allow_attr_methods,
    }
    for fname in args.files:
        # Resolve symlinks so the link target is the file that is processed.
        fname = os.path.realpath(fname)
        if args.inplace:
            with editinplace(fname) as fout:
                with open(fname, 'rb') as fin:
                    process(fin, fout, opts)
        else:
            with open(fname, 'rb') as fin:
                # process() writes bytes, hence the binary stdout buffer.
                fout = sys.stdout.buffer
                process(fin, fout, opts)
Yuya Nishihara
byteify-strings: add basic command interface
r38404
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
byteify-strings: add basic command interface
if __name__ == '__main__':
    # Refuse to run on interpreters older than 3.7. The diagnostic goes to
    # stderr so that it never mixes with transformed source on stdout.
    if sys.version_info[0:2] < (3, 7):
        print('This script must be run under Python 3.7+', file=sys.stderr)
        sys.exit(3)
    main()