upstream/mercurial-mirror Commit - r42914:26a31c88

byteify-strings: fix misalignment with multi-line parenthesis...

Raphaël Gomès -

r42914:26a31c88 default

parent child

contrib/byteify-strings.py

0 +10 -6

             #!/usr/bin/env python3
             #
             # byteify-strings.py - transform string literals to be Python 3 safe
             #
             # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import, print_function
             import argparse
             import contextlib
             import errno
             import os
             import sys
             import tempfile
             import token
             import tokenize
             def adjusttokenpos(t, ofs):
                 """Return a copy of token ``t`` shifted sideways by ``ofs`` columns.

                 Only the column part of ``start`` and ``end`` is changed; the line
                 numbers and all other token fields are carried over as they are.
                 """
                 startline, startcol = t.start
                 endline, endcol = t.end
                 return t._replace(
                     start=(startline, startcol + ofs),
                     end=(endline, endcol + ofs),
                 )
             def replacetokens(tokens, opts):
                 """Transform a stream of tokens from raw to Python 3.
                 Returns a generator of possibly rewritten tokens.
                 The input token list may be mutated as part of processing. However,
                 its changes do not necessarily match the output token stream.
                 """
                 sysstrtokens = set()
                 # The following utility functions access the tokens list and the i
                 # index of the "for i, t in enumerate(tokens)" loop below.
                 def _isop(j, *o):
                     """Tell whether tokens[j] is an OP token spelled as one of ``o``.

                     An index that falls outside the token list yields False instead
                     of raising.
                     """
                     try:
                         candidate = tokens[j]
                     except IndexError:
                         return False
                     return candidate.type == token.OP and candidate.string in o
                 def _findargnofcall(n):
                     """Locate argument number ``n`` (0-based) of the call at tokens[i].

                     Returns the index of the first token of that argument, or None
                     when the call has fewer arguments than that.

                     Expects tokens[i + 1] to be the opening '(' of the call, so the
                     scan starts at tokens[i + 2].
                     """
                     depth = 0  # how many brackets are open inside the call
                     remaining = n  # top-level commas still to be crossed
                     for pos in range(i + 2, len(tokens)):
                         if _isop(pos, ')', ']', '}'):
                             # closes a call, tuple, subscription or dict / set
                             depth -= 1
                             if depth < 0:
                                 # that was the call's own closing paren
                                 return None
                         elif remaining == 0:
                             # first token of the requested argument
                             return pos
                         elif _isop(pos, '(', '[', '{'):
                             depth += 1
                         elif depth == 0 and _isop(pos, ','):
                             # only commas at the call's own level separate arguments
                             remaining -= 1
                     return None
                 def _ensuresysstr(j):
                     """Mark the string literal(s) starting at tokens[j] as system strings.

                     Every unprefixed STRING token in the run of STRING / NEWLINE / NL
                     tokens beginning at ``j`` is recorded in ``sysstrtokens`` so the
                     string transformer will not add the byte prefix to it.

                     Tokens that are not strings are ignored. The caller is expected
                     to have checked that ``j`` is a valid index.
                     """
                     skippable = (token.STRING, token.NEWLINE, tokenize.NL)
                     pos = j
                     cur = tokens[pos]
                     while cur.type in skippable:
                         unprefixed = (
                             cur.type == token.STRING
                             and cur.string.startswith(("'", '"'))
                         )
                         if unprefixed:
                             sysstrtokens.add(cur)
                         pos += 1
                         try:
                             cur = tokens[pos]
                         except IndexError:
                             # ran off the end of the token list
                             break
                 def _isitemaccess(j):
                     """Tell whether tokens[j:j + 4] read as ``NAME [ STRING ]``.

                     In other words: tokens[j] is a name and the tokens right after
                     it form an item access with a string literal as the key.
                     """
                     try:
                         if tokens[j].type != token.NAME:
                             return False
                         if not _isop(j + 1, '['):
                             return False
                         if tokens[j + 2].type != token.STRING:
                             return False
                         return _isop(j + 3, ']')
                     except IndexError:
                         # not enough tokens left to form the pattern
                         return False
                 def _ismethodcall(j, *methodnames):
                     """Tell whether tokens[j:j + 5] read as ``NAME . METHOD ( STRING``.

                     True when tokens[j] is a name, followed by a call to one of
                     ``methodnames`` whose first argument starts with a string literal.
                     """
                     try:
                         if tokens[j].type != token.NAME:
                             return False
                         if not _isop(j + 1, '.'):
                             return False
                         method = tokens[j + 2]
                         if method.type != token.NAME:
                             return False
                         if method.string not in methodnames:
                             return False
                         if not _isop(j + 3, '('):
                             return False
                         return tokens[j + 4].type == token.STRING
                     except IndexError:
                         # not enough tokens left to form the pattern
                         return False
                 coldelta = 0  # column increment for new opening parens
                 coloffset = -1  # column offset for the current line (-1: TBD)
-                parens = [(0, 0, 0)]  # stack of (line, end-column, column-offset)
+                parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
                 ignorenextline = False  # don't transform the next line
                 insideignoreblock = False # don't transform until turned off
                 for i, t in enumerate(tokens):
                     # Compute the column offset for the current line, such that
                     # the current line will be aligned to the last opening paren
                     # as before.
                     if coloffset < 0:
-                        if t.start[1] == parens[-1][1]:
+                        lastparen = parens[-1]
-                            coloffset = parens[-1][2]
+                        if t.start[1] == lastparen[1]:
-                        elif t.start[1] + 1 == parens[-1][1]:
+                            coloffset = lastparen[2]
+                        elif (
+                            t.start[1] + 1 == lastparen[1]
+                            and lastparen[3] not in (token.NEWLINE, tokenize.NL)
+                        ):
                             # fix misaligned indent of s/util.Abort/error.Abort/
-                            coloffset = parens[-1][2] + (parens[-1][1] - t.start[1])
+                            coloffset = lastparen[2] + (lastparen[1] - t.start[1])
                         else:
                             coloffset = 0
                     # Reset per-line attributes at EOL.
                     if t.type in (token.NEWLINE, tokenize.NL):
                         yield adjusttokenpos(t, coloffset)
                         coldelta = 0
                         coloffset = -1
                         if not insideignoreblock:
                             ignorenextline = (
                                 tokens[i - 1].type == token.COMMENT
                                 and tokens[i - 1].string == "#no-py3-transform"
                             )
                         continue
                     if t.type == token.COMMENT:
                         if t.string == "#py3-transform: off":
                             insideignoreblock = True
                         if t.string == "#py3-transform: on":
                             insideignoreblock = False
                     if ignorenextline or insideignoreblock:
                         yield adjusttokenpos(t, coloffset)
                         continue
                     # Remember the last paren position.
                     if _isop(i, '(', '[', '{'):
-                        parens.append(t.end + (coloffset + coldelta,))
+                        parens.append(t.end + (coloffset + coldelta, tokens[i + 1].type))
                     elif _isop(i, ')', ']', '}'):
                         parens.pop()
                     # Convert most string literals to byte literals. String literals
                     # in Python 2 are bytes. String literals in Python 3 are unicode.
                     # Most strings in Mercurial are bytes and unicode strings are rare.
                     # Rather than rewrite all string literals to use ``b''`` to indicate
                     # byte strings, we apply this token transformer to insert the ``b``
                     # prefix nearly everywhere.
                     if t.type == token.STRING and t not in sysstrtokens:
                         s = t.string
                         # Preserve docstrings as string literals. This is inconsistent
                         # with regular unprefixed strings. However, the
                         # "from __future__" parsing (which allows a module docstring to
                         # exist before it) doesn't properly handle the docstring if it
                         # is b''' prefixed, leading to a SyntaxError. We leave all
                         # docstrings as unprefixed to avoid this. This means Mercurial
                         # components touching docstrings need to handle unicode,
                         # unfortunately.
                         if s[0:3] in ("'''", '"""'):
                             # If it's assigned to something, it's not a docstring
                             if not _isop(i - 1, '='):
                                 yield adjusttokenpos(t, coloffset)
                                 continue
                         # If the first character isn't a quote, it is likely a string
                         # prefixing character (such as 'b', 'u', or 'r'). Ignore.
                         if s[0] not in ("'", '"'):
                             yield adjusttokenpos(t, coloffset)
                             continue
                         # String literal. Prefix to make a b'' string.
                         yield adjusttokenpos(t._replace(string='b%s' % t.string),
                                              coloffset)
                         coldelta += 1
                         continue
                     # This looks like a function call.
                     if t.type == token.NAME and _isop(i + 1, '('):
                         fn = t.string
                         # *attr() builtins don't accept byte strings to 2nd argument.
                         if fn in (
                             'getattr', 'setattr', 'hasattr', 'safehasattr', 'wrapfunction',
                             'wrapclass', 'addattr'
                         ) and (opts['allow-attr-methods'] or not _isop(i - 1, '.')):
                             arg1idx = _findargnofcall(1)
                             if arg1idx is not None:
                                 _ensuresysstr(arg1idx)
                         # .encode() and .decode() on str/bytes/unicode don't accept
                         # byte strings on Python 3.
                         elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                             for argn in range(2):
                                 argidx = _findargnofcall(argn)
                                 if argidx is not None:
                                     _ensuresysstr(argidx)
                         # Rewrite iteritems()/itervalues() to items()/values(), since
                         # the iter* variants do not exist in Python 3.
                         elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                             yield adjusttokenpos(t._replace(string=fn[4:]), coloffset)
                             continue
                     if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
                         if _isitemaccess(i):
                             _ensuresysstr(i + 2)
                         if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                             _ensuresysstr(i + 4)
                     # Looks like "if __name__ == '__main__'".
                     if (t.type == token.NAME and t.string == '__name__'
                         and _isop(i + 1, '==')):
                         _ensuresysstr(i + 2)
                     # Emit unmodified token.
                     yield adjusttokenpos(t, coloffset)
             def process(fin, fout, opts):
                 """Tokenize ``fin``, rewrite the tokens and write the result to ``fout``.

                 ``fin`` must provide a ``readline`` usable by ``tokenize.tokenize``;
                 ``opts`` is passed through to ``replacetokens`` unchanged.
                 """
                 # replacetokens() indexes into its input, so hand it a real list
                 rawtokens = list(tokenize.tokenize(fin.readline))
                 rewritten = replacetokens(rawtokens, opts)
                 fout.write(tokenize.untokenize(rewritten))
             def tryunlink(fname):
                 """Remove ``fname``, treating an already missing file as success.

                 Any other ``OSError`` raised by ``os.unlink`` is propagated.
                 """
                 try:
                     os.unlink(fname)
                 except OSError as exc:
                     missing = exc.errno == errno.ENOENT
                     if not missing:
                         raise
             @contextlib.contextmanager
             def editinplace(fname):
                 """Context manager that rewrites ``fname`` through a temporary file.

                 Yields a binary temporary file created next to ``fname`` (same
                 directory, name derived from its basename). If the ``with`` body
                 finishes without raising, the temporary file is closed and moved
                 over ``fname``. In every case the temporary file is closed and, if
                 it still exists, removed on exit.
                 """
                 n = os.path.basename(fname)
                 d = os.path.dirname(fname)
                 fp = tempfile.NamedTemporaryFile(prefix='.%s-' % n, suffix='~', dir=d,
                                                  delete=False)
                 try:
                     yield fp
                     fp.close()
                     # os.replace() overwrites an existing destination on every
                     # platform, so there is no need to unlink the target first on
                     # Windows (which left a window where fname did not exist at all
                     # if the rename then failed).
                     os.replace(fp.name, fname)
                 finally:
                     # closing twice is harmless; this covers the error path
                     fp.close()
                     # after a successful replace the temp name is already gone
                     tryunlink(fp.name)
             def main():
                 """Command-line entry point.

                 Parses the arguments from ``sys.argv`` and runs ``process`` on each
                 FILE. With ``-i``/``--inplace`` the file is rewritten through
                 ``editinplace``; otherwise the result is written to standard output
                 as bytes.
                 """
                 ap = argparse.ArgumentParser()
                 ap.add_argument('-i', '--inplace', action='store_true', default=False,
                                 help='edit files in place')
                 ap.add_argument('--dictiter', action='store_true', default=False,
                                 help='rewrite iteritems() and itervalues()')
                 ap.add_argument('--allow-attr-methods', action='store_true',
                                 default=False,
                                 help='also handle attr*() when they are methods')
                 ap.add_argument('--treat-as-kwargs', nargs="+", default=[],
                                 help="ignore kwargs-like objects")
                 ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
                 args = ap.parse_args()
                 # replacetokens() looks options up by these dashed keys
                 opts = {
                     'dictiter': args.dictiter,
                     'treat-as-kwargs': set(args.treat_as_kwargs),
                     'allow-attr-methods': args.allow_attr_methods,
                 }
                 for fname in args.files:
                     if args.inplace:
                         with editinplace(fname) as fout:
                             with open(fname, 'rb') as fin:
                                 process(fin, fout, opts)
                     else:
                         with open(fname, 'rb') as fin:
                             # untokenize() yields bytes here, so bypass the text layer
                             fout = sys.stdout.buffer
                             process(fin, fout, opts)
             if __name__ == '__main__':
                 # Refuse to run under Python 2: report why and exit with status 3.
                 if sys.version_info[0] < 3:
                     print('This script must be run under Python 3.')
                     sys.exit(3)
                 main()

tests/test-byteify-strings.t

0 +44 0

             #require py3
               $ byteify_strings () {
               >   $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@"
               > }
             Test in-place
               $ cat > testfile.py <<EOF
               > obj['test'] = b"1234"
               > mydict.iteritems()
               > EOF
               $ byteify_strings testfile.py -i
               $ cat testfile.py
               obj[b'test'] = b"1234"
               mydict.iteritems()
             Test with dictiter
               $ cat > testfile.py <<EOF
               > obj['test'] = b"1234"
               > mydict.iteritems()
               > EOF
               $ byteify_strings testfile.py --dictiter
               obj[b'test'] = b"1234"
               mydict.items()
             Test kwargs-like objects
               $ cat > testfile.py <<EOF
               > kwargs['test'] = "123"
               > kwargs[test['testing']]
               > kwargs[test[[['testing']]]]
               > kwargs[kwargs['testing']]
               > kwargs.get('test')
               > kwargs.pop('test')
               > kwargs.get('test', 'testing')
               > kwargs.pop('test', 'testing')
               > kwargs.setdefault('test', 'testing')
               >
               > opts['test'] = "123"
               > opts[test['testing']]
               > opts[test[[['testing']]]]
               > opts[opts['testing']]
               > opts.get('test')
               > opts.pop('test')
               > opts.get('test', 'testing')
               > opts.pop('test', 'testing')
               > opts.setdefault('test', 'testing')
               >
               > commitopts['test'] = "123"
               > commitopts[test['testing']]
               > commitopts[test[[['testing']]]]
               > commitopts[commitopts['testing']]
               > commitopts.get('test')
               > commitopts.pop('test')
               > commitopts.get('test', 'testing')
               > commitopts.pop('test', 'testing')
               > commitopts.setdefault('test', 'testing')
               > EOF
               $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts
               kwargs['test'] = b"123"
               kwargs[test[b'testing']]
               kwargs[test[[[b'testing']]]]
               kwargs[kwargs['testing']]
               kwargs.get('test')
               kwargs.pop('test')
               kwargs.get('test', b'testing')
               kwargs.pop('test', b'testing')
               kwargs.setdefault('test', b'testing')
               opts['test'] = b"123"
               opts[test[b'testing']]
               opts[test[[[b'testing']]]]
               opts[opts['testing']]
               opts.get('test')
               opts.pop('test')
               opts.get('test', b'testing')
               opts.pop('test', b'testing')
               opts.setdefault('test', b'testing')
               commitopts['test'] = b"123"
               commitopts[test[b'testing']]
               commitopts[test[[[b'testing']]]]
               commitopts[commitopts['testing']]
               commitopts.get('test')
               commitopts.pop('test')
               commitopts.get('test', b'testing')
               commitopts.pop('test', b'testing')
               commitopts.setdefault('test', b'testing')
             Test attr*() as methods
               $ cat > testfile.py <<EOF
               > setattr(o, 'a', 1)
               > util.setattr(o, 'ae', 1)
               > util.getattr(o, 'alksjdf', 'default')
               > util.addattr(o, 'asdf')
               > util.hasattr(o, 'lksjdf', 'default')
               > util.safehasattr(o, 'lksjdf', 'default')
               > @eh.wrapfunction(func, 'lksjdf')
               > def f():
               >     pass
               > @eh.wrapclass(klass, 'lksjdf')
               > def f():
               >     pass
               > EOF
               $ byteify_strings testfile.py --allow-attr-methods
               setattr(o, 'a', 1)
               util.setattr(o, 'ae', 1)
               util.getattr(o, 'alksjdf', b'default')
               util.addattr(o, 'asdf')
               util.hasattr(o, 'lksjdf', b'default')
               util.safehasattr(o, 'lksjdf', b'default')
               @eh.wrapfunction(func, 'lksjdf')
               def f():
                   pass
               @eh.wrapclass(klass, 'lksjdf')
               def f():
                   pass
             Test without attr*() as methods
               $ cat > testfile.py <<EOF
               > setattr(o, 'a', 1)
               > util.setattr(o, 'ae', 1)
               > util.getattr(o, 'alksjdf', 'default')
               > util.addattr(o, 'asdf')
               > util.hasattr(o, 'lksjdf', 'default')
               > util.safehasattr(o, 'lksjdf', 'default')
               > @eh.wrapfunction(func, 'lksjdf')
               > def f():
               >     pass
               > @eh.wrapclass(klass, 'lksjdf')
               > def f():
               >     pass
               > EOF
               $ byteify_strings testfile.py
               setattr(o, 'a', 1)
               util.setattr(o, b'ae', 1)
               util.getattr(o, b'alksjdf', b'default')
               util.addattr(o, b'asdf')
               util.hasattr(o, b'lksjdf', b'default')
               util.safehasattr(o, b'lksjdf', b'default')
               @eh.wrapfunction(func, b'lksjdf')
               def f():
                   pass
               @eh.wrapclass(klass, b'lksjdf')
               def f():
                   pass
             Test ignore comments
               $ cat > testfile.py <<EOF
               > #py3-transform: off
               > "none"
               > "of"
               > 'these'
               > s = """should"""
               > d = '''be'''
               > #py3-transform: on
               > "this should"
               > 'and this also'
               >
               > #no-py3-transform
               > l = "this should be ignored"
               > l2 = "this shouldn't"
               >
               > EOF
               $ byteify_strings testfile.py
               #py3-transform: off
               "none"
               "of"
               'these'
               s = """should"""
               d = '''be'''
               #py3-transform: on
               b"this should"
               b'and this also'
               #no-py3-transform
               l = "this should be ignored"
               l2 = b"this shouldn't"
             Test triple-quoted strings
               $ cat > testfile.py <<EOF
               > """This is ignored
               > """
               >
               > line = """
               >   This should not be
               > """
               > line = '''
               > Neither should this
               > '''
               > EOF
               $ byteify_strings testfile.py
               """This is ignored
               """
               line = b"""
                 This should not be
               """
               line = b'''
               Neither should this
               '''
             Test prefixed strings
               $ cat > testfile.py <<EOF
               > obj['test'] = b"1234"
               > obj[r'test'] = u"1234"
               > EOF
               $ byteify_strings testfile.py
               obj[b'test'] = b"1234"
               obj[r'test'] = u"1234"
+            Test multi-line alignment
+              $ cat > testfile.py <<'EOF'
+              > def foo():
+              >     error.Abort(_("foo"
+              >                  "bar"
+              >                  "%s")
+              >                % parameter)
+              > {
+              >     'test': dict,
+              >     'test2': dict,
+              > }
+              > [
+              >    "thing",
+              >    "thing2"
+              > ]
+              > (
+              >    "tuple",
+              >    "tuple2",
+              > )
+              > {"thing",
+              >  }
+              > EOF
+              $ byteify_strings testfile.py
+              def foo():
+                  error.Abort(_(b"foo"
+                                b"bar"
+                                b"%s")
+                              % parameter)
+              {
+                  b'test': dict,
+                  b'test2': dict,
+              }
+              [
+                 b"thing",
+                 b"thing2"
+              ]
+              (
+                 b"tuple",
+                 b"tuple2",
+              )
+              {b"thing",
+               }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages