##// END OF EJS Templates
byteify-strings: fix misalignment with multi-line parenthesis...
Raphaël Gomès -
r42914:26a31c88 default
parent child Browse files
Show More
@@ -1,307 +1,311
1 1 #!/usr/bin/env python3
2 2 #
3 3 # byteify-strings.py - transform string literals to be Python 3 safe
4 4 #
5 5 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import, print_function
11 11
12 12 import argparse
13 13 import contextlib
14 14 import errno
15 15 import os
16 16 import sys
17 17 import tempfile
18 18 import token
19 19 import tokenize
20 20
def adjusttokenpos(t, ofs):
    """Return token *t* with its start/end columns shifted by *ofs*.

    Rows are preserved; only the column components move. The token is
    not mutated - a new namedtuple is returned via ``_replace``.
    """
    (srow, scol), (erow, ecol) = t.start, t.end
    return t._replace(start=(srow, scol + ofs), end=(erow, ecol + ofs))
25 25
def replacetokens(tokens, opts):
    """Transform a stream of tokens from raw to Python 3.

    Returns a generator of possibly rewritten tokens.

    The input token list may be mutated as part of processing. However,
    its changes do not necessarily match the output token stream.
    """
    # String tokens recorded here stay "system" (unicode) strings and
    # will not be given a b'' prefix by the rewriting pass below.
    sysstrtokens = set()

    def _adjust(tok, ofs):
        # Shift a token's start/end columns by ``ofs`` (local equivalent
        # of the module-level adjusttokenpos helper).
        return tok._replace(start=(tok.start[0], tok.start[1] + ofs),
                            end=(tok.end[0], tok.end[1] + ofs))

    # The helpers below close over ``tokens`` and the index ``i`` of the
    # ``for i, t in enumerate(tokens)`` loop further down.
    def _isop(j, *o):
        """True if tokens[j] is an OP token with one of the given values."""
        try:
            return tokens[j].type == token.OP and tokens[j].string in o
        except IndexError:
            return False

    def _findargnofcall(n):
        """Find arg n of a call expression (start at 0).

        Returns the index of the first token of that argument, or None
        if there are not that many arguments.

        Assumes that tokens[i + 1] is '('.
        """
        depth = 0
        for j in range(i + 2, len(tokens)):
            if _isop(j, ')', ']', '}'):
                # end of call, tuple, subscription or dict / set
                depth -= 1
                if depth < 0:
                    return None
            elif n == 0:
                # this is the starting position of arg
                return j
            elif _isop(j, '(', '[', '{'):
                depth += 1
            elif _isop(j, ',') and depth == 0:
                n -= 1
        return None

    def _ensuresysstr(j):
        """Make sure the string starting at tokens[j] stays a system string.

        Remembers the tokens so the string transformer won't add the
        byte prefix. Implicitly concatenated string parts - possibly
        spread over several (continuation) lines - are all marked.

        Ignores tokens that are not strings. Assumes bounds checking has
        already been done.
        """
        scan = j
        cur = tokens[scan]
        while cur.type in (token.STRING, token.NEWLINE, tokenize.NL):
            scan += 1
            if (cur.type == token.STRING
                    and cur.string.startswith(("'", '"'))):
                sysstrtokens.add(cur)
            try:
                cur = tokens[scan]
            except IndexError:
                break

    def _isitemaccess(j):
        """True if tokens[j] is a name subscripted by a string literal,
        i.e. the next tokens form ``name[<string>]``."""
        try:
            return (tokens[j].type == token.NAME
                    and _isop(j + 1, '[')
                    and tokens[j + 2].type == token.STRING
                    and _isop(j + 3, ']'))
        except IndexError:
            return False

    def _ismethodcall(j, *methodnames):
        """True if tokens[j] is a name calling one of ``methodnames``
        with a string literal as first argument."""
        try:
            return (tokens[j].type == token.NAME
                    and _isop(j + 1, '.')
                    and tokens[j + 2].type == token.NAME
                    and tokens[j + 2].string in methodnames
                    and _isop(j + 3, '(')
                    and tokens[j + 4].type == token.STRING)
        except IndexError:
            return False

    coldelta = 0  # column increment for new opening parens
    coloffset = -1  # column offset for the current line (-1: TBD)
    parens = [(0, 0, 0, -1)]  # stack of (line, end-column, column-offset, type)
    ignorenextline = False  # don't transform the next line
    insideignoreblock = False  # don't transform until turned off
    for i, t in enumerate(tokens):
        # Compute the column offset for the current line, such that
        # the current line will be aligned to the last opening paren
        # as before.
        if coloffset < 0:
            lastparen = parens[-1]
            if t.start[1] == lastparen[1]:
                coloffset = lastparen[2]
            elif (t.start[1] + 1 == lastparen[1]
                  and lastparen[3] not in (token.NEWLINE, tokenize.NL)):
                # fix misaligned indent of s/util.Abort/error.Abort/
                coloffset = lastparen[2] + (lastparen[1] - t.start[1])
            else:
                coloffset = 0

        # Reset per-line attributes at EOL.
        if t.type in (token.NEWLINE, tokenize.NL):
            yield _adjust(t, coloffset)
            coldelta = 0
            coloffset = -1
            if not insideignoreblock:
                # A "#no-py3-transform" comment disables rewriting for
                # the next line only.
                ignorenextline = (
                    tokens[i - 1].type == token.COMMENT
                    and tokens[i - 1].string == "#no-py3-transform"
                )
            continue

        if t.type == token.COMMENT:
            if t.string == "#py3-transform: off":
                insideignoreblock = True
            if t.string == "#py3-transform: on":
                insideignoreblock = False

        if ignorenextline or insideignoreblock:
            yield _adjust(t, coloffset)
            continue

        # Remember the last paren position, plus the type of the token
        # right after the paren so continuation-line alignment can tell
        # "open paren at end of line" apart from "open paren mid-line".
        if _isop(i, '(', '[', '{'):
            parens.append(t.end + (coloffset + coldelta, tokens[i + 1].type))
        elif _isop(i, ')', ']', '}'):
            parens.pop()

        # Convert most string literals to byte literals. String literals
        # in Python 2 are bytes. String literals in Python 3 are unicode.
        # Most strings in Mercurial are bytes and unicode strings are rare.
        # Rather than rewrite all string literals to use ``b''`` to indicate
        # byte strings, we apply this token transformer to insert the ``b``
        # prefix nearly everywhere.
        if t.type == token.STRING and t not in sysstrtokens:
            s = t.string

            # Preserve docstrings as string literals. This is inconsistent
            # with regular unprefixed strings. However, the
            # "from __future__" parsing (which allows a module docstring to
            # exist before it) doesn't properly handle the docstring if it
            # is b''' prefixed, leading to a SyntaxError. We leave all
            # docstrings as unprefixed to avoid this. This means Mercurial
            # components touching docstrings need to handle unicode,
            # unfortunately.
            if s[0:3] in ("'''", '"""'):
                # If it's assigned to something, it's not a docstring
                if not _isop(i - 1, '='):
                    yield _adjust(t, coloffset)
                    continue

            # If the first character isn't a quote, it is likely a string
            # prefixing character (such as 'b', 'u', or 'r'. Ignore.
            if s[0] not in ("'", '"'):
                yield _adjust(t, coloffset)
                continue

            # String literal. Prefix to make a b'' string.
            yield _adjust(t._replace(string='b%s' % t.string), coloffset)
            coldelta += 1
            continue

        # This looks like a function call.
        if t.type == token.NAME and _isop(i + 1, '('):
            fn = t.string

            # *attr() builtins don't accept byte strings to 2nd argument.
            if fn in ('getattr', 'setattr', 'hasattr', 'safehasattr',
                      'wrapfunction', 'wrapclass', 'addattr') and (
                          opts['allow-attr-methods']
                          or not _isop(i - 1, '.')):
                arg1idx = _findargnofcall(1)
                if arg1idx is not None:
                    _ensuresysstr(arg1idx)

            # .encode() and .decode() on str/bytes/unicode don't accept
            # byte strings on Python 3.
            elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
                for argn in range(2):
                    argidx = _findargnofcall(argn)
                    if argidx is not None:
                        _ensuresysstr(argidx)

            # It changes iteritems/values to items/values as they are not
            # present in Python 3 world.
            elif opts['dictiter'] and fn in ('iteritems', 'itervalues'):
                yield _adjust(t._replace(string=fn[4:]), coloffset)
                continue

        # kwargs-like objects are keyed by system strings.
        if t.type == token.NAME and t.string in opts['treat-as-kwargs']:
            if _isitemaccess(i):
                _ensuresysstr(i + 2)
            if _ismethodcall(i, 'get', 'pop', 'setdefault', 'popitem'):
                _ensuresysstr(i + 4)

        # Looks like "if __name__ == '__main__'".
        if (t.type == token.NAME and t.string == '__name__'
                and _isop(i + 1, '==')):
            _ensuresysstr(i + 2)

        # Emit unmodified token.
        yield _adjust(t, coloffset)
246 250
def process(fin, fout, opts):
    """Byteify one source file.

    Reads Python source from the binary stream ``fin``, rewrites its
    token stream according to ``opts`` and writes the regenerated
    source bytes to ``fout``.
    """
    instream = tokenize.tokenize(fin.readline)
    rewritten = replacetokens(list(instream), opts)
    fout.write(tokenize.untokenize(rewritten))
251 255
def tryunlink(fname):
    """Best-effort removal of ``fname``.

    A file that is already gone (ENOENT) is not an error; any other
    OSError - permissions, directory, ... - propagates to the caller.
    """
    try:
        os.unlink(fname)
    except FileNotFoundError:
        # Already removed; that is exactly the outcome we wanted.
        pass
258 262
@contextlib.contextmanager
def editinplace(fname):
    """Context manager yielding a temp file that replaces ``fname`` on exit.

    The temporary file is created in the same directory as ``fname`` so
    the final rename cannot cross filesystems. On a clean exit the temp
    file atomically becomes ``fname``; on error it is deleted and the
    original file is left untouched.
    """
    def _silentunlink(path):
        # Remove ``path``; a missing file is fine, other errors propagate.
        try:
            os.unlink(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise

    basename = os.path.basename(fname)
    dirname = os.path.dirname(fname)
    fp = tempfile.NamedTemporaryFile(prefix='.%s-' % basename, suffix='~',
                                     dir=dirname, delete=False)
    try:
        yield fp
        fp.close()
        if os.name == 'nt':
            # Windows refuses to rename over an existing file.
            _silentunlink(fname)
        os.rename(fp.name, fname)
    finally:
        fp.close()
        _silentunlink(fp.name)
274 278
def main():
    """Command-line entry point.

    Parses options, then byteifies each named file either in place
    (``-i``) or onto stdout.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--inplace', action='store_true', default=False,
                    help='edit files in place')
    ap.add_argument('--dictiter', action='store_true', default=False,
                    help='rewrite iteritems() and itervalues()')
    ap.add_argument('--allow-attr-methods', action='store_true',
                    default=False,
                    help='also handle attr*() when they are methods')
    ap.add_argument('--treat-as-kwargs', nargs="+", default=[],
                    help="ignore kwargs-like objects")
    ap.add_argument('files', metavar='FILE', nargs='+', help='source file')
    args = ap.parse_args()

    opts = {
        'dictiter': args.dictiter,
        'treat-as-kwargs': set(args.treat_as_kwargs),
        'allow-attr-methods': args.allow_attr_methods,
    }

    for fname in args.files:
        if args.inplace:
            # Write through a same-directory temp file, swapped in on
            # success (see editinplace).
            with editinplace(fname) as fout:
                with open(fname, 'rb') as fin:
                    process(fin, fout, opts)
        else:
            with open(fname, 'rb') as fin:
                process(fin, sys.stdout.buffer, opts)
302 306
if __name__ == '__main__':
    # The tool targets Python 3 token semantics and must run under 3.
    if sys.version_info.major < 3:
        print('This script must be run under Python 3.')
        sys.exit(3)
    main()
@@ -1,217 +1,261
1 1 #require py3
2 2
3 3 $ byteify_strings () {
4 4 > $PYTHON "$TESTDIR/../contrib/byteify-strings.py" "$@"
5 5 > }
6 6
7 7 Test in-place
8 8
9 9 $ cat > testfile.py <<EOF
10 10 > obj['test'] = b"1234"
11 11 > mydict.iteritems()
12 12 > EOF
13 13 $ byteify_strings testfile.py -i
14 14 $ cat testfile.py
15 15 obj[b'test'] = b"1234"
16 16 mydict.iteritems()
17 17
18 18 Test with dictiter
19 19
20 20 $ cat > testfile.py <<EOF
21 21 > obj['test'] = b"1234"
22 22 > mydict.iteritems()
23 23 > EOF
24 24 $ byteify_strings testfile.py --dictiter
25 25 obj[b'test'] = b"1234"
26 26 mydict.items()
27 27
28 28 Test kwargs-like objects
29 29
30 30 $ cat > testfile.py <<EOF
31 31 > kwargs['test'] = "123"
32 32 > kwargs[test['testing']]
33 33 > kwargs[test[[['testing']]]]
34 34 > kwargs[kwargs['testing']]
35 35 > kwargs.get('test')
36 36 > kwargs.pop('test')
37 37 > kwargs.get('test', 'testing')
38 38 > kwargs.pop('test', 'testing')
39 39 > kwargs.setdefault('test', 'testing')
40 40 >
41 41 > opts['test'] = "123"
42 42 > opts[test['testing']]
43 43 > opts[test[[['testing']]]]
44 44 > opts[opts['testing']]
45 45 > opts.get('test')
46 46 > opts.pop('test')
47 47 > opts.get('test', 'testing')
48 48 > opts.pop('test', 'testing')
49 49 > opts.setdefault('test', 'testing')
50 50 >
51 51 > commitopts['test'] = "123"
52 52 > commitopts[test['testing']]
53 53 > commitopts[test[[['testing']]]]
54 54 > commitopts[commitopts['testing']]
55 55 > commitopts.get('test')
56 56 > commitopts.pop('test')
57 57 > commitopts.get('test', 'testing')
58 58 > commitopts.pop('test', 'testing')
59 59 > commitopts.setdefault('test', 'testing')
60 60 > EOF
61 61 $ byteify_strings testfile.py --treat-as-kwargs kwargs opts commitopts
62 62 kwargs['test'] = b"123"
63 63 kwargs[test[b'testing']]
64 64 kwargs[test[[[b'testing']]]]
65 65 kwargs[kwargs['testing']]
66 66 kwargs.get('test')
67 67 kwargs.pop('test')
68 68 kwargs.get('test', b'testing')
69 69 kwargs.pop('test', b'testing')
70 70 kwargs.setdefault('test', b'testing')
71 71
72 72 opts['test'] = b"123"
73 73 opts[test[b'testing']]
74 74 opts[test[[[b'testing']]]]
75 75 opts[opts['testing']]
76 76 opts.get('test')
77 77 opts.pop('test')
78 78 opts.get('test', b'testing')
79 79 opts.pop('test', b'testing')
80 80 opts.setdefault('test', b'testing')
81 81
82 82 commitopts['test'] = b"123"
83 83 commitopts[test[b'testing']]
84 84 commitopts[test[[[b'testing']]]]
85 85 commitopts[commitopts['testing']]
86 86 commitopts.get('test')
87 87 commitopts.pop('test')
88 88 commitopts.get('test', b'testing')
89 89 commitopts.pop('test', b'testing')
90 90 commitopts.setdefault('test', b'testing')
91 91
92 92 Test attr*() as methods
93 93
94 94 $ cat > testfile.py <<EOF
95 95 > setattr(o, 'a', 1)
96 96 > util.setattr(o, 'ae', 1)
97 97 > util.getattr(o, 'alksjdf', 'default')
98 98 > util.addattr(o, 'asdf')
99 99 > util.hasattr(o, 'lksjdf', 'default')
100 100 > util.safehasattr(o, 'lksjdf', 'default')
101 101 > @eh.wrapfunction(func, 'lksjdf')
102 102 > def f():
103 103 > pass
104 104 > @eh.wrapclass(klass, 'lksjdf')
105 105 > def f():
106 106 > pass
107 107 > EOF
108 108 $ byteify_strings testfile.py --allow-attr-methods
109 109 setattr(o, 'a', 1)
110 110 util.setattr(o, 'ae', 1)
111 111 util.getattr(o, 'alksjdf', b'default')
112 112 util.addattr(o, 'asdf')
113 113 util.hasattr(o, 'lksjdf', b'default')
114 114 util.safehasattr(o, 'lksjdf', b'default')
115 115 @eh.wrapfunction(func, 'lksjdf')
116 116 def f():
117 117 pass
118 118 @eh.wrapclass(klass, 'lksjdf')
119 119 def f():
120 120 pass
121 121
122 122 Test without attr*() as methods
123 123
124 124 $ cat > testfile.py <<EOF
125 125 > setattr(o, 'a', 1)
126 126 > util.setattr(o, 'ae', 1)
127 127 > util.getattr(o, 'alksjdf', 'default')
128 128 > util.addattr(o, 'asdf')
129 129 > util.hasattr(o, 'lksjdf', 'default')
130 130 > util.safehasattr(o, 'lksjdf', 'default')
131 131 > @eh.wrapfunction(func, 'lksjdf')
132 132 > def f():
133 133 > pass
134 134 > @eh.wrapclass(klass, 'lksjdf')
135 135 > def f():
136 136 > pass
137 137 > EOF
138 138 $ byteify_strings testfile.py
139 139 setattr(o, 'a', 1)
140 140 util.setattr(o, b'ae', 1)
141 141 util.getattr(o, b'alksjdf', b'default')
142 142 util.addattr(o, b'asdf')
143 143 util.hasattr(o, b'lksjdf', b'default')
144 144 util.safehasattr(o, b'lksjdf', b'default')
145 145 @eh.wrapfunction(func, b'lksjdf')
146 146 def f():
147 147 pass
148 148 @eh.wrapclass(klass, b'lksjdf')
149 149 def f():
150 150 pass
151 151
152 152 Test ignore comments
153 153
154 154 $ cat > testfile.py <<EOF
155 155 > #py3-transform: off
156 156 > "none"
157 157 > "of"
158 158 > 'these'
159 159 > s = """should"""
160 160 > d = '''be'''
161 161 > #py3-transform: on
162 162 > "this should"
163 163 > 'and this also'
164 164 >
165 165 > #no-py3-transform
166 166 > l = "this should be ignored"
167 167 > l2 = "this shouldn't"
168 168 >
169 169 > EOF
170 170 $ byteify_strings testfile.py
171 171 #py3-transform: off
172 172 "none"
173 173 "of"
174 174 'these'
175 175 s = """should"""
176 176 d = '''be'''
177 177 #py3-transform: on
178 178 b"this should"
179 179 b'and this also'
180 180
181 181 #no-py3-transform
182 182 l = "this should be ignored"
183 183 l2 = b"this shouldn't"
184 184
185 185 Test triple-quoted strings
186 186
187 187 $ cat > testfile.py <<EOF
188 188 > """This is ignored
189 189 > """
190 190 >
191 191 > line = """
192 192 > This should not be
193 193 > """
194 194 > line = '''
195 195 > Neither should this
196 196 > '''
197 197 > EOF
198 198 $ byteify_strings testfile.py
199 199 """This is ignored
200 200 """
201 201
202 202 line = b"""
203 203 This should not be
204 204 """
205 205 line = b'''
206 206 Neither should this
207 207 '''
208 208
209 209 Test prefixed strings
210 210
211 211 $ cat > testfile.py <<EOF
212 212 > obj['test'] = b"1234"
213 213 > obj[r'test'] = u"1234"
214 214 > EOF
215 215 $ byteify_strings testfile.py
216 216 obj[b'test'] = b"1234"
217 217 obj[r'test'] = u"1234"
218
219 Test multi-line alignment
220
221 $ cat > testfile.py <<'EOF'
222 > def foo():
223 > error.Abort(_("foo"
224 > "bar"
225 > "%s")
226 > % parameter)
227 > {
228 > 'test': dict,
229 > 'test2': dict,
230 > }
231 > [
232 > "thing",
233 > "thing2"
234 > ]
235 > (
236 > "tuple",
237 > "tuple2",
238 > )
239 > {"thing",
240 > }
241 > EOF
242 $ byteify_strings testfile.py
243 def foo():
244 error.Abort(_(b"foo"
245 b"bar"
246 b"%s")
247 % parameter)
248 {
249 b'test': dict,
250 b'test2': dict,
251 }
252 [
253 b"thing",
254 b"thing2"
255 ]
256 (
257 b"tuple",
258 b"tuple2",
259 )
260 {b"thing",
261 }
General Comments 0
You need to be logged in to leave comments. Login now