@@ -1,670 +1,670 @@
|
1 | 1 | # testparseutil.py - utilities to parse test script for check tools |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import, print_function |
|
9 | 9 | |
|
10 | 10 | import abc |
|
11 | 11 | import re |
|
12 | 12 | import sys |
|
13 | 13 | |
|
14 | 14 | #################### |
|
15 | 15 | # for Python3 compatibility (almost comes from mercurial/pycompat.py) |
|
16 | 16 | |
|
17 | 17 | ispy3 = sys.version_info[0] >= 3 |
|
18 | 18 | |
|
19 | 19 | |
|
20 | 20 | def identity(a): |
|
21 | 21 | return a |
|
22 | 22 | |
|
23 | 23 | |
|
24 | 24 | def _rapply(f, xs): |
|
25 | 25 | if xs is None: |
|
26 | 26 | # assume None means non-value of optional data |
|
27 | 27 | return xs |
|
28 | 28 | if isinstance(xs, (list, set, tuple)): |
|
29 | 29 | return type(xs)(_rapply(f, x) for x in xs) |
|
30 | 30 | if isinstance(xs, dict): |
|
31 | 31 | return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) |
|
32 | 32 | return f(xs) |
|
33 | 33 | |
|
34 | 34 | |
|
35 | 35 | def rapply(f, xs): |
|
36 | 36 | if f is identity: |
|
37 | 37 | # fast path mainly for py2 |
|
38 | 38 | return xs |
|
39 | 39 | return _rapply(f, xs) |
|
40 | 40 | |
|
41 | 41 | |
|
42 | 42 | if ispy3: |
|
43 | 43 | import builtins |
|
44 | 44 | |
|
45 | 45 | def bytestr(s): |
|
46 | 46 | # tiny version of pycompat.bytestr |
|
47 | 47 | return s.encode('latin1') |
|
48 | 48 | |
|
49 | 49 | def sysstr(s): |
|
50 | 50 | if isinstance(s, builtins.str): |
|
51 | 51 | return s |
|
52 |     | return s.decode(
|
    | 52 | return s.decode('latin-1') |
|
53 | 53 | |
|
54 | 54 | def opentext(f): |
|
55 | 55 | return open(f, 'r') |
|
56 | 56 | |
|
57 | 57 | |
|
58 | 58 | else: |
|
59 | 59 | bytestr = str |
|
60 | 60 | sysstr = identity |
|
61 | 61 | |
|
62 | 62 | opentext = open |
|
63 | 63 | |
|
64 | 64 | |
|
65 | 65 | def b2s(x): |
|
66 | 66 | # convert BYTES elements in "x" to SYSSTR recursively |
|
67 | 67 | return rapply(sysstr, x) |
|
68 | 68 | |
|
69 | 69 | |
|
70 | 70 | def writeout(data): |
|
71 | 71 | # write "data" in BYTES into stdout |
|
72 | 72 | sys.stdout.write(data) |
|
73 | 73 | |
|
74 | 74 | |
|
75 | 75 | def writeerr(data): |
|
76 | 76 | # write "data" in BYTES into stderr |
|
77 | 77 | sys.stderr.write(data) |
|
78 | 78 | |
|
79 | 79 | |
|
80 | 80 | #################### |
|
81 | 81 | |
|
82 | 82 | |
|
83 | 83 | class embeddedmatcher(object): |
|
84 | 84 | """Base class to detect embedded code fragments in *.t test script |
|
85 | 85 | """ |
|
86 | 86 | |
|
87 | 87 | __metaclass__ = abc.ABCMeta |
|
88 | 88 | |
|
89 | 89 | def __init__(self, desc): |
|
90 | 90 | self.desc = desc |
|
91 | 91 | |
|
92 | 92 | @abc.abstractmethod |
|
93 | 93 | def startsat(self, line): |
|
94 | 94 | """Examine whether embedded code starts at line |
|
95 | 95 | |
|
96 | 96 | This can return arbitrary object, and it is used as 'ctx' for |
|
97 | 97 | subsequent method invocations. |
|
98 | 98 | """ |
|
99 | 99 | |
|
100 | 100 | @abc.abstractmethod |
|
101 | 101 | def endsat(self, ctx, line): |
|
102 | 102 | """Examine whether embedded code ends at line""" |
|
103 | 103 | |
|
104 | 104 | @abc.abstractmethod |
|
105 | 105 | def isinside(self, ctx, line): |
|
106 | 106 | """Examine whether line is inside embedded code, if not yet endsat |
|
107 | 107 | """ |
|
108 | 108 | |
|
109 | 109 | @abc.abstractmethod |
|
110 | 110 | def ignores(self, ctx): |
|
111 | 111 | """Examine whether detected embedded code should be ignored""" |
|
112 | 112 | |
|
113 | 113 | @abc.abstractmethod |
|
114 | 114 | def filename(self, ctx): |
|
115 | 115 | """Return filename of embedded code |
|
116 | 116 | |
|
117 | 117 | If filename isn't specified for embedded code explicitly, this |
|
118 | 118 | returns None. |
|
119 | 119 | """ |
|
120 | 120 | |
|
121 | 121 | @abc.abstractmethod |
|
122 | 122 | def codeatstart(self, ctx, line): |
|
123 | 123 | """Return actual code at the start line of embedded code |
|
124 | 124 | |
|
125 | 125 | This might return None, if the start line doesn't contain |
|
126 | 126 | actual code. |
|
127 | 127 | """ |
|
128 | 128 | |
|
129 | 129 | @abc.abstractmethod |
|
130 | 130 | def codeatend(self, ctx, line): |
|
131 | 131 | """Return actual code at the end line of embedded code |
|
132 | 132 | |
|
133 | 133 | This might return None, if the end line doesn't contain actual |
|
134 | 134 | code. |
|
135 | 135 | """ |
|
136 | 136 | |
|
137 | 137 | @abc.abstractmethod |
|
138 | 138 | def codeinside(self, ctx, line): |
|
139 | 139 | """Return actual code at line inside embedded code""" |
|
140 | 140 | |
|
141 | 141 | |
|
142 | 142 | def embedded(basefile, lines, errors, matchers): |
|
143 | 143 | """pick embedded code fragments up from given lines |
|
144 | 144 | |
|
145 | 145 | This is common parsing logic, which examines specified matchers on |
|
146 | 146 | given lines. |
|
147 | 147 | |
|
148 | 148 | :basefile: a name of a file, from which lines to be parsed come. |
|
149 | 149 | :lines: to be parsed (might be a value returned by "open(basefile)") |
|
150 | 150 | :errors: an array, into which messages for detected error are stored |
|
151 | 151 | :matchers: an array of embeddedmatcher objects |
|
152 | 152 | |
|
153 | 153 | This function yields '(filename, starts, ends, code)' tuple. |
|
154 | 154 | |
|
155 | 155 | :filename: a name of embedded code, if it is explicitly specified |
|
156 | 156 | (e.g. "foobar" of "cat >> foobar <<EOF"). |
|
157 | 157 | Otherwise, this is None |
|
158 | 158 | :starts: line number (1-origin), at which embedded code starts (inclusive) |
|
159 | 159 | :ends: line number (1-origin), at which embedded code ends (exclusive) |
|
160 | 160 | :code: extracted embedded code, which is single-stringified |
|
161 | 161 | |
|
162 | 162 | >>> class ambigmatcher(object): |
|
163 | 163 | ... # mock matcher class to examine implementation of |
|
164 | 164 | ... # "ambiguous matching" corner case |
|
165 | 165 | ... def __init__(self, desc, matchfunc): |
|
166 | 166 | ... self.desc = desc |
|
167 | 167 | ... self.matchfunc = matchfunc |
|
168 | 168 | ... def startsat(self, line): |
|
169 | 169 | ... return self.matchfunc(line) |
|
170 | 170 | >>> ambig1 = ambigmatcher('ambiguous #1', |
|
171 | 171 | ... lambda l: l.startswith(' $ cat ')) |
|
172 | 172 | >>> ambig2 = ambigmatcher('ambiguous #2', |
|
173 | 173 | ... lambda l: l.endswith('<< EOF\\n')) |
|
174 | 174 | >>> lines = [' $ cat > foo.py << EOF\\n'] |
|
175 | 175 | >>> errors = [] |
|
176 | 176 | >>> matchers = [ambig1, ambig2] |
|
177 | 177 | >>> list(t for t in embedded('<dummy>', lines, errors, matchers)) |
|
178 | 178 | [] |
|
179 | 179 | >>> b2s(errors) |
|
180 | 180 | ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"'] |
|
181 | 181 | |
|
182 | 182 | """ |
|
183 | 183 | matcher = None |
|
184 | 184 | ctx = filename = code = startline = None # for pyflakes |
|
185 | 185 | |
|
186 | 186 | for lineno, line in enumerate(lines, 1): |
|
187 | 187 | if not line.endswith('\n'): |
|
188 | 188 | line += '\n' # to normalize EOF line |
|
189 | 189 | if matcher: # now, inside embedded code |
|
190 | 190 | if matcher.endsat(ctx, line): |
|
191 | 191 | codeatend = matcher.codeatend(ctx, line) |
|
192 | 192 | if codeatend is not None: |
|
193 | 193 | code.append(codeatend) |
|
194 | 194 | if not matcher.ignores(ctx): |
|
195 | 195 | yield (filename, startline, lineno, ''.join(code)) |
|
196 | 196 | matcher = None |
|
197 | 197 | # DO NOT "continue", because line might start next fragment |
|
198 | 198 | elif not matcher.isinside(ctx, line): |
|
199 | 199 | # this is an error of basefile |
|
200 | 200 | # (if matchers are implemented correctly) |
|
201 | 201 | errors.append( |
|
202 | 202 | '%s:%d: unexpected line for "%s"' |
|
203 | 203 | % (basefile, lineno, matcher.desc) |
|
204 | 204 | ) |
|
205 | 205 | # stop extracting embedded code by current 'matcher', |
|
206 | 206 | # because appearance of unexpected line might mean |
|
207 | 207 | # that expected end-of-embedded-code line might never |
|
208 | 208 | # appear |
|
209 | 209 | matcher = None |
|
210 | 210 | # DO NOT "continue", because line might start next fragment |
|
211 | 211 | else: |
|
212 | 212 | code.append(matcher.codeinside(ctx, line)) |
|
213 | 213 | continue |
|
214 | 214 | |
|
215 | 215 | # examine whether current line starts embedded code or not |
|
216 | 216 | assert not matcher |
|
217 | 217 | |
|
218 | 218 | matched = [] |
|
219 | 219 | for m in matchers: |
|
220 | 220 | ctx = m.startsat(line) |
|
221 | 221 | if ctx: |
|
222 | 222 | matched.append((m, ctx)) |
|
223 | 223 | if matched: |
|
224 | 224 | if len(matched) > 1: |
|
225 | 225 | # this is an error of matchers, maybe |
|
226 | 226 | errors.append( |
|
227 | 227 | '%s:%d: ambiguous line for %s' |
|
228 | 228 | % ( |
|
229 | 229 | basefile, |
|
230 | 230 | lineno, |
|
231 | 231 | ', '.join(['"%s"' % m.desc for m, c in matched]), |
|
232 | 232 | ) |
|
233 | 233 | ) |
|
234 | 234 | # omit extracting embedded code, because choosing |
|
235 | 235 | # arbitrary matcher from matched ones might fail to |
|
236 | 236 | # detect the end of embedded code as expected. |
|
237 | 237 | continue |
|
238 | 238 | matcher, ctx = matched[0] |
|
239 | 239 | filename = matcher.filename(ctx) |
|
240 | 240 | code = [] |
|
241 | 241 | codeatstart = matcher.codeatstart(ctx, line) |
|
242 | 242 | if codeatstart is not None: |
|
243 | 243 | code.append(codeatstart) |
|
244 | 244 | startline = lineno |
|
245 | 245 | else: |
|
246 | 246 | startline = lineno + 1 |
|
247 | 247 | |
|
248 | 248 | if matcher: |
|
249 | 249 | # examine whether EOF ends embedded code, because embedded |
|
250 | 250 | # code isn't yet ended explicitly |
|
251 | 251 | if matcher.endsat(ctx, '\n'): |
|
252 | 252 | codeatend = matcher.codeatend(ctx, '\n') |
|
253 | 253 | if codeatend is not None: |
|
254 | 254 | code.append(codeatend) |
|
255 | 255 | if not matcher.ignores(ctx): |
|
256 | 256 | yield (filename, startline, lineno + 1, ''.join(code)) |
|
257 | 257 | else: |
|
258 | 258 | # this is an error of basefile |
|
259 | 259 | # (if matchers are implemented correctly) |
|
260 | 260 | errors.append( |
|
261 | 261 | '%s:%d: unexpected end of file for "%s"' |
|
262 | 262 | % (basefile, lineno, matcher.desc) |
|
263 | 263 | ) |
|
264 | 264 | |
|
265 | 265 | |
|
266 | 266 | # heredoc limit mark to ignore embedded code at check-code.py or so |
|
267 | 267 | heredocignorelimit = 'NO_CHECK_EOF' |
|
268 | 268 | |
|
269 | 269 | # the pattern to match against cases below, and to return a limit mark |
|
270 | 270 | # string as 'limit' group |
|
271 | 271 | # |
|
272 | 272 | # - << LIMITMARK |
|
273 | 273 | # - << "LIMITMARK" |
|
274 | 274 | # - << 'LIMITMARK' |
|
275 | 275 | heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)' |
|
276 | 276 | |
|
277 | 277 | |
|
278 | 278 | class fileheredocmatcher(embeddedmatcher): |
|
279 | 279 | """Detect "cat > FILE << LIMIT" style embedded code |
|
280 | 280 | |
|
281 | 281 | >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py') |
|
282 | 282 | >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n')) |
|
283 | 283 | ('file.py', ' > EOF\\n') |
|
284 | 284 | >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n')) |
|
285 | 285 | ('file.py', ' > EOF\\n') |
|
286 | 286 | >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n')) |
|
287 | 287 | ('any file.py', ' > EOF\\n') |
|
288 | 288 | >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n")) |
|
289 | 289 | ('file.py', ' > ANYLIMIT\\n') |
|
290 | 290 | >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n')) |
|
291 | 291 | ('file.py', ' > ANYLIMIT\\n') |
|
292 | 292 | >>> start = ' $ cat > file.py << EOF\\n' |
|
293 | 293 | >>> ctx = matcher.startsat(start) |
|
294 | 294 | >>> matcher.codeatstart(ctx, start) |
|
295 | 295 | >>> b2s(matcher.filename(ctx)) |
|
296 | 296 | 'file.py' |
|
297 | 297 | >>> matcher.ignores(ctx) |
|
298 | 298 | False |
|
299 | 299 | >>> inside = ' > foo = 1\\n' |
|
300 | 300 | >>> matcher.endsat(ctx, inside) |
|
301 | 301 | False |
|
302 | 302 | >>> matcher.isinside(ctx, inside) |
|
303 | 303 | True |
|
304 | 304 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
305 | 305 | 'foo = 1\\n' |
|
306 | 306 | >>> end = ' > EOF\\n' |
|
307 | 307 | >>> matcher.endsat(ctx, end) |
|
308 | 308 | True |
|
309 | 309 | >>> matcher.codeatend(ctx, end) |
|
310 | 310 | >>> matcher.endsat(ctx, ' > EOFEOF\\n') |
|
311 | 311 | False |
|
312 | 312 | >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n') |
|
313 | 313 | >>> matcher.ignores(ctx) |
|
314 | 314 | True |
|
315 | 315 | """ |
|
316 | 316 | |
|
317 | 317 | _prefix = ' > ' |
|
318 | 318 | |
|
319 | 319 | def __init__(self, desc, namepat): |
|
320 | 320 | super(fileheredocmatcher, self).__init__(desc) |
|
321 | 321 | |
|
322 | 322 | # build the pattern to match against cases below (and ">>" |
|
323 | 323 | # variants), and to return a target filename string as 'name' |
|
324 | 324 | # group |
|
325 | 325 | # |
|
326 | 326 | # - > NAMEPAT |
|
327 | 327 | # - > "NAMEPAT" |
|
328 | 328 | # - > 'NAMEPAT' |
|
329 | 329 | namepat = ( |
|
330 | 330 | r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat |
|
331 | 331 | ) |
|
332 | 332 | self._fileres = [ |
|
333 | 333 | # "cat > NAME << LIMIT" case |
|
334 | 334 | re.compile(r' \$ \s*cat' + namepat + heredoclimitpat), |
|
335 | 335 | # "cat << LIMIT > NAME" case |
|
336 | 336 | re.compile(r' \$ \s*cat' + heredoclimitpat + namepat), |
|
337 | 337 | ] |
|
338 | 338 | |
|
339 | 339 | def startsat(self, line): |
|
340 | 340 | # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple |
|
341 | 341 | for filere in self._fileres: |
|
342 | 342 | matched = filere.match(line) |
|
343 | 343 | if matched: |
|
344 | 344 | return ( |
|
345 | 345 | matched.group('name'), |
|
346 | 346 | ' > %s\n' % matched.group('limit'), |
|
347 | 347 | ) |
|
348 | 348 | |
|
349 | 349 | def endsat(self, ctx, line): |
|
350 | 350 | return ctx[1] == line |
|
351 | 351 | |
|
352 | 352 | def isinside(self, ctx, line): |
|
353 | 353 | return line.startswith(self._prefix) |
|
354 | 354 | |
|
355 | 355 | def ignores(self, ctx): |
|
356 | 356 | return ' > %s\n' % heredocignorelimit == ctx[1] |
|
357 | 357 | |
|
358 | 358 | def filename(self, ctx): |
|
359 | 359 | return ctx[0] |
|
360 | 360 | |
|
361 | 361 | def codeatstart(self, ctx, line): |
|
362 | 362 | return None # no embedded code at start line |
|
363 | 363 | |
|
364 | 364 | def codeatend(self, ctx, line): |
|
365 | 365 | return None # no embedded code at end line |
|
366 | 366 | |
|
367 | 367 | def codeinside(self, ctx, line): |
|
368 | 368 | return line[len(self._prefix) :] # strip prefix |
|
369 | 369 | |
|
370 | 370 | |
|
371 | 371 | #### |
|
372 | 372 | # for embedded python script |
|
373 | 373 | |
|
374 | 374 | |
|
375 | 375 | class pydoctestmatcher(embeddedmatcher): |
|
376 | 376 | """Detect ">>> code" style embedded python code |
|
377 | 377 | |
|
378 | 378 | >>> matcher = pydoctestmatcher() |
|
379 | 379 | >>> startline = ' >>> foo = 1\\n' |
|
380 | 380 | >>> matcher.startsat(startline) |
|
381 | 381 | True |
|
382 | 382 | >>> matcher.startsat(' ... foo = 1\\n') |
|
383 | 383 | False |
|
384 | 384 | >>> ctx = matcher.startsat(startline) |
|
385 | 385 | >>> matcher.filename(ctx) |
|
386 | 386 | >>> matcher.ignores(ctx) |
|
387 | 387 | False |
|
388 | 388 | >>> b2s(matcher.codeatstart(ctx, startline)) |
|
389 | 389 | 'foo = 1\\n' |
|
390 | 390 | >>> inside = ' >>> foo = 1\\n' |
|
391 | 391 | >>> matcher.endsat(ctx, inside) |
|
392 | 392 | False |
|
393 | 393 | >>> matcher.isinside(ctx, inside) |
|
394 | 394 | True |
|
395 | 395 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
396 | 396 | 'foo = 1\\n' |
|
397 | 397 | >>> inside = ' ... foo = 1\\n' |
|
398 | 398 | >>> matcher.endsat(ctx, inside) |
|
399 | 399 | False |
|
400 | 400 | >>> matcher.isinside(ctx, inside) |
|
401 | 401 | True |
|
402 | 402 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
403 | 403 | 'foo = 1\\n' |
|
404 | 404 | >>> inside = ' expected output\\n' |
|
405 | 405 | >>> matcher.endsat(ctx, inside) |
|
406 | 406 | False |
|
407 | 407 | >>> matcher.isinside(ctx, inside) |
|
408 | 408 | True |
|
409 | 409 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
410 | 410 | '\\n' |
|
411 | 411 | >>> inside = ' \\n' |
|
412 | 412 | >>> matcher.endsat(ctx, inside) |
|
413 | 413 | False |
|
414 | 414 | >>> matcher.isinside(ctx, inside) |
|
415 | 415 | True |
|
416 | 416 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
417 | 417 | '\\n' |
|
418 | 418 | >>> end = ' $ foo bar\\n' |
|
419 | 419 | >>> matcher.endsat(ctx, end) |
|
420 | 420 | True |
|
421 | 421 | >>> matcher.codeatend(ctx, end) |
|
422 | 422 | >>> end = '\\n' |
|
423 | 423 | >>> matcher.endsat(ctx, end) |
|
424 | 424 | True |
|
425 | 425 | >>> matcher.codeatend(ctx, end) |
|
426 | 426 | """ |
|
427 | 427 | |
|
428 | 428 | _prefix = ' >>> ' |
|
429 | 429 | _prefixre = re.compile(r' (>>>|\.\.\.) ') |
|
430 | 430 | |
|
431 | 431 | # If a line matches against not _prefixre but _outputre, that line |
|
432 | 432 | # is "an expected output line" (= not a part of code fragment). |
|
433 | 433 | # |
|
434 | 434 | # Strictly speaking, a line matching against "(#if|#else|#endif)" |
|
435 | 435 | # is also treated similarly in "inline python code" semantics by |
|
436 | 436 | # run-tests.py. But "directive line inside inline python code" |
|
437 | 437 | # should be rejected by Mercurial reviewers. Therefore, this |
|
438 | 438 | # regexp does not match against such directive lines. |
|
439 | 439 | _outputre = re.compile(r' $| [^$]') |
|
440 | 440 | |
|
441 | 441 | def __init__(self): |
|
442 | 442 | super(pydoctestmatcher, self).__init__("doctest style python code") |
|
443 | 443 | |
|
444 | 444 | def startsat(self, line): |
|
445 | 445 | # ctx is "True" |
|
446 | 446 | return line.startswith(self._prefix) |
|
447 | 447 | |
|
448 | 448 | def endsat(self, ctx, line): |
|
449 | 449 | return not (self._prefixre.match(line) or self._outputre.match(line)) |
|
450 | 450 | |
|
451 | 451 | def isinside(self, ctx, line): |
|
452 | 452 | return True # always true, if not yet ended |
|
453 | 453 | |
|
454 | 454 | def ignores(self, ctx): |
|
455 | 455 | return False # should be checked always |
|
456 | 456 | |
|
457 | 457 | def filename(self, ctx): |
|
458 | 458 | return None # no filename |
|
459 | 459 | |
|
460 | 460 | def codeatstart(self, ctx, line): |
|
461 | 461 | return line[len(self._prefix) :] # strip prefix ' >>> '/' ... ' |
|
462 | 462 | |
|
463 | 463 | def codeatend(self, ctx, line): |
|
464 | 464 | return None # no embedded code at end line |
|
465 | 465 | |
|
466 | 466 | def codeinside(self, ctx, line): |
|
467 | 467 | if self._prefixre.match(line): |
|
468 | 468 | return line[len(self._prefix) :] # strip prefix ' >>> '/' ... ' |
|
469 | 469 | return '\n' # an expected output line is treated as an empty line |
|
470 | 470 | |
|
471 | 471 | |
|
472 | 472 | class pyheredocmatcher(embeddedmatcher): |
|
473 | 473 | """Detect "python << LIMIT" style embedded python code |
|
474 | 474 | |
|
475 | 475 | >>> matcher = pyheredocmatcher() |
|
476 | 476 | >>> b2s(matcher.startsat(' $ python << EOF\\n')) |
|
477 | 477 | ' > EOF\\n' |
|
478 | 478 | >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n')) |
|
479 | 479 | ' > EOF\\n' |
|
480 | 480 | >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n')) |
|
481 | 481 | ' > EOF\\n' |
|
482 | 482 | >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n")) |
|
483 | 483 | ' > ANYLIMIT\\n' |
|
484 | 484 | >>> matcher.startsat(' $ "$PYTHON" < EOF\\n') |
|
485 | 485 | >>> start = ' $ python << EOF\\n' |
|
486 | 486 | >>> ctx = matcher.startsat(start) |
|
487 | 487 | >>> matcher.codeatstart(ctx, start) |
|
488 | 488 | >>> matcher.filename(ctx) |
|
489 | 489 | >>> matcher.ignores(ctx) |
|
490 | 490 | False |
|
491 | 491 | >>> inside = ' > foo = 1\\n' |
|
492 | 492 | >>> matcher.endsat(ctx, inside) |
|
493 | 493 | False |
|
494 | 494 | >>> matcher.isinside(ctx, inside) |
|
495 | 495 | True |
|
496 | 496 | >>> b2s(matcher.codeinside(ctx, inside)) |
|
497 | 497 | 'foo = 1\\n' |
|
498 | 498 | >>> end = ' > EOF\\n' |
|
499 | 499 | >>> matcher.endsat(ctx, end) |
|
500 | 500 | True |
|
501 | 501 | >>> matcher.codeatend(ctx, end) |
|
502 | 502 | >>> matcher.endsat(ctx, ' > EOFEOF\\n') |
|
503 | 503 | False |
|
504 | 504 | >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n') |
|
505 | 505 | >>> matcher.ignores(ctx) |
|
506 | 506 | True |
|
507 | 507 | """ |
|
508 | 508 | |
|
509 | 509 | _prefix = ' > ' |
|
510 | 510 | |
|
511 | 511 | _startre = re.compile( |
|
512 | 512 | r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat |
|
513 | 513 | ) |
|
514 | 514 | |
|
515 | 515 | def __init__(self): |
|
516 | 516 | super(pyheredocmatcher, self).__init__("heredoc python invocation") |
|
517 | 517 | |
|
518 | 518 | def startsat(self, line): |
|
519 | 519 | # ctx is END-LINE-OF-EMBEDDED-CODE |
|
520 | 520 | matched = self._startre.match(line) |
|
521 | 521 | if matched: |
|
522 | 522 | return ' > %s\n' % matched.group('limit') |
|
523 | 523 | |
|
524 | 524 | def endsat(self, ctx, line): |
|
525 | 525 | return ctx == line |
|
526 | 526 | |
|
527 | 527 | def isinside(self, ctx, line): |
|
528 | 528 | return line.startswith(self._prefix) |
|
529 | 529 | |
|
530 | 530 | def ignores(self, ctx): |
|
531 | 531 | return ' > %s\n' % heredocignorelimit == ctx |
|
532 | 532 | |
|
533 | 533 | def filename(self, ctx): |
|
534 | 534 | return None # no filename |
|
535 | 535 | |
|
536 | 536 | def codeatstart(self, ctx, line): |
|
537 | 537 | return None # no embedded code at start line |
|
538 | 538 | |
|
539 | 539 | def codeatend(self, ctx, line): |
|
540 | 540 | return None # no embedded code at end line |
|
541 | 541 | |
|
542 | 542 | def codeinside(self, ctx, line): |
|
543 | 543 | return line[len(self._prefix) :] # strip prefix |
|
544 | 544 | |
|
545 | 545 | |
|
546 | 546 | _pymatchers = [ |
|
547 | 547 | pydoctestmatcher(), |
|
548 | 548 | pyheredocmatcher(), |
|
549 | 549 | # use '[^<]+' instead of '\S+', in order to match against |
|
550 | 550 | # paths including whitespaces |
|
551 | 551 | fileheredocmatcher('heredoc .py file', r'[^<]+\.py'), |
|
552 | 552 | ] |
|
553 | 553 | |
|
554 | 554 | |
|
555 | 555 | def pyembedded(basefile, lines, errors): |
|
556 | 556 | return embedded(basefile, lines, errors, _pymatchers) |
|
557 | 557 | |
|
558 | 558 | |
|
559 | 559 | #### |
|
560 | 560 | # for embedded shell script |
|
561 | 561 | |
|
562 | 562 | _shmatchers = [ |
|
563 | 563 | # use '[^<]+' instead of '\S+', in order to match against |
|
564 | 564 | # paths including whitespaces |
|
565 | 565 | fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'), |
|
566 | 566 | ] |
|
567 | 567 | |
|
568 | 568 | |
|
569 | 569 | def shembedded(basefile, lines, errors): |
|
570 | 570 | return embedded(basefile, lines, errors, _shmatchers) |
|
571 | 571 | |
|
572 | 572 | |
|
573 | 573 | #### |
|
574 | 574 | # for embedded hgrc configuration |
|
575 | 575 | |
|
576 | 576 | _hgrcmatchers = [ |
|
577 | 577 | # use '[^<]+' instead of '\S+', in order to match against |
|
578 | 578 | # paths including whitespaces |
|
579 | 579 | fileheredocmatcher( |
|
580 | 580 | 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})' |
|
581 | 581 | ), |
|
582 | 582 | ] |
|
583 | 583 | |
|
584 | 584 | |
|
585 | 585 | def hgrcembedded(basefile, lines, errors): |
|
586 | 586 | return embedded(basefile, lines, errors, _hgrcmatchers) |
|
587 | 587 | |
|
588 | 588 | |
|
589 | 589 | #### |
|
590 | 590 | |
|
591 | 591 | if __name__ == "__main__": |
|
592 | 592 | import optparse |
|
593 | 593 | import sys |
|
594 | 594 | |
|
595 | 595 | def showembedded(basefile, lines, embeddedfunc, opts): |
|
596 | 596 | errors = [] |
|
597 | 597 | for name, starts, ends, code in embeddedfunc(basefile, lines, errors): |
|
598 | 598 | if not name: |
|
599 | 599 | name = '<anonymous>' |
|
600 | 600 | writeout("%s:%d: %s starts\n" % (basefile, starts, name)) |
|
601 | 601 | if opts.verbose and code: |
|
602 | 602 | writeout(" |%s\n" % "\n |".join(l for l in code.splitlines())) |
|
603 | 603 | writeout("%s:%d: %s ends\n" % (basefile, ends, name)) |
|
604 | 604 | for e in errors: |
|
605 | 605 | writeerr("%s\n" % e) |
|
606 | 606 | return len(errors) |
|
607 | 607 | |
|
608 | 608 | def applyembedded(args, embeddedfunc, opts): |
|
609 | 609 | ret = 0 |
|
610 | 610 | if args: |
|
611 | 611 | for f in args: |
|
612 | 612 | with opentext(f) as fp: |
|
613 | 613 | if showembedded(f, fp, embeddedfunc, opts): |
|
614 | 614 | ret = 1 |
|
615 | 615 | else: |
|
616 | 616 | lines = [l for l in sys.stdin.readlines()] |
|
617 | 617 | if showembedded('<stdin>', lines, embeddedfunc, opts): |
|
618 | 618 | ret = 1 |
|
619 | 619 | return ret |
|
620 | 620 | |
|
621 | 621 | commands = {} |
|
622 | 622 | |
|
623 | 623 | def command(name, desc): |
|
624 | 624 | def wrap(func): |
|
625 | 625 | commands[name] = (desc, func) |
|
626 | 626 | |
|
627 | 627 | return wrap |
|
628 | 628 | |
|
629 | 629 | @command("pyembedded", "detect embedded python script") |
|
630 | 630 | def pyembeddedcmd(args, opts): |
|
631 | 631 | return applyembedded(args, pyembedded, opts) |
|
632 | 632 | |
|
633 | 633 | @command("shembedded", "detect embedded shell script") |
|
634 | 634 | def shembeddedcmd(args, opts): |
|
635 | 635 | return applyembedded(args, shembedded, opts) |
|
636 | 636 | |
|
637 | 637 | @command("hgrcembedded", "detect embedded hgrc configuration") |
|
638 | 638 | def hgrcembeddedcmd(args, opts): |
|
639 | 639 | return applyembedded(args, hgrcembedded, opts) |
|
640 | 640 | |
|
641 | 641 | availablecommands = "\n".join( |
|
642 | 642 | [" - %s: %s" % (key, value[0]) for key, value in commands.items()] |
|
643 | 643 | ) |
|
644 | 644 | |
|
645 | 645 | parser = optparse.OptionParser( |
|
646 | 646 | """%prog COMMAND [file ...] |
|
647 | 647 | |
|
648 | 648 | Pick up embedded code fragments from given file(s) or stdin, and list |
|
649 | 649 | up start/end lines of them in standard compiler format |
|
650 | 650 | ("FILENAME:LINENO:"). |
|
651 | 651 | |
|
652 | 652 | Available commands are: |
|
653 | 653 | """ |
|
654 | 654 | + availablecommands |
|
655 | 655 | + """ |
|
656 | 656 | """ |
|
657 | 657 | ) |
|
658 | 658 | parser.add_option( |
|
659 | 659 | "-v", |
|
660 | 660 | "--verbose", |
|
661 | 661 | help="enable additional output (e.g. actual code)", |
|
662 | 662 | action="store_true", |
|
663 | 663 | ) |
|
664 | 664 | (opts, args) = parser.parse_args() |
|
665 | 665 | |
|
666 | 666 | if not args or args[0] not in commands: |
|
667 | 667 | parser.print_help() |
|
668 | 668 | sys.exit(255) |
|
669 | 669 | |
|
670 | 670 | sys.exit(commands[args[0]][1](args[1:], opts)) |
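
The entry points defined above (pyembedded, shembedded, hgrcembedded) all yield (filename, starts, ends, code) tuples and append messages to a caller-supplied errors list. A minimal usage sketch, assuming testparseutil is importable and 'sample.t' is a hypothetical test script (neither comes from the diff itself):

    import testparseutil

    errors = []
    with open('sample.t') as fp:
        for name, starts, ends, code in testparseutil.pyembedded('sample.t', fp, errors):
            # 'name' is None for anonymous fragments such as doctest-style code;
            # 'ends' is exclusive, per the embedded() docstring above.
            print('%s: lines %d-%d' % (name or '<anonymous>', starts, ends - 1))
    for e in errors:
        print('error: %s' % e)
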
@@ -1,746 +1,746 @@
|
1 | 1 | # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2017 Facebook, Inc. |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import contextlib |
|
11 | 11 | import errno |
|
12 | 12 | import hashlib |
|
13 | 13 | import json |
|
14 | 14 | import os |
|
15 | 15 | import re |
|
16 | 16 | import socket |
|
17 | 17 | |
|
18 | 18 | from mercurial.i18n import _ |
|
19 | 19 | from mercurial.pycompat import getattr |
|
20 | 20 | |
|
21 | 21 | from mercurial import ( |
|
22 | 22 | encoding, |
|
23 | 23 | error, |
|
24 | 24 | node, |
|
25 | 25 | pathutil, |
|
26 | 26 | pycompat, |
|
27 | 27 | url as urlmod, |
|
28 | 28 | util, |
|
29 | 29 | vfs as vfsmod, |
|
30 | 30 | worker, |
|
31 | 31 | ) |
|
32 | 32 | |
|
33 | 33 | from mercurial.utils import stringutil |
|
34 | 34 | |
|
35 | 35 | from ..largefiles import lfutil |
|
36 | 36 | |
|
37 | 37 | # 64 bytes for SHA256 |
|
38 | 38 | _lfsre = re.compile(br'\A[a-f0-9]{64}\Z') |
|
39 | 39 | |
|
40 | 40 | |
|
41 | 41 | class lfsvfs(vfsmod.vfs): |
|
42 | 42 | def join(self, path): |
|
43 | 43 | """split the path at first two characters, like: XX/XXXXX...""" |
|
44 | 44 | if not _lfsre.match(path): |
|
45 | 45 | raise error.ProgrammingError(b'unexpected lfs path: %s' % path) |
|
46 | 46 | return super(lfsvfs, self).join(path[0:2], path[2:]) |
|
47 | 47 | |
|
48 | 48 | def walk(self, path=None, onerror=None): |
|
49 | 49 | """Yield (dirpath, [], oids) tuple for blobs under path |
|
50 | 50 | |
|
51 | 51 | Oids only exist in the root of this vfs, so dirpath is always ''. |
|
52 | 52 | """ |
|
53 | 53 | root = os.path.normpath(self.base) |
|
54 | 54 | # when dirpath == root, dirpath[prefixlen:] becomes empty |
|
55 | 55 | # because len(dirpath) < prefixlen. |
|
56 | 56 | prefixlen = len(pathutil.normasprefix(root)) |
|
57 | 57 | oids = [] |
|
58 | 58 | |
|
59 | 59 | for dirpath, dirs, files in os.walk( |
|
60 | 60 | self.reljoin(self.base, path or b''), onerror=onerror |
|
61 | 61 | ): |
|
62 | 62 | dirpath = dirpath[prefixlen:] |
|
63 | 63 | |
|
64 | 64 | # Silently skip unexpected files and directories |
|
65 | 65 | if len(dirpath) == 2: |
|
66 | 66 | oids.extend( |
|
67 | 67 | [dirpath + f for f in files if _lfsre.match(dirpath + f)] |
|
68 | 68 | ) |
|
69 | 69 | |
|
70 | 70 | yield (b'', [], oids) |
|
71 | 71 | |
|
72 | 72 | |
|
73 | 73 | class nullvfs(lfsvfs): |
|
74 | 74 | def __init__(self): |
|
75 | 75 | pass |
|
76 | 76 | |
|
77 | 77 | def exists(self, oid): |
|
78 | 78 | return False |
|
79 | 79 | |
|
80 | 80 | def read(self, oid): |
|
81 | 81 | # store.read() calls into here if the blob doesn't exist in its |
|
82 | 82 | # self.vfs. Raise the same error as a normal vfs when asked to read a |
|
83 | 83 | # file that doesn't exist. The only difference is the full file path |
|
84 | 84 | # isn't available in the error. |
|
85 | 85 | raise IOError( |
|
86 | 86 | errno.ENOENT, |
|
87 | 87 | pycompat.sysstr(b'%s: No such file or directory' % oid), |
|
88 | 88 | ) |
|
89 | 89 | |
|
90 | 90 | def walk(self, path=None, onerror=None): |
|
91 | 91 | return (b'', [], []) |
|
92 | 92 | |
|
93 | 93 | def write(self, oid, data): |
|
94 | 94 | pass |
|
95 | 95 | |
|
96 | 96 | |
|
97 | 97 | class filewithprogress(object): |
|
98 | 98 | """a file-like object that supports __len__ and read. |
|
99 | 99 | |
|
100 | 100 | Useful to provide progress information for how many bytes are read. |
|
101 | 101 | """ |
|
102 | 102 | |
|
103 | 103 | def __init__(self, fp, callback): |
|
104 | 104 | self._fp = fp |
|
105 | 105 | self._callback = callback # func(readsize) |
|
106 | 106 | fp.seek(0, os.SEEK_END) |
|
107 | 107 | self._len = fp.tell() |
|
108 | 108 | fp.seek(0) |
|
109 | 109 | |
|
110 | 110 | def __len__(self): |
|
111 | 111 | return self._len |
|
112 | 112 | |
|
113 | 113 | def read(self, size): |
|
114 | 114 | if self._fp is None: |
|
115 | 115 | return b'' |
|
116 | 116 | data = self._fp.read(size) |
|
117 | 117 | if data: |
|
118 | 118 | if self._callback: |
|
119 | 119 | self._callback(len(data)) |
|
120 | 120 | else: |
|
121 | 121 | self._fp.close() |
|
122 | 122 | self._fp = None |
|
123 | 123 | return data |
|
124 | 124 | |
|
125 | 125 | |
|
126 | 126 | class local(object): |
|
127 | 127 | """Local blobstore for large file contents. |
|
128 | 128 | |
|
129 | 129 | This blobstore is used both as a cache and as a staging area for large blobs |
|
130 | 130 | to be uploaded to the remote blobstore. |
|
131 | 131 | """ |
|
132 | 132 | |
|
133 | 133 | def __init__(self, repo): |
|
134 | 134 | fullpath = repo.svfs.join(b'lfs/objects') |
|
135 | 135 | self.vfs = lfsvfs(fullpath) |
|
136 | 136 | |
|
137 | 137 | if repo.ui.configbool(b'experimental', b'lfs.disableusercache'): |
|
138 | 138 | self.cachevfs = nullvfs() |
|
139 | 139 | else: |
|
140 | 140 | usercache = lfutil._usercachedir(repo.ui, b'lfs') |
|
141 | 141 | self.cachevfs = lfsvfs(usercache) |
|
142 | 142 | self.ui = repo.ui |
|
143 | 143 | |
|
144 | 144 | def open(self, oid): |
|
145 | 145 | """Open a read-only file descriptor to the named blob, in either the |
|
146 | 146 | usercache or the local store.""" |
|
147 | 147 | # The usercache is the most likely place to hold the file. Commit will |
|
148 | 148 | # write to both it and the local store, as will anything that downloads |
|
149 | 149 | # the blobs. However, things like clone without an update won't |
|
150 | 150 | # populate the local store. For an init + push of a local clone, |
|
151 | 151 | # the usercache is the only place it _could_ be. If not present, the |
|
152 | 152 | # missing file msg here will indicate the local repo, not the usercache. |
|
153 | 153 | if self.cachevfs.exists(oid): |
|
154 | 154 | return self.cachevfs(oid, b'rb') |
|
155 | 155 | |
|
156 | 156 | return self.vfs(oid, b'rb') |
|
157 | 157 | |
|
158 | 158 | def download(self, oid, src): |
|
159 | 159 | """Read the blob from the remote source in chunks, verify the content, |
|
160 | 160 | and write to this local blobstore.""" |
|
161 | 161 | sha256 = hashlib.sha256() |
|
162 | 162 | |
|
163 | 163 | with self.vfs(oid, b'wb', atomictemp=True) as fp: |
|
164 | 164 | for chunk in util.filechunkiter(src, size=1048576): |
|
165 | 165 | fp.write(chunk) |
|
166 | 166 | sha256.update(chunk) |
|
167 | 167 | |
|
168 | 168 | realoid = node.hex(sha256.digest()) |
|
169 | 169 | if realoid != oid: |
|
170 | 170 | raise LfsCorruptionError( |
|
171 | 171 | _(b'corrupt remote lfs object: %s') % oid |
|
172 | 172 | ) |
|
173 | 173 | |
|
174 | 174 | self._linktousercache(oid) |
|
175 | 175 | |
|
176 | 176 | def write(self, oid, data): |
|
177 | 177 | """Write blob to local blobstore. |
|
178 | 178 | |
|
179 | 179 | This should only be called from the filelog during a commit or similar. |
|
180 | 180 | As such, there is no need to verify the data. Imports from a remote |
|
181 | 181 | store must use ``download()`` instead.""" |
|
182 | 182 | with self.vfs(oid, b'wb', atomictemp=True) as fp: |
|
183 | 183 | fp.write(data) |
|
184 | 184 | |
|
185 | 185 | self._linktousercache(oid) |
|
186 | 186 | |
|
187 | 187 | def linkfromusercache(self, oid): |
|
188 | 188 | """Link blobs found in the user cache into this store. |
|
189 | 189 | |
|
190 | 190 | The server module needs to do this when it lets the client know not to |
|
191 | 191 | upload the blob, to ensure it is always available in this store. |
|
192 | 192 | Normally this is done implicitly when the client reads or writes the |
|
193 | 193 | blob, but that doesn't happen when the server tells the client that it |
|
194 | 194 | already has the blob. |
|
195 | 195 | """ |
|
196 | 196 | if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid): |
|
197 | 197 | self.ui.note(_(b'lfs: found %s in the usercache\n') % oid) |
|
198 | 198 | lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid)) |
|
199 | 199 | |
|
200 | 200 | def _linktousercache(self, oid): |
|
201 | 201 | # XXX: should we verify the content of the cache, and hardlink back to |
|
202 | 202 | # the local store on success, but truncate, write and link on failure? |
|
203 | 203 | if not self.cachevfs.exists(oid) and not isinstance( |
|
204 | 204 | self.cachevfs, nullvfs |
|
205 | 205 | ): |
|
206 | 206 | self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid) |
|
207 | 207 | lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid)) |
|
208 | 208 | |
|
209 | 209 | def read(self, oid, verify=True): |
|
210 | 210 | """Read blob from local blobstore.""" |
|
211 | 211 | if not self.vfs.exists(oid): |
|
212 | 212 | blob = self._read(self.cachevfs, oid, verify) |
|
213 | 213 | |
|
214 | 214 | # Even if revlog will verify the content, it needs to be verified |
|
215 | 215 | # now before making the hardlink to avoid propagating corrupt blobs. |
|
216 | 216 | # Don't abort if corruption is detected, because `hg verify` will |
|
217 | 217 | # give more useful info about the corruption- simply don't add the |
|
218 | 218 | # hardlink. |
|
219 | 219 | if verify or node.hex(hashlib.sha256(blob).digest()) == oid: |
|
220 | 220 | self.ui.note(_(b'lfs: found %s in the usercache\n') % oid) |
|
221 | 221 | lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid)) |
|
222 | 222 | else: |
|
223 | 223 | self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid) |
|
224 | 224 | blob = self._read(self.vfs, oid, verify) |
|
225 | 225 | return blob |
|
226 | 226 | |
|
227 | 227 | def _read(self, vfs, oid, verify): |
|
228 | 228 | """Read blob (after verifying) from the given store""" |
|
229 | 229 | blob = vfs.read(oid) |
|
230 | 230 | if verify: |
|
231 | 231 | _verify(oid, blob) |
|
232 | 232 | return blob |
|
233 | 233 | |
|
234 | 234 | def verify(self, oid): |
|
235 | 235 | """Indicate whether or not the hash of the underlying file matches its |
|
236 | 236 | name.""" |
|
237 | 237 | sha256 = hashlib.sha256() |
|
238 | 238 | |
|
239 | 239 | with self.open(oid) as fp: |
|
240 | 240 | for chunk in util.filechunkiter(fp, size=1048576): |
|
241 | 241 | sha256.update(chunk) |
|
242 | 242 | |
|
243 | 243 | return oid == node.hex(sha256.digest()) |
|
244 | 244 | |
|
245 | 245 | def has(self, oid): |
|
246 | 246 | """Returns True if the local blobstore contains the requested blob, |
|
247 | 247 | False otherwise.""" |
|
248 | 248 | return self.cachevfs.exists(oid) or self.vfs.exists(oid) |
|
249 | 249 | |
|
250 | 250 | |
|
251 | 251 | def _urlerrorreason(urlerror): |
|
252 | 252 | '''Create a friendly message for the given URLError to be used in an |
|
253 | 253 | LfsRemoteError message. |
|
254 | 254 | ''' |
|
255 | 255 | inst = urlerror |
|
256 | 256 | |
|
257 | 257 | if isinstance(urlerror.reason, Exception): |
|
258 | 258 | inst = urlerror.reason |
|
259 | 259 | |
|
260 | 260 | if util.safehasattr(inst, b'reason'): |
|
261 | 261 | try: # usually it is in the form (errno, strerror) |
|
262 | 262 | reason = inst.reason.args[1] |
|
263 | 263 | except (AttributeError, IndexError): |
|
264 | 264 | # it might be anything, for example a string |
|
265 | 265 | reason = inst.reason |
|
266 | 266 | if isinstance(reason, pycompat.unicode): |
|
267 | 267 | # SSLError of Python 2.7.9 contains a unicode |
|
268 | 268 | reason = encoding.unitolocal(reason) |
|
269 | 269 | return reason |
|
270 | 270 | elif getattr(inst, "strerror", None): |
|
271 | 271 | return encoding.strtolocal(inst.strerror) |
|
272 | 272 | else: |
|
273 | 273 | return stringutil.forcebytestr(urlerror) |
|
274 | 274 | |
|
275 | 275 | |
|
276 | 276 | class lfsauthhandler(util.urlreq.basehandler): |
|
277 | 277 | handler_order = 480 # Before HTTPDigestAuthHandler (== 490) |
|
278 | 278 | |
|
279 | 279 | def http_error_401(self, req, fp, code, msg, headers): |
|
280 | 280 | """Enforces that any authentication performed is HTTP Basic |
|
281 | 281 | Authentication. No authentication is also acceptable. |
|
282 | 282 | """ |
|
283 | 283 | authreq = headers.get(r'www-authenticate', None) |
|
284 | 284 | if authreq: |
|
285 | 285 | scheme = authreq.split()[0] |
|
286 | 286 | |
|
287 | 287 | if scheme.lower() != r'basic': |
|
288 | 288 | msg = _(b'the server must support Basic Authentication') |
|
289 | 289 | raise util.urlerr.httperror( |
|
290 | 290 | req.get_full_url(), |
|
291 | 291 | code, |
|
292 | 292 | encoding.strfromlocal(msg), |
|
293 | 293 | headers, |
|
294 | 294 | fp, |
|
295 | 295 | ) |
|
296 | 296 | return None |
|
297 | 297 | |
|
298 | 298 | |
|
299 | 299 | class _gitlfsremote(object): |
|
300 | 300 | def __init__(self, repo, url): |
|
301 | 301 | ui = repo.ui |
|
302 | 302 | self.ui = ui |
|
303 | 303 | baseurl, authinfo = url.authinfo() |
|
304 | 304 | self.baseurl = baseurl.rstrip(b'/') |
|
305 | 305 | useragent = repo.ui.config(b'experimental', b'lfs.user-agent') |
|
306 | 306 | if not useragent: |
|
307 | 307 | useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version() |
|
308 | 308 | self.urlopener = urlmod.opener(ui, authinfo, useragent) |
|
309 | 309 | self.urlopener.add_handler(lfsauthhandler()) |
|
310 | 310 | self.retry = ui.configint(b'lfs', b'retry') |
|
311 | 311 | |
|
312 | 312 | def writebatch(self, pointers, fromstore): |
|
313 | 313 | """Batch upload from local to remote blobstore.""" |
|
314 | 314 | self._batch(_deduplicate(pointers), fromstore, b'upload') |
|
315 | 315 | |
|
316 | 316 | def readbatch(self, pointers, tostore): |
|
317 | 317 | """Batch download from remote to local blostore.""" |
|
318 | 318 | self._batch(_deduplicate(pointers), tostore, b'download') |
|
319 | 319 | |
|
320 | 320 | def _batchrequest(self, pointers, action): |
|
321 | 321 | """Get metadata about objects pointed by pointers for given action |
|
322 | 322 | |
|
323 | 323 | Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]} |
|
324 | 324 | See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md |
|
325 | 325 | """ |
|
326 | 326 | objects = [ |
|
327 | 327 | {r'oid': pycompat.strurl(p.oid()), r'size': p.size()} |
|
328 | 328 | for p in pointers |
|
329 | 329 | ] |
|
330 | 330 | requestdata = pycompat.bytesurl( |
|
331 | 331 | json.dumps( |
|
332 | 332 | {r'objects': objects, r'operation': pycompat.strurl(action),} |
|
333 | 333 | ) |
|
334 | 334 | ) |
|
335 | 335 | url = b'%s/objects/batch' % self.baseurl |
|
336 | 336 | batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata) |
|
337 | 337 | batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json') |
|
338 | 338 | batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json') |
|
339 | 339 | try: |
|
340 | 340 | with contextlib.closing(self.urlopener.open(batchreq)) as rsp: |
|
341 | 341 | rawjson = rsp.read() |
|
342 | 342 | except util.urlerr.httperror as ex: |
|
343 | 343 | hints = { |
|
344 | 344 | 400: _( |
|
345 | 345 | b'check that lfs serving is enabled on %s and "%s" is ' |
|
346 | 346 | b'supported' |
|
347 | 347 | ) |
|
348 | 348 | % (self.baseurl, action), |
|
349 | 349 | 404: _(b'the "lfs.url" config may be used to override %s') |
|
350 | 350 | % self.baseurl, |
|
351 | 351 | } |
|
352 | 352 | hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action)) |
|
353 | 353 | raise LfsRemoteError( |
|
354 | 354 | _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex), |
|
355 | 355 | hint=hint, |
|
356 | 356 | ) |
|
357 | 357 | except util.urlerr.urlerror as ex: |
|
358 | 358 | hint = ( |
|
359 | 359 | _(b'the "lfs.url" config may be used to override %s') |
|
360 | 360 | % self.baseurl |
|
361 | 361 | ) |
|
362 | 362 | raise LfsRemoteError( |
|
363 | 363 | _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint |
|
364 | 364 | ) |
|
365 | 365 | try: |
|
366 | 366 | response = json.loads(rawjson) |
|
367 | 367 | except ValueError: |
|
368 | 368 | raise LfsRemoteError( |
|
369 | 369 | _(b'LFS server returns invalid JSON: %s') |
|
370 | 370 | % rawjson.encode("utf-8") |
|
371 | 371 | ) |
|
372 | 372 | |
|
373 | 373 | if self.ui.debugflag: |
|
374 | 374 | self.ui.debug(b'Status: %d\n' % rsp.status) |
|
375 | 375 | # lfs-test-server and hg serve return headers in different order |
|
376 | 376 | headers = pycompat.bytestr(rsp.info()).strip() |
|
377 | 377 | self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines()))) |
|
378 | 378 | |
|
379 | 379 | if r'objects' in response: |
|
380 | 380 | response[r'objects'] = sorted( |
|
381 | 381 | response[r'objects'], key=lambda p: p[r'oid'] |
|
382 | 382 | ) |
|
383 | 383 | self.ui.debug( |
|
384 | 384 | b'%s\n' |
|
385 | 385 | % pycompat.bytesurl( |
|
386 | 386 | json.dumps( |
|
387 | 387 | response, |
|
388 | 388 | indent=2, |
|
389 | 389 | separators=(r'', r': '), |
|
390 | 390 | sort_keys=True, |
|
391 | 391 | ) |
|
392 | 392 | ) |
|
393 | 393 | ) |
|
394 | 394 | |
|
395 | 395 | def encodestr(x): |
|
396 | 396 | if isinstance(x, pycompat.unicode): |
|
397 |     | return x.encode(
|
    | 397 | return x.encode('utf-8') |
|
398 | 398 | return x |
|
399 | 399 | |
|
400 | 400 | return pycompat.rapply(encodestr, response) |
|
401 | 401 | |
|
402 | 402 | def _checkforservererror(self, pointers, responses, action): |
|
403 | 403 | """Scans errors from objects |
|
404 | 404 | |
|
405 | 405 | Raises LfsRemoteError if any objects have an error""" |
|
406 | 406 | for response in responses: |
|
407 | 407 | # The server should return 404 when objects cannot be found. Some |
|
408 | 408 | # server implementation (ex. lfs-test-server) does not set "error" |
|
409 | 409 | # but just removes "download" from "actions". Treat that case |
|
410 | 410 | # as the same as 404 error. |
|
411 | 411 | if b'error' not in response: |
|
412 | 412 | if action == b'download' and action not in response.get( |
|
413 | 413 | b'actions', [] |
|
414 | 414 | ): |
|
415 | 415 | code = 404 |
|
416 | 416 | else: |
|
417 | 417 | continue |
|
418 | 418 | else: |
|
419 | 419 | # An error dict without a code doesn't make much sense, so |
|
420 | 420 | # treat as a server error. |
|
421 | 421 | code = response.get(b'error').get(b'code', 500) |
|
422 | 422 | |
|
423 | 423 | ptrmap = {p.oid(): p for p in pointers} |
|
424 | 424 | p = ptrmap.get(response[b'oid'], None) |
|
425 | 425 | if p: |
|
426 | 426 | filename = getattr(p, 'filename', b'unknown') |
|
427 | 427 | errors = { |
|
428 | 428 | 404: b'The object does not exist', |
|
429 | 429 | 410: b'The object was removed by the owner', |
|
430 | 430 | 422: b'Validation error', |
|
431 | 431 | 500: b'Internal server error', |
|
432 | 432 | } |
|
433 | 433 | msg = errors.get(code, b'status code %d' % code) |
|
434 | 434 | raise LfsRemoteError( |
|
435 | 435 | _(b'LFS server error for "%s": %s') % (filename, msg) |
|
436 | 436 | ) |
|
437 | 437 | else: |
|
438 | 438 | raise LfsRemoteError( |
|
439 | 439 | _(b'LFS server error. Unsolicited response for oid %s') |
|
440 | 440 | % response[b'oid'] |
|
441 | 441 | ) |
|
442 | 442 | |
|
443 | 443 | def _extractobjects(self, response, pointers, action): |
|
444 | 444 | """extract objects from response of the batch API |
|
445 | 445 | |
|
446 | 446 | response: parsed JSON object returned by batch API |
|
447 | 447 | return response['objects'] filtered by action |
|
448 | 448 | raise if any object has an error |
|
449 | 449 | """ |
|
450 | 450 | # Scan errors from objects - fail early |
|
451 | 451 | objects = response.get(b'objects', []) |
|
452 | 452 | self._checkforservererror(pointers, objects, action) |
|
453 | 453 | |
|
454 | 454 | # Filter objects with given action. Practically, this skips uploading |
|
455 | 455 | # objects which exist in the server. |
|
456 | 456 | filteredobjects = [ |
|
457 | 457 | o for o in objects if action in o.get(b'actions', []) |
|
458 | 458 | ] |
|
459 | 459 | |
|
460 | 460 | return filteredobjects |
|
461 | 461 | |
|
462 | 462 | def _basictransfer(self, obj, action, localstore): |
|
463 | 463 | """Download or upload a single object using basic transfer protocol |
|
464 | 464 | |
|
465 | 465 | obj: dict, an object description returned by batch API |
|
466 | 466 | action: string, one of ['upload', 'download'] |
|
467 | 467 | localstore: blobstore.local |
|
468 | 468 | |
|
469 | 469 | See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\ |
|
470 | 470 | basic-transfers.md |
|
471 | 471 | """ |
|
472 | 472 | oid = obj[b'oid'] |
|
473 | 473 | href = obj[b'actions'][action].get(b'href') |
|
474 | 474 | headers = obj[b'actions'][action].get(b'header', {}).items() |
|
475 | 475 | |
|
476 | 476 | request = util.urlreq.request(pycompat.strurl(href)) |
|
477 | 477 | if action == b'upload': |
|
478 | 478 | # If uploading blobs, read data from local blobstore. |
|
479 | 479 | if not localstore.verify(oid): |
|
480 | 480 | raise error.Abort( |
|
481 | 481 | _(b'detected corrupt lfs object: %s') % oid, |
|
482 | 482 | hint=_(b'run hg verify'), |
|
483 | 483 | ) |
|
484 | 484 | request.data = filewithprogress(localstore.open(oid), None) |
|
485 | 485 | request.get_method = lambda: r'PUT' |
|
486 | 486 | request.add_header(r'Content-Type', r'application/octet-stream') |
|
487 | 487 | request.add_header(r'Content-Length', len(request.data)) |
|
488 | 488 | |
|
489 | 489 | for k, v in headers: |
|
490 | 490 | request.add_header(pycompat.strurl(k), pycompat.strurl(v)) |
|
491 | 491 | |
|
492 | 492 | response = b'' |
|
493 | 493 | try: |
|
494 | 494 | with contextlib.closing(self.urlopener.open(request)) as req: |
|
495 | 495 | ui = self.ui # Shorten debug lines |
|
496 | 496 | if self.ui.debugflag: |
|
497 | 497 | ui.debug(b'Status: %d\n' % req.status) |
|
498 | 498 | # lfs-test-server and hg serve return headers in different |
|
499 | 499 | # order |
|
500 | 500 | headers = pycompat.bytestr(req.info()).strip() |
|
501 | 501 | ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines()))) |
|
502 | 502 | |
|
503 | 503 | if action == b'download': |
|
504 | 504 | # If downloading blobs, store downloaded data to local |
|
505 | 505 | # blobstore |
|
506 | 506 | localstore.download(oid, req) |
|
507 | 507 | else: |
|
508 | 508 | while True: |
|
509 | 509 | data = req.read(1048576) |
|
510 | 510 | if not data: |
|
511 | 511 | break |
|
512 | 512 | response += data |
|
513 | 513 | if response: |
|
514 | 514 | ui.debug(b'lfs %s response: %s' % (action, response)) |
|
515 | 515 | except util.urlerr.httperror as ex: |
|
516 | 516 | if self.ui.debugflag: |
|
517 | 517 | self.ui.debug( |
|
518 | 518 | b'%s: %s\n' % (oid, ex.read()) |
|
519 | 519 | ) # XXX: also bytes? |
|
520 | 520 | raise LfsRemoteError( |
|
521 | 521 | _(b'LFS HTTP error: %s (oid=%s, action=%s)') |
|
522 | 522 | % (stringutil.forcebytestr(ex), oid, action) |
|
523 | 523 | ) |
|
524 | 524 | except util.urlerr.urlerror as ex: |
|
525 | 525 | hint = _(b'attempted connection to %s') % pycompat.bytesurl( |
|
526 | 526 | util.urllibcompat.getfullurl(request) |
|
527 | 527 | ) |
|
528 | 528 | raise LfsRemoteError( |
|
529 | 529 | _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint |
|
530 | 530 | ) |
|
531 | 531 | |
|
532 | 532 | def _batch(self, pointers, localstore, action): |
|
533 | 533 | if action not in [b'upload', b'download']: |
|
534 | 534 | raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action) |
|
535 | 535 | |
|
536 | 536 | response = self._batchrequest(pointers, action) |
|
537 | 537 | objects = self._extractobjects(response, pointers, action) |
|
538 | 538 | total = sum(x.get(b'size', 0) for x in objects) |
|
539 | 539 | sizes = {} |
|
540 | 540 | for obj in objects: |
|
541 | 541 | sizes[obj.get(b'oid')] = obj.get(b'size', 0) |
|
542 | 542 | topic = { |
|
543 | 543 | b'upload': _(b'lfs uploading'), |
|
544 | 544 | b'download': _(b'lfs downloading'), |
|
545 | 545 | }[action] |
|
546 | 546 | if len(objects) > 1: |
|
547 | 547 | self.ui.note( |
|
548 | 548 | _(b'lfs: need to transfer %d objects (%s)\n') |
|
549 | 549 | % (len(objects), util.bytecount(total)) |
|
550 | 550 | ) |
|
551 | 551 | |
|
552 | 552 | def transfer(chunk): |
|
553 | 553 | for obj in chunk: |
|
554 | 554 | objsize = obj.get(b'size', 0) |
|
555 | 555 | if self.ui.verbose: |
|
556 | 556 | if action == b'download': |
|
557 | 557 | msg = _(b'lfs: downloading %s (%s)\n') |
|
558 | 558 | elif action == b'upload': |
|
559 | 559 | msg = _(b'lfs: uploading %s (%s)\n') |
|
560 | 560 | self.ui.note( |
|
561 | 561 | msg % (obj.get(b'oid'), util.bytecount(objsize)) |
|
562 | 562 | ) |
|
563 | 563 | retry = self.retry |
|
564 | 564 | while True: |
|
565 | 565 | try: |
|
566 | 566 | self._basictransfer(obj, action, localstore) |
|
567 | 567 | yield 1, obj.get(b'oid') |
|
568 | 568 | break |
|
569 | 569 | except socket.error as ex: |
|
570 | 570 | if retry > 0: |
|
571 | 571 | self.ui.note( |
|
572 | 572 | _(b'lfs: failed: %r (remaining retry %d)\n') |
|
573 | 573 | % (stringutil.forcebytestr(ex), retry) |
|
574 | 574 | ) |
|
575 | 575 | retry -= 1 |
|
576 | 576 | continue |
|
577 | 577 | raise |
|
578 | 578 | |
|
579 | 579 | # Until https multiplexing gets sorted out |
|
580 | 580 | if self.ui.configbool(b'experimental', b'lfs.worker-enable'): |
|
581 | 581 | oids = worker.worker( |
|
582 | 582 | self.ui, |
|
583 | 583 | 0.1, |
|
584 | 584 | transfer, |
|
585 | 585 | (), |
|
586 | 586 | sorted(objects, key=lambda o: o.get(b'oid')), |
|
587 | 587 | ) |
|
588 | 588 | else: |
|
589 | 589 | oids = transfer(sorted(objects, key=lambda o: o.get(b'oid'))) |
|
590 | 590 | |
|
591 | 591 | with self.ui.makeprogress(topic, total=total) as progress: |
|
592 | 592 | progress.update(0) |
|
593 | 593 | processed = 0 |
|
594 | 594 | blobs = 0 |
|
595 | 595 | for _one, oid in oids: |
|
596 | 596 | processed += sizes[oid] |
|
597 | 597 | blobs += 1 |
|
598 | 598 | progress.update(processed) |
|
599 | 599 | self.ui.note(_(b'lfs: processed: %s\n') % oid) |
|
600 | 600 | |
|
601 | 601 | if blobs > 0: |
|
602 | 602 | if action == b'upload': |
|
603 | 603 | self.ui.status( |
|
604 | 604 | _(b'lfs: uploaded %d files (%s)\n') |
|
605 | 605 | % (blobs, util.bytecount(processed)) |
|
606 | 606 | ) |
|
607 | 607 | elif action == b'download': |
|
608 | 608 | self.ui.status( |
|
609 | 609 | _(b'lfs: downloaded %d files (%s)\n') |
|
610 | 610 | % (blobs, util.bytecount(processed)) |
|
611 | 611 | ) |
|
612 | 612 | |
|
613 | 613 | def __del__(self): |
|
614 | 614 | # copied from mercurial/httppeer.py |
|
615 | 615 | urlopener = getattr(self, 'urlopener', None) |
|
616 | 616 | if urlopener: |
|
617 | 617 | for h in urlopener.handlers: |
|
618 | 618 | h.close() |
|
619 | 619 | getattr(h, "close_all", lambda: None)() |
|
620 | 620 | |
|
621 | 621 | |
|
622 | 622 | class _dummyremote(object): |
|
623 | 623 | """Dummy store storing blobs to temp directory.""" |
|
624 | 624 | |
|
625 | 625 | def __init__(self, repo, url): |
|
626 | 626 | fullpath = repo.vfs.join(b'lfs', url.path) |
|
627 | 627 | self.vfs = lfsvfs(fullpath) |
|
628 | 628 | |
|
629 | 629 | def writebatch(self, pointers, fromstore): |
|
630 | 630 | for p in _deduplicate(pointers): |
|
631 | 631 | content = fromstore.read(p.oid(), verify=True) |
|
632 | 632 | with self.vfs(p.oid(), b'wb', atomictemp=True) as fp: |
|
633 | 633 | fp.write(content) |
|
634 | 634 | |
|
635 | 635 | def readbatch(self, pointers, tostore): |
|
636 | 636 | for p in _deduplicate(pointers): |
|
637 | 637 | with self.vfs(p.oid(), b'rb') as fp: |
|
638 | 638 | tostore.download(p.oid(), fp) |
|
639 | 639 | |
|
640 | 640 | |
|
641 | 641 | class _nullremote(object): |
|
642 | 642 | """Null store storing blobs to /dev/null.""" |
|
643 | 643 | |
|
644 | 644 | def __init__(self, repo, url): |
|
645 | 645 | pass |
|
646 | 646 | |
|
647 | 647 | def writebatch(self, pointers, fromstore): |
|
648 | 648 | pass |
|
649 | 649 | |
|
650 | 650 | def readbatch(self, pointers, tostore): |
|
651 | 651 | pass |
|
652 | 652 | |
|
653 | 653 | |
|
654 | 654 | class _promptremote(object): |
|
655 | 655 | """Prompt user to set lfs.url when accessed.""" |
|
656 | 656 | |
|
657 | 657 | def __init__(self, repo, url): |
|
658 | 658 | pass |
|
659 | 659 | |
|
660 | 660 | def writebatch(self, pointers, fromstore, ui=None): |
|
661 | 661 | self._prompt() |
|
662 | 662 | |
|
663 | 663 | def readbatch(self, pointers, tostore, ui=None): |
|
664 | 664 | self._prompt() |
|
665 | 665 | |
|
666 | 666 | def _prompt(self): |
|
667 | 667 | raise error.Abort(_(b'lfs.url needs to be configured')) |
|
668 | 668 | |
|
669 | 669 | |
|
670 | 670 | _storemap = { |
|
671 | 671 | b'https': _gitlfsremote, |
|
672 | 672 | b'http': _gitlfsremote, |
|
673 | 673 | b'file': _dummyremote, |
|
674 | 674 | b'null': _nullremote, |
|
675 | 675 | None: _promptremote, |
|
676 | 676 | } |
|
677 | 677 | |
|
678 | 678 | |
|
679 | 679 | def _deduplicate(pointers): |
|
680 | 680 | """Remove any duplicate oids that exist in the list""" |
|
681 | 681 | reduced = util.sortdict() |
|
682 | 682 | for p in pointers: |
|
683 | 683 | reduced[p.oid()] = p |
|
684 | 684 | return reduced.values() |
|
685 | 685 | |
|
686 | 686 | |
|
687 | 687 | def _verify(oid, content): |
|
688 | 688 | realoid = node.hex(hashlib.sha256(content).digest()) |
|
689 | 689 | if realoid != oid: |
|
690 | 690 | raise LfsCorruptionError( |
|
691 | 691 | _(b'detected corrupt lfs object: %s') % oid, |
|
692 | 692 | hint=_(b'run hg verify'), |
|
693 | 693 | ) |
|
694 | 694 | |
|
695 | 695 | |
|
696 | 696 | def remote(repo, remote=None): |
|
697 | 697 | """remotestore factory. return a store in _storemap depending on config |
|
698 | 698 | |
|
699 | 699 | If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to |
|
700 | 700 | infer the endpoint, based on the remote repository using the same path |
|
701 | 701 | adjustments as git. As an extension, 'http' is supported as well so that |
|
702 | 702 | ``hg serve`` works out of the box. |
|
703 | 703 | |
|
704 | 704 | https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md |
|
705 | 705 | """ |
|
706 | 706 | lfsurl = repo.ui.config(b'lfs', b'url') |
|
707 | 707 | url = util.url(lfsurl or b'') |
|
708 | 708 | if lfsurl is None: |
|
709 | 709 | if remote: |
|
710 | 710 | path = remote |
|
711 | 711 | elif util.safehasattr(repo, b'_subtoppath'): |
|
712 | 712 | # The pull command sets this during the optional update phase, which |
|
713 | 713 | # tells exactly where the pull originated, whether 'paths.default' |
|
714 | 714 | # or explicit. |
|
715 | 715 | path = repo._subtoppath |
|
716 | 716 | else: |
|
717 | 717 | # TODO: investigate 'paths.remote:lfsurl' style path customization, |
|
718 | 718 | # and fall back to inferring from 'paths.remote' if unspecified. |
|
719 | 719 | path = repo.ui.config(b'paths', b'default') or b'' |
|
720 | 720 | |
|
721 | 721 | defaulturl = util.url(path) |
|
722 | 722 | |
|
723 | 723 | # TODO: support local paths as well. |
|
724 | 724 | # TODO: consider the ssh -> https transformation that git applies |
|
725 | 725 | if defaulturl.scheme in (b'http', b'https'): |
|
726 | 726 | if defaulturl.path and defaulturl.path[-1:] != b'/': |
|
727 | 727 | defaulturl.path += b'/' |
|
728 | 728 | defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs' |
|
729 | 729 | |
|
730 | 730 | url = util.url(bytes(defaulturl)) |
|
731 | 731 | repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url) |
|
732 | 732 | |
|
733 | 733 | scheme = url.scheme |
|
734 | 734 | if scheme not in _storemap: |
|
735 | 735 | raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme) |
|
736 | 736 | return _storemap[scheme](repo, url) |
|
737 | 737 | |
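As a rough illustration of the inference described in the docstring: when ``lfs.url`` is unset and the default path is HTTP(S), the store endpoint is the remote path with ``.git/info/lfs`` appended. A sketch with plain string handling standing in for util.url (the URL below is made up):

    def infer_lfs_endpoint(default_path):
        # Mirror the intended path adjustment above: ensure a trailing slash,
        # then append the git-lfs server-discovery suffix.
        if default_path and not default_path.endswith('/'):
            default_path += '/'
        return default_path + '.git/info/lfs'

    assert (infer_lfs_endpoint('https://example.com/repo')
            == 'https://example.com/repo/.git/info/lfs')
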
|
738 | 738 | |
|
739 | 739 | class LfsRemoteError(error.StorageError): |
|
740 | 740 | pass |
|
741 | 741 | |
|
742 | 742 | |
|
743 | 743 | class LfsCorruptionError(error.Abort): |
|
744 | 744 | """Raised when a corrupt blob is detected, aborting an operation |
|
745 | 745 | |
|
746 | 746 | It exists to allow specialized handling on the server side.""" |
@@ -1,267 +1,259 | |||
|
1 | 1 | # __init__.py - Startup and module loading logic for Mercurial. |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | import sys |
|
11 | 11 | |
|
12 | 12 | # Allow 'from mercurial import demandimport' to keep working. |
|
13 | 13 | import hgdemandimport |
|
14 | 14 | |
|
15 | 15 | demandimport = hgdemandimport |
|
16 | 16 | |
|
17 | 17 | __all__ = [] |
|
18 | 18 | |
|
19 | 19 | # Python 3 uses a custom module loader that transforms source code between |
|
20 | 20 | # source file reading and compilation. This is done by registering a custom |
|
21 | 21 | # finder that changes the spec for Mercurial modules to use a custom loader. |
|
22 | 22 | if sys.version_info[0] >= 3: |
|
23 | 23 | import importlib |
|
24 | 24 | import importlib.abc |
|
25 | 25 | import io |
|
26 | 26 | import token |
|
27 | 27 | import tokenize |
|
28 | 28 | |
|
29 | 29 | class hgpathentryfinder(importlib.abc.MetaPathFinder): |
|
30 | 30 | """A sys.meta_path finder that uses a custom module loader.""" |
|
31 | 31 | |
|
32 | 32 | def find_spec(self, fullname, path, target=None): |
|
33 | 33 | # Only handle Mercurial-related modules. |
|
34 | 34 | if not fullname.startswith(('mercurial.', 'hgext.')): |
|
35 | 35 | return None |
|
36 | 36 | # don't try to parse binary |
|
37 | 37 | if fullname.startswith('mercurial.cext.'): |
|
38 | 38 | return None |
|
39 | 39 | # third-party packages are expected to be dual-version clean |
|
40 | 40 | if fullname.startswith('mercurial.thirdparty'): |
|
41 | 41 | return None |
|
42 | 42 | # zstd is already dual-version clean, don't try and mangle it |
|
43 | 43 | if fullname.startswith('mercurial.zstd'): |
|
44 | 44 | return None |
|
45 | 45 | # rustext is built for the right python version, |
|
46 | 46 | # don't try and mangle it |
|
47 | 47 | if fullname.startswith('mercurial.rustext'): |
|
48 | 48 | return None |
|
49 | 49 | # pywatchman is already dual-version clean, don't try and mangle it |
|
50 | 50 | if fullname.startswith('hgext.fsmonitor.pywatchman'): |
|
51 | 51 | return None |
|
52 | 52 | |
|
53 | 53 | # Try to find the module using other registered finders. |
|
54 | 54 | spec = None |
|
55 | 55 | for finder in sys.meta_path: |
|
56 | 56 | if finder == self: |
|
57 | 57 | continue |
|
58 | 58 | |
|
59 | 59 | # Originally the API was a `find_module` method, but it was |
|
60 | 60 | # renamed to `find_spec` in python 3.4, with a new `target` |
|
61 | 61 | # argument. |
|
62 | 62 | find_spec_method = getattr(finder, 'find_spec', None) |
|
63 | 63 | if find_spec_method: |
|
64 | 64 | spec = find_spec_method(fullname, path, target=target) |
|
65 | 65 | else: |
|
66 | 66 | spec = finder.find_module(fullname) |
|
67 | 67 | if spec is not None: |
|
68 | 68 | spec = importlib.util.spec_from_loader(fullname, spec) |
|
69 | 69 | if spec: |
|
70 | 70 | break |
|
71 | 71 | |
|
72 | 72 | # This is a Mercurial-related module but we couldn't find it |
|
73 | 73 | # using the previously-registered finders. This likely means |
|
74 | 74 | # the module doesn't exist. |
|
75 | 75 | if not spec: |
|
76 | 76 | return None |
|
77 | 77 | |
|
78 | 78 | # TODO need to support loaders from alternate specs, like zip |
|
79 | 79 | # loaders. |
|
80 | 80 | loader = hgloader(spec.name, spec.origin) |
|
81 | 81 | # Can't use util.safehasattr here because that would require |
|
82 | 82 | # importing util, and we're in import code. |
|
83 | 83 | if hasattr(spec.loader, 'loader'): # hasattr-py3-only |
|
84 | 84 | # This is a nested loader (maybe a lazy loader?) |
|
85 | 85 | spec.loader.loader = loader |
|
86 | 86 | else: |
|
87 | 87 | spec.loader = loader |
|
88 | 88 | return spec |
|
89 | 89 | |
|
90 | 90 | def replacetokens(tokens, fullname): |
|
91 | 91 | """Transform a stream of tokens from raw to Python 3. |
|
92 | 92 | |
|
93 | 93 | It is called by the custom module loading machinery to rewrite |
|
94 | 94 | source/tokens between source decoding and compilation. |
|
95 | 95 | |
|
96 | 96 | Returns a generator of possibly rewritten tokens. |
|
97 | 97 | |
|
98 | 98 | The input token list may be mutated as part of processing. However, |
|
99 | 99 | its changes do not necessarily match the output token stream. |
|
100 | 100 | |
|
101 | 101 | REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION |
|
102 | 102 | OR CACHED FILES WON'T GET INVALIDATED PROPERLY. |
|
103 | 103 | """ |
|
104 | 104 | # The following utility functions access the tokens list and the i index of |

105 | 105 | # the for i, t in enumerate(tokens) loop below |
|
106 | 106 | def _isop(j, *o): |
|
107 | 107 | """Assert that tokens[j] is an OP with one of the given values""" |
|
108 | 108 | try: |
|
109 | 109 | return tokens[j].type == token.OP and tokens[j].string in o |
|
110 | 110 | except IndexError: |
|
111 | 111 | return False |
|
112 | 112 | |
|
113 | 113 | def _findargnofcall(n): |
|
114 | 114 | """Find arg n of a call expression (start at 0) |
|
115 | 115 | |
|
116 | 116 | Returns index of the first token of that argument, or None if |
|
117 | 117 | there are not that many arguments. |
|
118 | 118 | |
|
119 | 119 | Assumes that token[i + 1] is '('. |
|
120 | 120 | |
|
121 | 121 | """ |
|
122 | 122 | nested = 0 |
|
123 | 123 | for j in range(i + 2, len(tokens)): |
|
124 | 124 | if _isop(j, ')', ']', '}'): |
|
125 | 125 | # end of call, tuple, subscription or dict / set |
|
126 | 126 | nested -= 1 |
|
127 | 127 | if nested < 0: |
|
128 | 128 | return None |
|
129 | 129 | elif n == 0: |
|
130 | 130 | # this is the starting position of arg |
|
131 | 131 | return j |
|
132 | 132 | elif _isop(j, '(', '[', '{'): |
|
133 | 133 | nested += 1 |
|
134 | 134 | elif _isop(j, ',') and nested == 0: |
|
135 | 135 | n -= 1 |
|
136 | 136 | |
|
137 | 137 | return None |
|
138 | 138 | |
|
139 | 139 | def _ensureunicode(j): |
|
140 | 140 | """Make sure the token at j is a unicode string |
|
141 | 141 | |
|
142 | 142 | This rewrites a string token to include the unicode literal prefix |
|
143 | 143 | so the string transformer won't add the byte prefix. |
|
144 | 144 | |
|
145 | 145 | Ignores tokens that are not strings. Assumes bounds checking has |
|
146 | 146 | already been done. |
|
147 | 147 | |
|
148 | 148 | """ |
|
149 | 149 | st = tokens[j] |
|
150 | 150 | if st.type == token.STRING and st.string.startswith(("'", '"')): |
|
151 | 151 | tokens[j] = st._replace(string='u%s' % st.string) |
|
152 | 152 | |
|
153 | 153 | for i, t in enumerate(tokens): |
|
154 | 154 | # This looks like a function call. |
|
155 | 155 | if t.type == token.NAME and _isop(i + 1, '('): |
|
156 | 156 | fn = t.string |
|
157 | 157 | |
|
158 | 158 | # *attr() builtins don't accept byte strings to 2nd argument. |
|
159 | 159 | if fn in ( |
|
160 | 160 | 'getattr', |
|
161 | 161 | 'setattr', |
|
162 | 162 | 'hasattr', |
|
163 | 163 | 'safehasattr', |
|
164 | 164 | ) and not _isop(i - 1, '.'): |
|
165 | 165 | arg1idx = _findargnofcall(1) |
|
166 | 166 | if arg1idx is not None: |
|
167 | 167 | _ensureunicode(arg1idx) |
|
168 | 168 | |
|
169 | # .encode() and .decode() on str/bytes/unicode don't accept | |
|
170 | # byte strings on Python 3. | |
|
171 | elif fn in ('encode', 'decode') and _isop(i - 1, '.'): | |
|
172 | for argn in range(2): | |
|
173 | argidx = _findargnofcall(argn) | |
|
174 | if argidx is not None: | |
|
175 | _ensureunicode(argidx) | |
|
176 | ||
|
177 | 169 | # It changes iteritems/itervalues to items/values as they are not |

178 | 170 | # present in the Python 3 world. |
|
179 | 171 | elif fn in ('iteritems', 'itervalues') and not ( |
|
180 | 172 | tokens[i - 1].type == token.NAME |
|
181 | 173 | and tokens[i - 1].string == 'def' |
|
182 | 174 | ): |
|
183 | 175 | yield t._replace(string=fn[4:]) |
|
184 | 176 | continue |
|
185 | 177 | |
|
186 | 178 | # Emit unmodified token. |
|
187 | 179 | yield t |
|
188 | 180 | |
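To make the transformation concrete, here is a small standalone sketch (not part of the loader) of the same tokenize/untokenize round trip that source_to_code() performs below, reduced to the iteritems/itervalues renaming:

    import io
    import token
    import tokenize

    def rename_iter_calls(source_bytes):
        # Simplified stand-in for replacetokens(): rename iteritems/itervalues
        # NAME tokens; the real function also inspects the surrounding tokens.
        out = []
        for t in tokenize.tokenize(io.BytesIO(source_bytes).readline):
            if t.type == token.NAME and t.string in ('iteritems', 'itervalues'):
                out.append(t._replace(string=t.string[4:]))
            else:
                out.append(t)
        return tokenize.untokenize(out)

    print(rename_iter_calls(b"for k, v in d.iteritems():\n    pass\n"))
    # roughly b"for k, v in d.items    ():\n    pass\n" (untokenize pads columns)
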
|
189 | 181 | # Header to add to bytecode files. This MUST be changed when |
|
190 | 182 | # ``replacetoken`` or any mechanism that changes semantics of module |
|
191 | 183 | # loading is changed. Otherwise cached bytecode may get loaded without |
|
192 | 184 | # the new transformation mechanisms applied. |
|
193 | BYTECODEHEADER = b'HG\x00\x1

185 | BYTECODEHEADER = b'HG\x00\x13' |
|
194 | 186 | |
|
195 | 187 | class hgloader(importlib.machinery.SourceFileLoader): |
|
196 | 188 | """Custom module loader that transforms source code. |
|
197 | 189 | |
|
198 | 190 | When the source code is converted to a code object, we transform |
|
199 | 191 | certain patterns to be Python 3 compatible. This allows us to write code |
|
200 | 192 | that is natively Python 2 and compatible with Python 3 without |
|
201 | 193 | making the code excessively ugly. |
|
202 | 194 | |
|
203 | 195 | We do this by transforming the token stream between parse and compile. |
|
204 | 196 | |
|
205 | 197 | Implementing transformations invalidates caching assumptions made |
|
206 | 198 | by the built-in importer. The built-in importer stores a header on |
|
207 | 199 | saved bytecode files indicating the Python/bytecode version. If the |
|
208 | 200 | version changes, the cached bytecode is ignored. The Mercurial |
|
209 | 201 | transformations could change at any time. This means we need to check |
|
210 | 202 | that cached bytecode was generated with the current transformation |
|
211 | 203 | code or there could be a mismatch between cached bytecode and what |
|
212 | 204 | would be generated from this class. |
|
213 | 205 | |
|
214 | 206 | We supplement the bytecode caching layer by wrapping ``get_data`` |
|
215 | 207 | and ``set_data``. These functions are called when the |
|
216 | 208 | ``SourceFileLoader`` retrieves and saves bytecode cache files, |
|
217 | 209 | respectively. We simply add an additional header on the file. As |
|
218 | 210 | long as the version in this file is changed when semantics change, |
|
219 | 211 | cached bytecode should be invalidated when transformations change. |
|
220 | 212 | |
|
221 | 213 | The added header has the form ``HG<VERSION>``. That is a literal |
|
222 | 214 | ``HG`` with 2 binary bytes indicating the transformation version. |
|
223 | 215 | """ |
|
224 | 216 | |
|
225 | 217 | def get_data(self, path): |
|
226 | 218 | data = super(hgloader, self).get_data(path) |
|
227 | 219 | |
|
228 | 220 | if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
229 | 221 | return data |
|
230 | 222 | |
|
231 | 223 | # There should be a header indicating the Mercurial transformation |
|
232 | 224 | # version. If it doesn't exist or doesn't match the current version, |
|
233 | 225 | # we raise an OSError because that is what |
|
234 | 226 | # ``SourceFileLoader.get_code()`` expects when loading bytecode |
|
235 | 227 | # paths to indicate the cached file is "bad." |
|
236 | 228 | if data[0:2] != b'HG': |
|
237 | 229 | raise OSError('no hg header') |
|
238 | 230 | if data[0:4] != BYTECODEHEADER: |
|
239 | 231 | raise OSError('hg header version mismatch') |
|
240 | 232 | |
|
241 | 233 | return data[4:] |
|
242 | 234 | |
|
243 | 235 | def set_data(self, path, data, *args, **kwargs): |
|
244 | 236 | if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)): |
|
245 | 237 | data = BYTECODEHEADER + data |
|
246 | 238 | |
|
247 | 239 | return super(hgloader, self).set_data(path, data, *args, **kwargs) |
|
248 | 240 | |
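In other words, get_data() and set_data() wrap the standard bytecode cache file in a Mercurial version header. A standalone sketch of just that wrapping (using the BYTECODEHEADER value shown above):

    HEADER = b'HG\x00\x13'

    def wrap(bytecode):
        # What set_data() effectively prepends before writing the cache file.
        return HEADER + bytecode

    def unwrap(data):
        # What get_data() effectively checks and strips when reading it back.
        if data[0:2] != b'HG':
            raise OSError('no hg header')
        if data[0:4] != HEADER:
            raise OSError('hg header version mismatch')
        return data[4:]

    assert unwrap(wrap(b'<pyc payload>')) == b'<pyc payload>'
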
|
249 | 241 | def source_to_code(self, data, path): |
|
250 | 242 | """Perform token transformation before compilation.""" |
|
251 | 243 | buf = io.BytesIO(data) |
|
252 | 244 | tokens = tokenize.tokenize(buf.readline) |
|
253 | 245 | data = tokenize.untokenize(replacetokens(list(tokens), self.name)) |
|
254 | 246 | # Python's built-in importer strips frames from exceptions raised |
|
255 | 247 | # for this code. Unfortunately, that mechanism isn't extensible |
|
256 | 248 | # and our frame will be blamed for the import failure. There |
|
257 | 249 | # are extremely hacky ways to do frame stripping. We haven't |
|
258 | 250 | # implemented them because they are very ugly. |
|
259 | 251 | return super(hgloader, self).source_to_code(data, path) |
|
260 | 252 | |
|
261 | 253 | # We automagically register our custom importer as a side-effect of |
|
262 | 254 | # loading. This is necessary to ensure that any entry points are able |
|
263 | 255 | # to import mercurial.* modules without having to perform this |
|
264 | 256 | # registration themselves. |
|
265 | 257 | if not any(isinstance(x, hgpathentryfinder) for x in sys.meta_path): |
|
266 | 258 | # meta_path is used before any implicit finders and before sys.path. |
|
267 | 259 | sys.meta_path.insert(0, hgpathentryfinder()) |
@@ -1,453 +1,453 | |||
|
1 | 1 | # pycompat.py - portability shim for python 3 |
|
2 | 2 | # |
|
3 | 3 | # This software may be used and distributed according to the terms of the |
|
4 | 4 | # GNU General Public License version 2 or any later version. |
|
5 | 5 | |
|
6 | 6 | """Mercurial portability shim for python 3. |
|
7 | 7 | |
|
8 | 8 | This contains aliases to hide python version-specific details from the core. |
|
9 | 9 | """ |
|
10 | 10 | |
|
11 | 11 | from __future__ import absolute_import |
|
12 | 12 | |
|
13 | 13 | import getopt |
|
14 | 14 | import inspect |
|
15 | 15 | import os |
|
16 | 16 | import shlex |
|
17 | 17 | import sys |
|
18 | 18 | import tempfile |
|
19 | 19 | |
|
20 | 20 | ispy3 = sys.version_info[0] >= 3 |
|
21 | 21 | ispypy = r'__pypy__' in sys.builtin_module_names |
|
22 | 22 | |
|
23 | 23 | if not ispy3: |
|
24 | 24 | import cookielib |
|
25 | 25 | import cPickle as pickle |
|
26 | 26 | import httplib |
|
27 | 27 | import Queue as queue |
|
28 | 28 | import SocketServer as socketserver |
|
29 | 29 | import xmlrpclib |
|
30 | 30 | |
|
31 | 31 | from .thirdparty.concurrent import futures |
|
32 | 32 | |
|
33 | 33 | def future_set_exception_info(f, exc_info): |
|
34 | 34 | f.set_exception_info(*exc_info) |
|
35 | 35 | |
|
36 | 36 | |
|
37 | 37 | else: |
|
38 | 38 | import concurrent.futures as futures |
|
39 | 39 | import http.cookiejar as cookielib |
|
40 | 40 | import http.client as httplib |
|
41 | 41 | import pickle |
|
42 | 42 | import queue as queue |
|
43 | 43 | import socketserver |
|
44 | 44 | import xmlrpc.client as xmlrpclib |
|
45 | 45 | |
|
46 | 46 | def future_set_exception_info(f, exc_info): |
|
47 | 47 | f.set_exception(exc_info[0]) |
|
48 | 48 | |
|
49 | 49 | |
|
50 | 50 | def identity(a): |
|
51 | 51 | return a |
|
52 | 52 | |
|
53 | 53 | |
|
54 | 54 | def _rapply(f, xs): |
|
55 | 55 | if xs is None: |
|
56 | 56 | # assume None means non-value of optional data |
|
57 | 57 | return xs |
|
58 | 58 | if isinstance(xs, (list, set, tuple)): |
|
59 | 59 | return type(xs)(_rapply(f, x) for x in xs) |
|
60 | 60 | if isinstance(xs, dict): |
|
61 | 61 | return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) |
|
62 | 62 | return f(xs) |
|
63 | 63 | |
|
64 | 64 | |
|
65 | 65 | def rapply(f, xs): |
|
66 | 66 | """Apply function recursively to every item preserving the data structure |
|
67 | 67 | |
|
68 | 68 | >>> def f(x): |
|
69 | 69 | ... return 'f(%s)' % x |
|
70 | 70 | >>> rapply(f, None) is None |
|
71 | 71 | True |
|
72 | 72 | >>> rapply(f, 'a') |
|
73 | 73 | 'f(a)' |
|
74 | 74 | >>> rapply(f, {'a'}) == {'f(a)'} |
|
75 | 75 | True |
|
76 | 76 | >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []]) |
|
77 | 77 | ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []] |
|
78 | 78 | |
|
79 | 79 | >>> xs = [object()] |
|
80 | 80 | >>> rapply(identity, xs) is xs |
|
81 | 81 | True |
|
82 | 82 | """ |
|
83 | 83 | if f is identity: |
|
84 | 84 | # fast path mainly for py2 |
|
85 | 85 | return xs |
|
86 | 86 | return _rapply(f, xs) |
|
87 | 87 | |
|
88 | 88 | |
|
89 | 89 | if ispy3: |
|
90 | 90 | import builtins |
|
91 | 91 | import functools |
|
92 | 92 | import io |
|
93 | 93 | import struct |
|
94 | 94 | |
|
95 | 95 | fsencode = os.fsencode |
|
96 | 96 | fsdecode = os.fsdecode |
|
97 | 97 | oscurdir = os.curdir.encode('ascii') |
|
98 | 98 | oslinesep = os.linesep.encode('ascii') |
|
99 | 99 | osname = os.name.encode('ascii') |
|
100 | 100 | ospathsep = os.pathsep.encode('ascii') |
|
101 | 101 | ospardir = os.pardir.encode('ascii') |
|
102 | 102 | ossep = os.sep.encode('ascii') |
|
103 | 103 | osaltsep = os.altsep |
|
104 | 104 | if osaltsep: |
|
105 | 105 | osaltsep = osaltsep.encode('ascii') |
|
106 | 106 | |
|
107 | 107 | sysplatform = sys.platform.encode('ascii') |
|
108 | 108 | sysexecutable = sys.executable |
|
109 | 109 | if sysexecutable: |
|
110 | 110 | sysexecutable = os.fsencode(sysexecutable) |
|
111 | 111 | bytesio = io.BytesIO |
|
112 | 112 | # TODO deprecate stringio name, as it is a lie on Python 3. |
|
113 | 113 | stringio = bytesio |
|
114 | 114 | |
|
115 | 115 | def maplist(*args): |
|
116 | 116 | return list(map(*args)) |
|
117 | 117 | |
|
118 | 118 | def rangelist(*args): |
|
119 | 119 | return list(range(*args)) |
|
120 | 120 | |
|
121 | 121 | def ziplist(*args): |
|
122 | 122 | return list(zip(*args)) |
|
123 | 123 | |
|
124 | 124 | rawinput = input |
|
125 | 125 | getargspec = inspect.getfullargspec |
|
126 | 126 | |
|
127 | 127 | long = int |
|
128 | 128 | |
|
129 | 129 | # TODO: .buffer might not exist if std streams were replaced; we'll need |
|
130 | 130 | # a silly wrapper to make a bytes stream backed by a unicode one. |
|
131 | 131 | stdin = sys.stdin.buffer |
|
132 | 132 | stdout = sys.stdout.buffer |
|
133 | 133 | stderr = sys.stderr.buffer |
|
134 | 134 | |
|
135 | 135 | # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix, |
|
136 | 136 | # we can use os.fsencode() to get back bytes argv. |
|
137 | 137 | # |
|
138 | 138 | # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55 |
|
139 | 139 | # |
|
140 | 140 | # TODO: On Windows, the native argv is wchar_t, so we'll need a different |
|
141 | 141 | # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior. |
|
142 | 142 | if getattr(sys, 'argv', None) is not None: |
|
143 | 143 | sysargv = list(map(os.fsencode, sys.argv)) |
|
144 | 144 | |
|
145 | 145 | bytechr = struct.Struct(r'>B').pack |
|
146 | 146 | byterepr = b'%r'.__mod__ |
|
147 | 147 | |
|
148 | 148 | class bytestr(bytes): |
|
149 | 149 | """A bytes which mostly acts as a Python 2 str |
|
150 | 150 | |
|
151 | 151 | >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1) |
|
152 | 152 | ('', 'foo', 'ascii', '1') |
|
153 | 153 | >>> s = bytestr(b'foo') |
|
154 | 154 | >>> assert s is bytestr(s) |
|
155 | 155 | |
|
156 | 156 | __bytes__() should be called if provided: |
|
157 | 157 | |
|
158 | 158 | >>> class bytesable(object): |
|
159 | 159 | ... def __bytes__(self): |
|
160 | 160 | ... return b'bytes' |
|
161 | 161 | >>> bytestr(bytesable()) |
|
162 | 162 | 'bytes' |
|
163 | 163 | |
|
164 | 164 | There's no implicit conversion from non-ascii str as its encoding is |
|
165 | 165 | unknown: |
|
166 | 166 | |
|
167 | 167 | >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS |
|
168 | 168 | Traceback (most recent call last): |
|
169 | 169 | ... |
|
170 | 170 | UnicodeEncodeError: ... |
|
171 | 171 | |
|
172 | 172 | Comparison between bytestr and bytes should work: |
|
173 | 173 | |
|
174 | 174 | >>> assert bytestr(b'foo') == b'foo' |
|
175 | 175 | >>> assert b'foo' == bytestr(b'foo') |
|
176 | 176 | >>> assert b'f' in bytestr(b'foo') |
|
177 | 177 | >>> assert bytestr(b'f') in b'foo' |
|
178 | 178 | |
|
179 | 179 | Sliced elements should be bytes, not integer: |
|
180 | 180 | |
|
181 | 181 | >>> s[1], s[:2] |
|
182 | 182 | (b'o', b'fo') |
|
183 | 183 | >>> list(s), list(reversed(s)) |
|
184 | 184 | ([b'f', b'o', b'o'], [b'o', b'o', b'f']) |
|
185 | 185 | |
|
186 | 186 | As bytestr type isn't propagated across operations, you need to cast |
|
187 | 187 | bytes to bytestr explicitly: |
|
188 | 188 | |
|
189 | 189 | >>> s = bytestr(b'foo').upper() |
|
190 | 190 | >>> t = bytestr(s) |
|
191 | 191 | >>> s[0], t[0] |
|
192 | 192 | (70, b'F') |
|
193 | 193 | |
|
194 | 194 | Be careful to not pass a bytestr object to a function which expects |
|
195 | 195 | bytearray-like behavior. |
|
196 | 196 | |
|
197 | 197 | >>> t = bytes(t) # cast to bytes |
|
198 | 198 | >>> assert type(t) is bytes |
|
199 | 199 | """ |
|
200 | 200 | |
|
201 | 201 | def __new__(cls, s=b''): |
|
202 | 202 | if isinstance(s, bytestr): |
|
203 | 203 | return s |
|
204 | 204 | if not isinstance( |
|
205 | 205 | s, (bytes, bytearray) |
|
206 | 206 | ) and not hasattr( # hasattr-py3-only |
|
207 | 207 | s, u'__bytes__' |
|
208 | 208 | ): |
|
209 | s = str(s).encode(

209 | s = str(s).encode('ascii') |
|
210 | 210 | return bytes.__new__(cls, s) |
|
211 | 211 | |
|
212 | 212 | def __getitem__(self, key): |
|
213 | 213 | s = bytes.__getitem__(self, key) |
|
214 | 214 | if not isinstance(s, bytes): |
|
215 | 215 | s = bytechr(s) |
|
216 | 216 | return s |
|
217 | 217 | |
|
218 | 218 | def __iter__(self): |
|
219 | 219 | return iterbytestr(bytes.__iter__(self)) |
|
220 | 220 | |
|
221 | 221 | def __repr__(self): |
|
222 | 222 | return bytes.__repr__(self)[1:] # drop b'' |
|
223 | 223 | |
|
224 | 224 | def iterbytestr(s): |
|
225 | 225 | """Iterate bytes as if it were a str object of Python 2""" |
|
226 | 226 | return map(bytechr, s) |
|
227 | 227 | |
|
228 | 228 | def maybebytestr(s): |
|
229 | 229 | """Promote bytes to bytestr""" |
|
230 | 230 | if isinstance(s, bytes): |
|
231 | 231 | return bytestr(s) |
|
232 | 232 | return s |
|
233 | 233 | |
|
234 | 234 | def sysbytes(s): |
|
235 | 235 | """Convert an internal str (e.g. keyword, __doc__) back to bytes |
|
236 | 236 | |
|
237 | 237 | This never raises UnicodeEncodeError, but only ASCII characters |
|
238 | 238 | can be round-tripped by sysstr(sysbytes(s)). |
|
239 | 239 | """ |
|
240 | return s.encode(

240 | return s.encode('utf-8') |
|
241 | 241 | |
|
242 | 242 | def sysstr(s): |
|
243 | 243 | """Return a keyword str to be passed to Python functions such as |
|
244 | 244 | getattr() and str.encode() |
|
245 | 245 | |
|
246 | 246 | This never raises UnicodeDecodeError. Non-ascii characters are |
|
247 | 247 | considered invalid and mapped to arbitrary but unique code points |
|
248 | 248 | such that 'sysstr(a) != sysstr(b)' for all 'a != b'. |
|
249 | 249 | """ |
|
250 | 250 | if isinstance(s, builtins.str): |
|
251 | 251 | return s |
|
252 | return s.decode(

252 | return s.decode('latin-1') |
|
253 | 253 | |
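A few concrete values may help (a sketch mirroring the two helpers above): sysbytes() encodes through UTF-8, while sysstr() decodes via latin-1, so ASCII round-trips and arbitrary bytes still map to distinct code points:

    assert 'changeset'.encode('utf-8') == b'changeset'      # sysbytes() on ASCII
    assert b'changeset'.decode('latin-1') == 'changeset'    # sysstr() on ASCII
    # Non-ASCII bytes remain distinguishable, though not meaningful as text:
    assert b'\xc3\xa9'.decode('latin-1') != b'\xe9'.decode('latin-1')
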
|
254 | 254 | def strurl(url): |
|
255 | 255 | """Converts a bytes url back to str""" |
|
256 | 256 | if isinstance(url, bytes): |
|
257 | return url.decode(

257 | return url.decode('ascii') |
|
258 | 258 | return url |
|
259 | 259 | |
|
260 | 260 | def bytesurl(url): |
|
261 | 261 | """Converts a str url to bytes by encoding in ascii""" |
|
262 | 262 | if isinstance(url, str): |
|
263 | return url.encode(

263 | return url.encode('ascii') |
|
264 | 264 | return url |
|
265 | 265 | |
|
266 | 266 | def raisewithtb(exc, tb): |
|
267 | 267 | """Raise exception with the given traceback""" |
|
268 | 268 | raise exc.with_traceback(tb) |
|
269 | 269 | |
|
270 | 270 | def getdoc(obj): |
|
271 | 271 | """Get docstring as bytes; may be None so gettext() won't confuse it |
|
272 | 272 | with _('')""" |
|
273 | 273 | doc = getattr(obj, u'__doc__', None) |
|
274 | 274 | if doc is None: |
|
275 | 275 | return doc |
|
276 | 276 | return sysbytes(doc) |
|
277 | 277 | |
|
278 | 278 | def _wrapattrfunc(f): |
|
279 | 279 | @functools.wraps(f) |
|
280 | 280 | def w(object, name, *args): |
|
281 | 281 | return f(object, sysstr(name), *args) |
|
282 | 282 | |
|
283 | 283 | return w |
|
284 | 284 | |
|
285 | 285 | # these wrappers are automagically imported by hgloader |
|
286 | 286 | delattr = _wrapattrfunc(builtins.delattr) |
|
287 | 287 | getattr = _wrapattrfunc(builtins.getattr) |
|
288 | 288 | hasattr = _wrapattrfunc(builtins.hasattr) |
|
289 | 289 | setattr = _wrapattrfunc(builtins.setattr) |
|
290 | 290 | xrange = builtins.range |
|
291 | 291 | unicode = str |
|
292 | 292 | |
|
293 | 293 | def open(name, mode=b'r', buffering=-1, encoding=None): |
|
294 | 294 | return builtins.open(name, sysstr(mode), buffering, encoding) |
|
295 | 295 | |
|
296 | 296 | safehasattr = _wrapattrfunc(builtins.hasattr) |
|
297 | 297 | |
|
298 | 298 | def _getoptbwrapper(orig, args, shortlist, namelist): |
|
299 | 299 | """ |
|
300 | 300 | Takes bytes arguments, converts them to unicode, passes them to |

301 | 301 | getopt.getopt(), converts the returned values back to bytes, and then |

302 | 302 | returns them, for Python 3 compatibility, as getopt.getopt() doesn't accept |

303 | 303 | bytes on Python 3. |
|
304 | 304 | """ |
|
305 | 305 | args = [a.decode('latin-1') for a in args] |
|
306 | 306 | shortlist = shortlist.decode('latin-1') |
|
307 | 307 | namelist = [a.decode('latin-1') for a in namelist] |
|
308 | 308 | opts, args = orig(args, shortlist, namelist) |
|
309 | 309 | opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts] |
|
310 | 310 | args = [a.encode('latin-1') for a in args] |
|
311 | 311 | return opts, args |
|
312 | 312 | |
|
313 | 313 | def strkwargs(dic): |
|
314 | 314 | """ |
|
315 | 315 | Converts the keys of a Python dictionary to str (i.e. unicode) so that |

316 | 316 | they can be passed as keyword arguments, as dictionaries with bytes keys |

317 | 317 | can't be passed as keyword arguments to functions on Python 3. |
|
318 | 318 | """ |
|
319 | 319 | dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems()) |
|
320 | 320 | return dic |
|
321 | 321 | |
|
322 | 322 | def byteskwargs(dic): |
|
323 | 323 | """ |
|
324 | 324 | Converts the keys of a Python dictionary back to bytes after they were |

325 | 325 | converted to str to pass that dictionary as keyword arguments on Python 3. |
|
326 | 326 | """ |
|
327 | 327 | dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems()) |
|
328 | 328 | return dic |
|
329 | 329 | |
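For illustration, the intended round trip looks like this (a sketch using .items(); inside Mercurial the .iteritems() spelling above is rewritten to .items() by the module loader on Python 3):

    def _strkwargs(dic):
        # Stand-in for strkwargs(): bytes keys -> str keys.
        return {k.decode('latin-1'): v for k, v in dic.items()}

    def _byteskwargs(dic):
        # Stand-in for byteskwargs(): str keys -> bytes keys.
        return {k.encode('latin-1'): v for k, v in dic.items()}

    opts = {b'rev': b'tip', b'force': True}

    def command(**kwargs):      # keyword arguments must have str keys on Python 3
        return _byteskwargs(kwargs)

    assert command(**_strkwargs(opts)) == opts
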
|
330 | 330 | # TODO: handle shlex.shlex(). |
|
331 | 331 | def shlexsplit(s, comments=False, posix=True): |
|
332 | 332 | """ |
|
333 | 333 | Takes a bytes argument, converts it to str (i.e. unicode), passes that into |

334 | 334 | shlex.split(), converts the returned value back to bytes, and returns that for |

335 | 335 | Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3. |
|
336 | 336 | """ |
|
337 | 337 | ret = shlex.split(s.decode('latin-1'), comments, posix) |
|
338 | 338 | return [a.encode('latin-1') for a in ret] |
|
339 | 339 | |
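A usage sketch of the wrapper above, with everything staying bytes on the caller's side:

    assert shlexsplit(b"hg commit -m 'two words'") == [
        b'hg', b'commit', b'-m', b'two words',
    ]
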
|
340 | 340 | shlexquote = shlex.quote |
|
341 | 341 | |
|
342 | 342 | else: |
|
343 | 343 | import cStringIO |
|
344 | 344 | import pipes |
|
345 | 345 | |
|
346 | 346 | xrange = xrange |
|
347 | 347 | unicode = unicode |
|
348 | 348 | bytechr = chr |
|
349 | 349 | byterepr = repr |
|
350 | 350 | bytestr = str |
|
351 | 351 | iterbytestr = iter |
|
352 | 352 | maybebytestr = identity |
|
353 | 353 | sysbytes = identity |
|
354 | 354 | sysstr = identity |
|
355 | 355 | strurl = identity |
|
356 | 356 | bytesurl = identity |
|
357 | 357 | open = open |
|
358 | 358 | delattr = delattr |
|
359 | 359 | getattr = getattr |
|
360 | 360 | hasattr = hasattr |
|
361 | 361 | setattr = setattr |
|
362 | 362 | |
|
363 | 363 | # this can't be parsed on Python 3 |
|
364 | 364 | exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n') |
|
365 | 365 | |
|
366 | 366 | def fsencode(filename): |
|
367 | 367 | """ |
|
368 | 368 | Partial backport from os.py in Python 3, which only accepts bytes. |
|
369 | 369 | In Python 2, our paths should only ever be bytes; a unicode path |
|
370 | 370 | indicates a bug. |
|
371 | 371 | """ |
|
372 | 372 | if isinstance(filename, str): |
|
373 | 373 | return filename |
|
374 | 374 | else: |
|
375 | 375 | raise TypeError(r"expect str, not %s" % type(filename).__name__) |
|
376 | 376 | |
|
377 | 377 | # In Python 2, fsdecode() is very likely to receive bytes. So it's |

378 | 378 | # better not to touch the Python 2 part as it's already working fine. |
|
379 | 379 | fsdecode = identity |
|
380 | 380 | |
|
381 | 381 | def getdoc(obj): |
|
382 | 382 | return getattr(obj, '__doc__', None) |
|
383 | 383 | |
|
384 | 384 | _notset = object() |
|
385 | 385 | |
|
386 | 386 | def safehasattr(thing, attr): |
|
387 | 387 | return getattr(thing, attr, _notset) is not _notset |
|
388 | 388 | |
|
389 | 389 | def _getoptbwrapper(orig, args, shortlist, namelist): |
|
390 | 390 | return orig(args, shortlist, namelist) |
|
391 | 391 | |
|
392 | 392 | strkwargs = identity |
|
393 | 393 | byteskwargs = identity |
|
394 | 394 | |
|
395 | 395 | oscurdir = os.curdir |
|
396 | 396 | oslinesep = os.linesep |
|
397 | 397 | osname = os.name |
|
398 | 398 | ospathsep = os.pathsep |
|
399 | 399 | ospardir = os.pardir |
|
400 | 400 | ossep = os.sep |
|
401 | 401 | osaltsep = os.altsep |
|
402 | 402 | long = long |
|
403 | 403 | stdin = sys.stdin |
|
404 | 404 | stdout = sys.stdout |
|
405 | 405 | stderr = sys.stderr |
|
406 | 406 | if getattr(sys, 'argv', None) is not None: |
|
407 | 407 | sysargv = sys.argv |
|
408 | 408 | sysplatform = sys.platform |
|
409 | 409 | sysexecutable = sys.executable |
|
410 | 410 | shlexsplit = shlex.split |
|
411 | 411 | shlexquote = pipes.quote |
|
412 | 412 | bytesio = cStringIO.StringIO |
|
413 | 413 | stringio = bytesio |
|
414 | 414 | maplist = map |
|
415 | 415 | rangelist = range |
|
416 | 416 | ziplist = zip |
|
417 | 417 | rawinput = raw_input |
|
418 | 418 | getargspec = inspect.getargspec |
|
419 | 419 | |
|
420 | 420 | isjython = sysplatform.startswith(b'java') |
|
421 | 421 | |
|
422 | 422 | isdarwin = sysplatform.startswith(b'darwin') |
|
423 | 423 | islinux = sysplatform.startswith(b'linux') |
|
424 | 424 | isposix = osname == b'posix' |
|
425 | 425 | iswindows = osname == b'nt' |
|
426 | 426 | |
|
427 | 427 | |
|
428 | 428 | def getoptb(args, shortlist, namelist): |
|
429 | 429 | return _getoptbwrapper(getopt.getopt, args, shortlist, namelist) |
|
430 | 430 | |
|
431 | 431 | |
|
432 | 432 | def gnugetoptb(args, shortlist, namelist): |
|
433 | 433 | return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist) |
|
434 | 434 | |
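A usage sketch of the bytes-level option parsing (the option names here are made up for illustration):

    opts, args = getoptb([b'-q', b'--rev', b'tip', b'file.txt'], b'q', [b'rev='])
    assert opts == [(b'-q', b''), (b'--rev', b'tip')]
    assert args == [b'file.txt']
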
|
435 | 435 | |
|
436 | 436 | def mkdtemp(suffix=b'', prefix=b'tmp', dir=None): |
|
437 | 437 | return tempfile.mkdtemp(suffix, prefix, dir) |
|
438 | 438 | |
|
439 | 439 | |
|
440 | 440 | # text=True is not supported; use util.from/tonativeeol() instead |
|
441 | 441 | def mkstemp(suffix=b'', prefix=b'tmp', dir=None): |
|
442 | 442 | return tempfile.mkstemp(suffix, prefix, dir) |
|
443 | 443 | |
|
444 | 444 | |
|
445 | 445 | # mode must include 'b'ytes as encoding= is not supported |
|
446 | 446 | def namedtempfile( |
|
447 | 447 | mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True |
|
448 | 448 | ): |
|
449 | 449 | mode = sysstr(mode) |
|
450 | 450 | assert r'b' in mode |
|
451 | 451 | return tempfile.NamedTemporaryFile( |
|
452 | 452 | mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete |
|
453 | 453 | ) |