##// END OF EJS Templates
py3: stop normalizing .encode()/.decode() arguments to unicode...
Gregory Szorc -
r43361:127cc1f7 default
parent child Browse files
Show More
@@ -1,670 +1,670
1 # testparseutil.py - utilities to parse test script for check tools
1 # testparseutil.py - utilities to parse test script for check tools
2 #
2 #
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import abc
10 import abc
11 import re
11 import re
12 import sys
12 import sys
13
13
14 ####################
14 ####################
15 # for Python3 compatibility (almost comes from mercurial/pycompat.py)
15 # for Python3 compatibility (almost comes from mercurial/pycompat.py)
16
16
# True when running under a Python 3.x interpreter
ispy3 = sys.version_info.major >= 3
18
18
19
19
def identity(a):
    """Return *a* unchanged (no-op conversion used on Python 2)."""
    return a
22
22
23
23
24 def _rapply(f, xs):
24 def _rapply(f, xs):
25 if xs is None:
25 if xs is None:
26 # assume None means non-value of optional data
26 # assume None means non-value of optional data
27 return xs
27 return xs
28 if isinstance(xs, (list, set, tuple)):
28 if isinstance(xs, (list, set, tuple)):
29 return type(xs)(_rapply(f, x) for x in xs)
29 return type(xs)(_rapply(f, x) for x in xs)
30 if isinstance(xs, dict):
30 if isinstance(xs, dict):
31 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
31 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
32 return f(xs)
32 return f(xs)
33
33
34
34
def rapply(f, xs):
    """Recursively apply ``f`` via :func:`_rapply`.

    When ``f`` is :func:`identity`, return ``xs`` as-is (fast path,
    mainly useful on Python 2 where conversion is a no-op).
    """
    if f is not identity:
        return _rapply(f, xs)
    # fast path mainly for py2
    return xs
40
40
41
41
if ispy3:
    import builtins

    def bytestr(s):
        # tiny version of pycompat.bytestr
        return s.encode('latin1')

    def sysstr(s):
        # convert bytes to the native str type; str passes through
        if not isinstance(s, builtins.str):
            return s.decode('latin-1')
        return s

    def opentext(f):
        # open in text mode (py3 decodes lines on read)
        return open(f, 'r')


else:
    # on py2, native str already is bytes, so these are no-ops
    bytestr = str
    sysstr = identity

    opentext = open
63
63
64
64
def b2s(x):
    """Convert BYTES elements in ``x`` to SYSSTR, recursively."""
    return rapply(sysstr, x)
68
68
69
69
def writeout(data):
    # emit "data" (in BYTES) on standard output
    stream = sys.stdout
    stream.write(data)
73
73
74
74
def writeerr(data):
    # emit "data" (in BYTES) on standard error
    stream = sys.stderr
    stream.write(data)
78
78
79
79
80 ####################
80 ####################
81
81
82
82
class embeddedmatcher(object):
    """Base class to detect embedded code fragments in *.t test script
    """

    # py2-style abstract-class declaration; NOTE(review): ignored by
    # py3, where this class is therefore not actually abstract --
    # presumably acceptable for a py2/py3 dual-support check tool
    __metaclass__ = abc.ABCMeta

    def __init__(self, desc):
        # human-readable description, used in error messages
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
140
140
141
141
def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is common parsing logic, which examines specified matchers on
    given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher('ambiguous #1',
    ...                       lambda l: l.startswith(' $ cat '))
    >>> ambig2 = ambigmatcher('ambiguous #2',
    ...                       lambda l: l.endswith('<< EOF\\n'))
    >>> lines = [' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    matcher = None
    ctx = filename = code = startline = None  # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith('\n'):
            line += '\n'  # to normalize EOF line
        if matcher:  # now, inside embedded code
            if matcher.endsat(ctx, line):
                codeatend = matcher.codeatend(ctx, line)
                if codeatend is not None:
                    code.append(codeatend)
                if not matcher.ignores(ctx):
                    yield (filename, startline, lineno, ''.join(code))
                matcher = None
                # DO NOT "continue", because line might start next fragment
            elif not matcher.isinside(ctx, line):
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(
                    '%s:%d: unexpected line for "%s"'
                    % (basefile, lineno, matcher.desc)
                )
                # stop extracting embedded code by current 'matcher',
                # because appearance of unexpected line might mean
                # that expected end-of-embedded-code line might never
                # appear
                matcher = None
                # DO NOT "continue", because line might start next fragment
            else:
                code.append(matcher.codeinside(ctx, line))
                continue

        # examine whether current line starts embedded code or not
        assert not matcher

        candidates = [(m, m.startsat(line)) for m in matchers]
        matched = [(m, c) for (m, c) in candidates if c]
        if matched:
            if len(matched) > 1:
                # this is an error of matchers, maybe
                errors.append(
                    '%s:%d: ambiguous line for %s'
                    % (
                        basefile,
                        lineno,
                        ', '.join(['"%s"' % m.desc for m, c in matched]),
                    )
                )
                # omit extracting embedded code, because choosing
                # arbitrary matcher from matched ones might fail to
                # detect the end of embedded code as expected.
                continue
            matcher, ctx = matched[0]
            filename = matcher.filename(ctx)
            code = []
            codeatstart = matcher.codeatstart(ctx, line)
            if codeatstart is not None:
                code.append(codeatstart)
                startline = lineno
            else:
                startline = lineno + 1

    if matcher:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if matcher.endsat(ctx, '\n'):
            codeatend = matcher.codeatend(ctx, '\n')
            if codeatend is not None:
                code.append(codeatend)
            if not matcher.ignores(ctx):
                yield (filename, startline, lineno + 1, ''.join(code))
        else:
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(
                '%s:%d: unexpected end of file for "%s"'
                % (basefile, lineno, matcher.desc)
            )
264
264
265
265
266 # heredoc limit mark to ignore embedded code at check-code.py or so
266 # heredoc limit mark to ignore embedded code at check-code.py or so
# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = 'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group (quote, if any, captured as 'lquote')
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
276
276
277
277
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
    >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')
    >>> start = ' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # build the pattern to match against cases below (and ">>"
        # variants), and to return a target filename string as 'name'
        # group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        namepat = (
            r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
        )
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(r' \$ \s*cat' + namepat + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(r' \$ \s*cat' + heredoclimitpat + namepat),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for pattern in self._fileres:
            matched = pattern.match(line)
            if matched:
                return (
                    matched.group('name'),
                    ' > %s\n' % matched.group('limit'),
                )

    def endsat(self, ctx, line):
        return ctx[1] == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx[1] == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix
369
369
370
370
371 ####
371 ####
372 # for embedded python script
372 # for embedded python script
373
373
374
374
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = ' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = ' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = ' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = ' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = '\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """

    _prefix = ' >>> '
    _prefixre = re.compile(r' (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(r' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__("doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # ended when the line is neither code nor expected output
        return not self._prefixre.match(line) and not self._outputre.match(
            line
        )

    def isinside(self, ctx, line):
        return True  # always true, if not yet ended

    def ignores(self, ctx):
        return False  # should be checked always

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        if self._prefixre.match(line):
            return line[len(self._prefix) :]  # strip prefix ' >>> '/' ... '
        return '\n'  # an expected output line is treated as an empty line
470
470
471
471
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
    >>> start = ' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    _prefix = ' > '

    _startre = re.compile(
        r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
    )

    def __init__(self):
        super(pyheredocmatcher, self).__init__("heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        matched = self._startre.match(line)
        if matched:
            return ' > %s\n' % matched.group('limit')

    def endsat(self, ctx, line):
        return ctx == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return ctx == ' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix
544
544
545
545
# matchers used to detect embedded python code; the heredoc matcher
# uses '[^<]+' instead of '\S+' so that target paths containing
# whitespace still match
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
]
553
553
554
554
555 def pyembedded(basefile, lines, errors):
555 def pyembedded(basefile, lines, errors):
556 return embedded(basefile, lines, errors, _pymatchers)
556 return embedded(basefile, lines, errors, _pymatchers)
557
557
558
558
559 ####
559 ####
560 # for embedded shell script
560 # for embedded shell script
561
561
562 _shmatchers = [
562 _shmatchers = [
563 # use '[^<]+' instead of '\S+', in order to match against
563 # use '[^<]+' instead of '\S+', in order to match against
564 # paths including whitespaces
564 # paths including whitespaces
565 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
565 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
566 ]
566 ]
567
567
568
568
569 def shembedded(basefile, lines, errors):
569 def shembedded(basefile, lines, errors):
570 return embedded(basefile, lines, errors, _shmatchers)
570 return embedded(basefile, lines, errors, _shmatchers)
571
571
572
572
573 ####
573 ####
574 # for embedded hgrc configuration
574 # for embedded hgrc configuration
575
575
576 _hgrcmatchers = [
576 _hgrcmatchers = [
577 # use '[^<]+' instead of '\S+', in order to match against
577 # use '[^<]+' instead of '\S+', in order to match against
578 # paths including whitespaces
578 # paths including whitespaces
579 fileheredocmatcher(
579 fileheredocmatcher(
580 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
580 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
581 ),
581 ),
582 ]
582 ]
583
583
584
584
585 def hgrcembedded(basefile, lines, errors):
585 def hgrcembedded(basefile, lines, errors):
586 return embedded(basefile, lines, errors, _hgrcmatchers)
586 return embedded(basefile, lines, errors, _hgrcmatchers)
587
587
588
588
589 ####
589 ####
590
590
591 if __name__ == "__main__":
591 if __name__ == "__main__":
592 import optparse
592 import optparse
593 import sys
593 import sys
594
594
595 def showembedded(basefile, lines, embeddedfunc, opts):
595 def showembedded(basefile, lines, embeddedfunc, opts):
596 errors = []
596 errors = []
597 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
597 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
598 if not name:
598 if not name:
599 name = '<anonymous>'
599 name = '<anonymous>'
600 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
600 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
601 if opts.verbose and code:
601 if opts.verbose and code:
602 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
602 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
603 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
603 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
604 for e in errors:
604 for e in errors:
605 writeerr("%s\n" % e)
605 writeerr("%s\n" % e)
606 return len(errors)
606 return len(errors)
607
607
608 def applyembedded(args, embeddedfunc, opts):
608 def applyembedded(args, embeddedfunc, opts):
609 ret = 0
609 ret = 0
610 if args:
610 if args:
611 for f in args:
611 for f in args:
612 with opentext(f) as fp:
612 with opentext(f) as fp:
613 if showembedded(f, fp, embeddedfunc, opts):
613 if showembedded(f, fp, embeddedfunc, opts):
614 ret = 1
614 ret = 1
615 else:
615 else:
616 lines = [l for l in sys.stdin.readlines()]
616 lines = [l for l in sys.stdin.readlines()]
617 if showembedded('<stdin>', lines, embeddedfunc, opts):
617 if showembedded('<stdin>', lines, embeddedfunc, opts):
618 ret = 1
618 ret = 1
619 return ret
619 return ret
620
620
621 commands = {}
621 commands = {}
622
622
623 def command(name, desc):
623 def command(name, desc):
624 def wrap(func):
624 def wrap(func):
625 commands[name] = (desc, func)
625 commands[name] = (desc, func)
626
626
627 return wrap
627 return wrap
628
628
629 @command("pyembedded", "detect embedded python script")
629 @command("pyembedded", "detect embedded python script")
630 def pyembeddedcmd(args, opts):
630 def pyembeddedcmd(args, opts):
631 return applyembedded(args, pyembedded, opts)
631 return applyembedded(args, pyembedded, opts)
632
632
633 @command("shembedded", "detect embedded shell script")
633 @command("shembedded", "detect embedded shell script")
634 def shembeddedcmd(args, opts):
634 def shembeddedcmd(args, opts):
635 return applyembedded(args, shembedded, opts)
635 return applyembedded(args, shembedded, opts)
636
636
637 @command("hgrcembedded", "detect embedded hgrc configuration")
637 @command("hgrcembedded", "detect embedded hgrc configuration")
638 def hgrcembeddedcmd(args, opts):
638 def hgrcembeddedcmd(args, opts):
639 return applyembedded(args, hgrcembedded, opts)
639 return applyembedded(args, hgrcembedded, opts)
640
640
641 availablecommands = "\n".join(
641 availablecommands = "\n".join(
642 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
642 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
643 )
643 )
644
644
645 parser = optparse.OptionParser(
645 parser = optparse.OptionParser(
646 """%prog COMMAND [file ...]
646 """%prog COMMAND [file ...]
647
647
648 Pick up embedded code fragments from given file(s) or stdin, and list
648 Pick up embedded code fragments from given file(s) or stdin, and list
649 up start/end lines of them in standard compiler format
649 up start/end lines of them in standard compiler format
650 ("FILENAME:LINENO:").
650 ("FILENAME:LINENO:").
651
651
652 Available commands are:
652 Available commands are:
653 """
653 """
654 + availablecommands
654 + availablecommands
655 + """
655 + """
656 """
656 """
657 )
657 )
658 parser.add_option(
658 parser.add_option(
659 "-v",
659 "-v",
660 "--verbose",
660 "--verbose",
661 help="enable additional output (e.g. actual code)",
661 help="enable additional output (e.g. actual code)",
662 action="store_true",
662 action="store_true",
663 )
663 )
664 (opts, args) = parser.parse_args()
664 (opts, args) = parser.parse_args()
665
665
666 if not args or args[0] not in commands:
666 if not args or args[0] not in commands:
667 parser.print_help()
667 parser.print_help()
668 sys.exit(255)
668 sys.exit(255)
669
669
670 sys.exit(commands[args[0]][1](args[1:], opts))
670 sys.exit(commands[args[0]][1](args[1:], opts))
@@ -1,746 +1,746
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import contextlib
10 import contextlib
11 import errno
11 import errno
12 import hashlib
12 import hashlib
13 import json
13 import json
14 import os
14 import os
15 import re
15 import re
16 import socket
16 import socket
17
17
18 from mercurial.i18n import _
18 from mercurial.i18n import _
19 from mercurial.pycompat import getattr
19 from mercurial.pycompat import getattr
20
20
21 from mercurial import (
21 from mercurial import (
22 encoding,
22 encoding,
23 error,
23 error,
24 node,
24 node,
25 pathutil,
25 pathutil,
26 pycompat,
26 pycompat,
27 url as urlmod,
27 url as urlmod,
28 util,
28 util,
29 vfs as vfsmod,
29 vfs as vfsmod,
30 worker,
30 worker,
31 )
31 )
32
32
33 from mercurial.utils import stringutil
33 from mercurial.utils import stringutil
34
34
35 from ..largefiles import lfutil
35 from ..largefiles import lfutil
36
36
37 # 64 bytes for SHA256
37 # 64 bytes for SHA256
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39
39
40
40
41 class lfsvfs(vfsmod.vfs):
41 class lfsvfs(vfsmod.vfs):
42 def join(self, path):
42 def join(self, path):
43 """split the path at first two characters, like: XX/XXXXX..."""
43 """split the path at first two characters, like: XX/XXXXX..."""
44 if not _lfsre.match(path):
44 if not _lfsre.match(path):
45 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
45 raise error.ProgrammingError(b'unexpected lfs path: %s' % path)
46 return super(lfsvfs, self).join(path[0:2], path[2:])
46 return super(lfsvfs, self).join(path[0:2], path[2:])
47
47
48 def walk(self, path=None, onerror=None):
48 def walk(self, path=None, onerror=None):
49 """Yield (dirpath, [], oids) tuple for blobs under path
49 """Yield (dirpath, [], oids) tuple for blobs under path
50
50
51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 Oids only exist in the root of this vfs, so dirpath is always ''.
52 """
52 """
53 root = os.path.normpath(self.base)
53 root = os.path.normpath(self.base)
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
55 # because len(dirpath) < prefixlen.
55 # because len(dirpath) < prefixlen.
56 prefixlen = len(pathutil.normasprefix(root))
56 prefixlen = len(pathutil.normasprefix(root))
57 oids = []
57 oids = []
58
58
59 for dirpath, dirs, files in os.walk(
59 for dirpath, dirs, files in os.walk(
60 self.reljoin(self.base, path or b''), onerror=onerror
60 self.reljoin(self.base, path or b''), onerror=onerror
61 ):
61 ):
62 dirpath = dirpath[prefixlen:]
62 dirpath = dirpath[prefixlen:]
63
63
64 # Silently skip unexpected files and directories
64 # Silently skip unexpected files and directories
65 if len(dirpath) == 2:
65 if len(dirpath) == 2:
66 oids.extend(
66 oids.extend(
67 [dirpath + f for f in files if _lfsre.match(dirpath + f)]
67 [dirpath + f for f in files if _lfsre.match(dirpath + f)]
68 )
68 )
69
69
70 yield (b'', [], oids)
70 yield (b'', [], oids)
71
71
72
72
73 class nullvfs(lfsvfs):
73 class nullvfs(lfsvfs):
74 def __init__(self):
74 def __init__(self):
75 pass
75 pass
76
76
77 def exists(self, oid):
77 def exists(self, oid):
78 return False
78 return False
79
79
80 def read(self, oid):
80 def read(self, oid):
81 # store.read() calls into here if the blob doesn't exist in its
81 # store.read() calls into here if the blob doesn't exist in its
82 # self.vfs. Raise the same error as a normal vfs when asked to read a
82 # self.vfs. Raise the same error as a normal vfs when asked to read a
83 # file that doesn't exist. The only difference is the full file path
83 # file that doesn't exist. The only difference is the full file path
84 # isn't available in the error.
84 # isn't available in the error.
85 raise IOError(
85 raise IOError(
86 errno.ENOENT,
86 errno.ENOENT,
87 pycompat.sysstr(b'%s: No such file or directory' % oid),
87 pycompat.sysstr(b'%s: No such file or directory' % oid),
88 )
88 )
89
89
90 def walk(self, path=None, onerror=None):
90 def walk(self, path=None, onerror=None):
91 return (b'', [], [])
91 return (b'', [], [])
92
92
93 def write(self, oid, data):
93 def write(self, oid, data):
94 pass
94 pass
95
95
96
96
97 class filewithprogress(object):
97 class filewithprogress(object):
98 """a file-like object that supports __len__ and read.
98 """a file-like object that supports __len__ and read.
99
99
100 Useful to provide progress information for how many bytes are read.
100 Useful to provide progress information for how many bytes are read.
101 """
101 """
102
102
103 def __init__(self, fp, callback):
103 def __init__(self, fp, callback):
104 self._fp = fp
104 self._fp = fp
105 self._callback = callback # func(readsize)
105 self._callback = callback # func(readsize)
106 fp.seek(0, os.SEEK_END)
106 fp.seek(0, os.SEEK_END)
107 self._len = fp.tell()
107 self._len = fp.tell()
108 fp.seek(0)
108 fp.seek(0)
109
109
110 def __len__(self):
110 def __len__(self):
111 return self._len
111 return self._len
112
112
113 def read(self, size):
113 def read(self, size):
114 if self._fp is None:
114 if self._fp is None:
115 return b''
115 return b''
116 data = self._fp.read(size)
116 data = self._fp.read(size)
117 if data:
117 if data:
118 if self._callback:
118 if self._callback:
119 self._callback(len(data))
119 self._callback(len(data))
120 else:
120 else:
121 self._fp.close()
121 self._fp.close()
122 self._fp = None
122 self._fp = None
123 return data
123 return data
124
124
125
125
126 class local(object):
126 class local(object):
127 """Local blobstore for large file contents.
127 """Local blobstore for large file contents.
128
128
129 This blobstore is used both as a cache and as a staging area for large blobs
129 This blobstore is used both as a cache and as a staging area for large blobs
130 to be uploaded to the remote blobstore.
130 to be uploaded to the remote blobstore.
131 """
131 """
132
132
133 def __init__(self, repo):
133 def __init__(self, repo):
134 fullpath = repo.svfs.join(b'lfs/objects')
134 fullpath = repo.svfs.join(b'lfs/objects')
135 self.vfs = lfsvfs(fullpath)
135 self.vfs = lfsvfs(fullpath)
136
136
137 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
137 if repo.ui.configbool(b'experimental', b'lfs.disableusercache'):
138 self.cachevfs = nullvfs()
138 self.cachevfs = nullvfs()
139 else:
139 else:
140 usercache = lfutil._usercachedir(repo.ui, b'lfs')
140 usercache = lfutil._usercachedir(repo.ui, b'lfs')
141 self.cachevfs = lfsvfs(usercache)
141 self.cachevfs = lfsvfs(usercache)
142 self.ui = repo.ui
142 self.ui = repo.ui
143
143
144 def open(self, oid):
144 def open(self, oid):
145 """Open a read-only file descriptor to the named blob, in either the
145 """Open a read-only file descriptor to the named blob, in either the
146 usercache or the local store."""
146 usercache or the local store."""
147 # The usercache is the most likely place to hold the file. Commit will
147 # The usercache is the most likely place to hold the file. Commit will
148 # write to both it and the local store, as will anything that downloads
148 # write to both it and the local store, as will anything that downloads
149 # the blobs. However, things like clone without an update won't
149 # the blobs. However, things like clone without an update won't
150 # populate the local store. For an init + push of a local clone,
150 # populate the local store. For an init + push of a local clone,
151 # the usercache is the only place it _could_ be. If not present, the
151 # the usercache is the only place it _could_ be. If not present, the
152 # missing file msg here will indicate the local repo, not the usercache.
152 # missing file msg here will indicate the local repo, not the usercache.
153 if self.cachevfs.exists(oid):
153 if self.cachevfs.exists(oid):
154 return self.cachevfs(oid, b'rb')
154 return self.cachevfs(oid, b'rb')
155
155
156 return self.vfs(oid, b'rb')
156 return self.vfs(oid, b'rb')
157
157
158 def download(self, oid, src):
158 def download(self, oid, src):
159 """Read the blob from the remote source in chunks, verify the content,
159 """Read the blob from the remote source in chunks, verify the content,
160 and write to this local blobstore."""
160 and write to this local blobstore."""
161 sha256 = hashlib.sha256()
161 sha256 = hashlib.sha256()
162
162
163 with self.vfs(oid, b'wb', atomictemp=True) as fp:
163 with self.vfs(oid, b'wb', atomictemp=True) as fp:
164 for chunk in util.filechunkiter(src, size=1048576):
164 for chunk in util.filechunkiter(src, size=1048576):
165 fp.write(chunk)
165 fp.write(chunk)
166 sha256.update(chunk)
166 sha256.update(chunk)
167
167
168 realoid = node.hex(sha256.digest())
168 realoid = node.hex(sha256.digest())
169 if realoid != oid:
169 if realoid != oid:
170 raise LfsCorruptionError(
170 raise LfsCorruptionError(
171 _(b'corrupt remote lfs object: %s') % oid
171 _(b'corrupt remote lfs object: %s') % oid
172 )
172 )
173
173
174 self._linktousercache(oid)
174 self._linktousercache(oid)
175
175
176 def write(self, oid, data):
176 def write(self, oid, data):
177 """Write blob to local blobstore.
177 """Write blob to local blobstore.
178
178
179 This should only be called from the filelog during a commit or similar.
179 This should only be called from the filelog during a commit or similar.
180 As such, there is no need to verify the data. Imports from a remote
180 As such, there is no need to verify the data. Imports from a remote
181 store must use ``download()`` instead."""
181 store must use ``download()`` instead."""
182 with self.vfs(oid, b'wb', atomictemp=True) as fp:
182 with self.vfs(oid, b'wb', atomictemp=True) as fp:
183 fp.write(data)
183 fp.write(data)
184
184
185 self._linktousercache(oid)
185 self._linktousercache(oid)
186
186
187 def linkfromusercache(self, oid):
187 def linkfromusercache(self, oid):
188 """Link blobs found in the user cache into this store.
188 """Link blobs found in the user cache into this store.
189
189
190 The server module needs to do this when it lets the client know not to
190 The server module needs to do this when it lets the client know not to
191 upload the blob, to ensure it is always available in this store.
191 upload the blob, to ensure it is always available in this store.
192 Normally this is done implicitly when the client reads or writes the
192 Normally this is done implicitly when the client reads or writes the
193 blob, but that doesn't happen when the server tells the client that it
193 blob, but that doesn't happen when the server tells the client that it
194 already has the blob.
194 already has the blob.
195 """
195 """
196 if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
196 if not isinstance(self.cachevfs, nullvfs) and not self.vfs.exists(oid):
197 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
197 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
198 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
198 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
199
199
200 def _linktousercache(self, oid):
200 def _linktousercache(self, oid):
201 # XXX: should we verify the content of the cache, and hardlink back to
201 # XXX: should we verify the content of the cache, and hardlink back to
202 # the local store on success, but truncate, write and link on failure?
202 # the local store on success, but truncate, write and link on failure?
203 if not self.cachevfs.exists(oid) and not isinstance(
203 if not self.cachevfs.exists(oid) and not isinstance(
204 self.cachevfs, nullvfs
204 self.cachevfs, nullvfs
205 ):
205 ):
206 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
206 self.ui.note(_(b'lfs: adding %s to the usercache\n') % oid)
207 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
207 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
208
208
209 def read(self, oid, verify=True):
209 def read(self, oid, verify=True):
210 """Read blob from local blobstore."""
210 """Read blob from local blobstore."""
211 if not self.vfs.exists(oid):
211 if not self.vfs.exists(oid):
212 blob = self._read(self.cachevfs, oid, verify)
212 blob = self._read(self.cachevfs, oid, verify)
213
213
214 # Even if revlog will verify the content, it needs to be verified
214 # Even if revlog will verify the content, it needs to be verified
215 # now before making the hardlink to avoid propagating corrupt blobs.
215 # now before making the hardlink to avoid propagating corrupt blobs.
216 # Don't abort if corruption is detected, because `hg verify` will
216 # Don't abort if corruption is detected, because `hg verify` will
217 # give more useful info about the corruption- simply don't add the
217 # give more useful info about the corruption- simply don't add the
218 # hardlink.
218 # hardlink.
219 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
219 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
220 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
220 self.ui.note(_(b'lfs: found %s in the usercache\n') % oid)
221 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
221 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
222 else:
222 else:
223 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
223 self.ui.note(_(b'lfs: found %s in the local lfs store\n') % oid)
224 blob = self._read(self.vfs, oid, verify)
224 blob = self._read(self.vfs, oid, verify)
225 return blob
225 return blob
226
226
227 def _read(self, vfs, oid, verify):
227 def _read(self, vfs, oid, verify):
228 """Read blob (after verifying) from the given store"""
228 """Read blob (after verifying) from the given store"""
229 blob = vfs.read(oid)
229 blob = vfs.read(oid)
230 if verify:
230 if verify:
231 _verify(oid, blob)
231 _verify(oid, blob)
232 return blob
232 return blob
233
233
234 def verify(self, oid):
234 def verify(self, oid):
235 """Indicate whether or not the hash of the underlying file matches its
235 """Indicate whether or not the hash of the underlying file matches its
236 name."""
236 name."""
237 sha256 = hashlib.sha256()
237 sha256 = hashlib.sha256()
238
238
239 with self.open(oid) as fp:
239 with self.open(oid) as fp:
240 for chunk in util.filechunkiter(fp, size=1048576):
240 for chunk in util.filechunkiter(fp, size=1048576):
241 sha256.update(chunk)
241 sha256.update(chunk)
242
242
243 return oid == node.hex(sha256.digest())
243 return oid == node.hex(sha256.digest())
244
244
245 def has(self, oid):
245 def has(self, oid):
246 """Returns True if the local blobstore contains the requested blob,
246 """Returns True if the local blobstore contains the requested blob,
247 False otherwise."""
247 False otherwise."""
248 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
248 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
249
249
250
250
251 def _urlerrorreason(urlerror):
251 def _urlerrorreason(urlerror):
252 '''Create a friendly message for the given URLError to be used in an
252 '''Create a friendly message for the given URLError to be used in an
253 LfsRemoteError message.
253 LfsRemoteError message.
254 '''
254 '''
255 inst = urlerror
255 inst = urlerror
256
256
257 if isinstance(urlerror.reason, Exception):
257 if isinstance(urlerror.reason, Exception):
258 inst = urlerror.reason
258 inst = urlerror.reason
259
259
260 if util.safehasattr(inst, b'reason'):
260 if util.safehasattr(inst, b'reason'):
261 try: # usually it is in the form (errno, strerror)
261 try: # usually it is in the form (errno, strerror)
262 reason = inst.reason.args[1]
262 reason = inst.reason.args[1]
263 except (AttributeError, IndexError):
263 except (AttributeError, IndexError):
264 # it might be anything, for example a string
264 # it might be anything, for example a string
265 reason = inst.reason
265 reason = inst.reason
266 if isinstance(reason, pycompat.unicode):
266 if isinstance(reason, pycompat.unicode):
267 # SSLError of Python 2.7.9 contains a unicode
267 # SSLError of Python 2.7.9 contains a unicode
268 reason = encoding.unitolocal(reason)
268 reason = encoding.unitolocal(reason)
269 return reason
269 return reason
270 elif getattr(inst, "strerror", None):
270 elif getattr(inst, "strerror", None):
271 return encoding.strtolocal(inst.strerror)
271 return encoding.strtolocal(inst.strerror)
272 else:
272 else:
273 return stringutil.forcebytestr(urlerror)
273 return stringutil.forcebytestr(urlerror)
274
274
275
275
276 class lfsauthhandler(util.urlreq.basehandler):
276 class lfsauthhandler(util.urlreq.basehandler):
277 handler_order = 480 # Before HTTPDigestAuthHandler (== 490)
277 handler_order = 480 # Before HTTPDigestAuthHandler (== 490)
278
278
279 def http_error_401(self, req, fp, code, msg, headers):
279 def http_error_401(self, req, fp, code, msg, headers):
280 """Enforces that any authentication performed is HTTP Basic
280 """Enforces that any authentication performed is HTTP Basic
281 Authentication. No authentication is also acceptable.
281 Authentication. No authentication is also acceptable.
282 """
282 """
283 authreq = headers.get(r'www-authenticate', None)
283 authreq = headers.get(r'www-authenticate', None)
284 if authreq:
284 if authreq:
285 scheme = authreq.split()[0]
285 scheme = authreq.split()[0]
286
286
287 if scheme.lower() != r'basic':
287 if scheme.lower() != r'basic':
288 msg = _(b'the server must support Basic Authentication')
288 msg = _(b'the server must support Basic Authentication')
289 raise util.urlerr.httperror(
289 raise util.urlerr.httperror(
290 req.get_full_url(),
290 req.get_full_url(),
291 code,
291 code,
292 encoding.strfromlocal(msg),
292 encoding.strfromlocal(msg),
293 headers,
293 headers,
294 fp,
294 fp,
295 )
295 )
296 return None
296 return None
297
297
298
298
299 class _gitlfsremote(object):
299 class _gitlfsremote(object):
300 def __init__(self, repo, url):
300 def __init__(self, repo, url):
301 ui = repo.ui
301 ui = repo.ui
302 self.ui = ui
302 self.ui = ui
303 baseurl, authinfo = url.authinfo()
303 baseurl, authinfo = url.authinfo()
304 self.baseurl = baseurl.rstrip(b'/')
304 self.baseurl = baseurl.rstrip(b'/')
305 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
305 useragent = repo.ui.config(b'experimental', b'lfs.user-agent')
306 if not useragent:
306 if not useragent:
307 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
307 useragent = b'git-lfs/2.3.4 (Mercurial %s)' % util.version()
308 self.urlopener = urlmod.opener(ui, authinfo, useragent)
308 self.urlopener = urlmod.opener(ui, authinfo, useragent)
309 self.urlopener.add_handler(lfsauthhandler())
309 self.urlopener.add_handler(lfsauthhandler())
310 self.retry = ui.configint(b'lfs', b'retry')
310 self.retry = ui.configint(b'lfs', b'retry')
311
311
312 def writebatch(self, pointers, fromstore):
312 def writebatch(self, pointers, fromstore):
313 """Batch upload from local to remote blobstore."""
313 """Batch upload from local to remote blobstore."""
314 self._batch(_deduplicate(pointers), fromstore, b'upload')
314 self._batch(_deduplicate(pointers), fromstore, b'upload')
315
315
316 def readbatch(self, pointers, tostore):
316 def readbatch(self, pointers, tostore):
317 """Batch download from remote to local blostore."""
317 """Batch download from remote to local blostore."""
318 self._batch(_deduplicate(pointers), tostore, b'download')
318 self._batch(_deduplicate(pointers), tostore, b'download')
319
319
320 def _batchrequest(self, pointers, action):
320 def _batchrequest(self, pointers, action):
321 """Get metadata about objects pointed by pointers for given action
321 """Get metadata about objects pointed by pointers for given action
322
322
323 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
323 Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
324 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
324 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
325 """
325 """
326 objects = [
326 objects = [
327 {r'oid': pycompat.strurl(p.oid()), r'size': p.size()}
327 {r'oid': pycompat.strurl(p.oid()), r'size': p.size()}
328 for p in pointers
328 for p in pointers
329 ]
329 ]
330 requestdata = pycompat.bytesurl(
330 requestdata = pycompat.bytesurl(
331 json.dumps(
331 json.dumps(
332 {r'objects': objects, r'operation': pycompat.strurl(action),}
332 {r'objects': objects, r'operation': pycompat.strurl(action),}
333 )
333 )
334 )
334 )
335 url = b'%s/objects/batch' % self.baseurl
335 url = b'%s/objects/batch' % self.baseurl
336 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
336 batchreq = util.urlreq.request(pycompat.strurl(url), data=requestdata)
337 batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json')
337 batchreq.add_header(r'Accept', r'application/vnd.git-lfs+json')
338 batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json')
338 batchreq.add_header(r'Content-Type', r'application/vnd.git-lfs+json')
339 try:
339 try:
340 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
340 with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
341 rawjson = rsp.read()
341 rawjson = rsp.read()
342 except util.urlerr.httperror as ex:
342 except util.urlerr.httperror as ex:
343 hints = {
343 hints = {
344 400: _(
344 400: _(
345 b'check that lfs serving is enabled on %s and "%s" is '
345 b'check that lfs serving is enabled on %s and "%s" is '
346 b'supported'
346 b'supported'
347 )
347 )
348 % (self.baseurl, action),
348 % (self.baseurl, action),
349 404: _(b'the "lfs.url" config may be used to override %s')
349 404: _(b'the "lfs.url" config may be used to override %s')
350 % self.baseurl,
350 % self.baseurl,
351 }
351 }
352 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
352 hint = hints.get(ex.code, _(b'api=%s, action=%s') % (url, action))
353 raise LfsRemoteError(
353 raise LfsRemoteError(
354 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
354 _(b'LFS HTTP error: %s') % stringutil.forcebytestr(ex),
355 hint=hint,
355 hint=hint,
356 )
356 )
357 except util.urlerr.urlerror as ex:
357 except util.urlerr.urlerror as ex:
358 hint = (
358 hint = (
359 _(b'the "lfs.url" config may be used to override %s')
359 _(b'the "lfs.url" config may be used to override %s')
360 % self.baseurl
360 % self.baseurl
361 )
361 )
362 raise LfsRemoteError(
362 raise LfsRemoteError(
363 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
363 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
364 )
364 )
365 try:
365 try:
366 response = json.loads(rawjson)
366 response = json.loads(rawjson)
367 except ValueError:
367 except ValueError:
368 raise LfsRemoteError(
368 raise LfsRemoteError(
369 _(b'LFS server returns invalid JSON: %s')
369 _(b'LFS server returns invalid JSON: %s')
370 % rawjson.encode("utf-8")
370 % rawjson.encode("utf-8")
371 )
371 )
372
372
373 if self.ui.debugflag:
373 if self.ui.debugflag:
374 self.ui.debug(b'Status: %d\n' % rsp.status)
374 self.ui.debug(b'Status: %d\n' % rsp.status)
375 # lfs-test-server and hg serve return headers in different order
375 # lfs-test-server and hg serve return headers in different order
376 headers = pycompat.bytestr(rsp.info()).strip()
376 headers = pycompat.bytestr(rsp.info()).strip()
377 self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
377 self.ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
378
378
379 if r'objects' in response:
379 if r'objects' in response:
380 response[r'objects'] = sorted(
380 response[r'objects'] = sorted(
381 response[r'objects'], key=lambda p: p[r'oid']
381 response[r'objects'], key=lambda p: p[r'oid']
382 )
382 )
383 self.ui.debug(
383 self.ui.debug(
384 b'%s\n'
384 b'%s\n'
385 % pycompat.bytesurl(
385 % pycompat.bytesurl(
386 json.dumps(
386 json.dumps(
387 response,
387 response,
388 indent=2,
388 indent=2,
389 separators=(r'', r': '),
389 separators=(r'', r': '),
390 sort_keys=True,
390 sort_keys=True,
391 )
391 )
392 )
392 )
393 )
393 )
394
394
395 def encodestr(x):
395 def encodestr(x):
396 if isinstance(x, pycompat.unicode):
396 if isinstance(x, pycompat.unicode):
397 return x.encode(u'utf-8')
397 return x.encode('utf-8')
398 return x
398 return x
399
399
400 return pycompat.rapply(encodestr, response)
400 return pycompat.rapply(encodestr, response)
401
401
402 def _checkforservererror(self, pointers, responses, action):
402 def _checkforservererror(self, pointers, responses, action):
403 """Scans errors from objects
403 """Scans errors from objects
404
404
405 Raises LfsRemoteError if any objects have an error"""
405 Raises LfsRemoteError if any objects have an error"""
406 for response in responses:
406 for response in responses:
407 # The server should return 404 when objects cannot be found. Some
407 # The server should return 404 when objects cannot be found. Some
408 # server implementation (ex. lfs-test-server) does not set "error"
408 # server implementation (ex. lfs-test-server) does not set "error"
409 # but just removes "download" from "actions". Treat that case
409 # but just removes "download" from "actions". Treat that case
410 # as the same as 404 error.
410 # as the same as 404 error.
411 if b'error' not in response:
411 if b'error' not in response:
412 if action == b'download' and action not in response.get(
412 if action == b'download' and action not in response.get(
413 b'actions', []
413 b'actions', []
414 ):
414 ):
415 code = 404
415 code = 404
416 else:
416 else:
417 continue
417 continue
418 else:
418 else:
419 # An error dict without a code doesn't make much sense, so
419 # An error dict without a code doesn't make much sense, so
420 # treat as a server error.
420 # treat as a server error.
421 code = response.get(b'error').get(b'code', 500)
421 code = response.get(b'error').get(b'code', 500)
422
422
423 ptrmap = {p.oid(): p for p in pointers}
423 ptrmap = {p.oid(): p for p in pointers}
424 p = ptrmap.get(response[b'oid'], None)
424 p = ptrmap.get(response[b'oid'], None)
425 if p:
425 if p:
426 filename = getattr(p, 'filename', b'unknown')
426 filename = getattr(p, 'filename', b'unknown')
427 errors = {
427 errors = {
428 404: b'The object does not exist',
428 404: b'The object does not exist',
429 410: b'The object was removed by the owner',
429 410: b'The object was removed by the owner',
430 422: b'Validation error',
430 422: b'Validation error',
431 500: b'Internal server error',
431 500: b'Internal server error',
432 }
432 }
433 msg = errors.get(code, b'status code %d' % code)
433 msg = errors.get(code, b'status code %d' % code)
434 raise LfsRemoteError(
434 raise LfsRemoteError(
435 _(b'LFS server error for "%s": %s') % (filename, msg)
435 _(b'LFS server error for "%s": %s') % (filename, msg)
436 )
436 )
437 else:
437 else:
438 raise LfsRemoteError(
438 raise LfsRemoteError(
439 _(b'LFS server error. Unsolicited response for oid %s')
439 _(b'LFS server error. Unsolicited response for oid %s')
440 % response[b'oid']
440 % response[b'oid']
441 )
441 )
442
442
443 def _extractobjects(self, response, pointers, action):
443 def _extractobjects(self, response, pointers, action):
444 """extract objects from response of the batch API
444 """extract objects from response of the batch API
445
445
446 response: parsed JSON object returned by batch API
446 response: parsed JSON object returned by batch API
447 return response['objects'] filtered by action
447 return response['objects'] filtered by action
448 raise if any object has an error
448 raise if any object has an error
449 """
449 """
450 # Scan errors from objects - fail early
450 # Scan errors from objects - fail early
451 objects = response.get(b'objects', [])
451 objects = response.get(b'objects', [])
452 self._checkforservererror(pointers, objects, action)
452 self._checkforservererror(pointers, objects, action)
453
453
454 # Filter objects with given action. Practically, this skips uploading
454 # Filter objects with given action. Practically, this skips uploading
455 # objects which exist in the server.
455 # objects which exist in the server.
456 filteredobjects = [
456 filteredobjects = [
457 o for o in objects if action in o.get(b'actions', [])
457 o for o in objects if action in o.get(b'actions', [])
458 ]
458 ]
459
459
460 return filteredobjects
460 return filteredobjects
461
461
462 def _basictransfer(self, obj, action, localstore):
462 def _basictransfer(self, obj, action, localstore):
463 """Download or upload a single object using basic transfer protocol
463 """Download or upload a single object using basic transfer protocol
464
464
465 obj: dict, an object description returned by batch API
465 obj: dict, an object description returned by batch API
466 action: string, one of ['upload', 'download']
466 action: string, one of ['upload', 'download']
467 localstore: blobstore.local
467 localstore: blobstore.local
468
468
469 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
469 See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
470 basic-transfers.md
470 basic-transfers.md
471 """
471 """
472 oid = obj[b'oid']
472 oid = obj[b'oid']
473 href = obj[b'actions'][action].get(b'href')
473 href = obj[b'actions'][action].get(b'href')
474 headers = obj[b'actions'][action].get(b'header', {}).items()
474 headers = obj[b'actions'][action].get(b'header', {}).items()
475
475
476 request = util.urlreq.request(pycompat.strurl(href))
476 request = util.urlreq.request(pycompat.strurl(href))
477 if action == b'upload':
477 if action == b'upload':
478 # If uploading blobs, read data from local blobstore.
478 # If uploading blobs, read data from local blobstore.
479 if not localstore.verify(oid):
479 if not localstore.verify(oid):
480 raise error.Abort(
480 raise error.Abort(
481 _(b'detected corrupt lfs object: %s') % oid,
481 _(b'detected corrupt lfs object: %s') % oid,
482 hint=_(b'run hg verify'),
482 hint=_(b'run hg verify'),
483 )
483 )
484 request.data = filewithprogress(localstore.open(oid), None)
484 request.data = filewithprogress(localstore.open(oid), None)
485 request.get_method = lambda: r'PUT'
485 request.get_method = lambda: r'PUT'
486 request.add_header(r'Content-Type', r'application/octet-stream')
486 request.add_header(r'Content-Type', r'application/octet-stream')
487 request.add_header(r'Content-Length', len(request.data))
487 request.add_header(r'Content-Length', len(request.data))
488
488
489 for k, v in headers:
489 for k, v in headers:
490 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
490 request.add_header(pycompat.strurl(k), pycompat.strurl(v))
491
491
492 response = b''
492 response = b''
493 try:
493 try:
494 with contextlib.closing(self.urlopener.open(request)) as req:
494 with contextlib.closing(self.urlopener.open(request)) as req:
495 ui = self.ui # Shorten debug lines
495 ui = self.ui # Shorten debug lines
496 if self.ui.debugflag:
496 if self.ui.debugflag:
497 ui.debug(b'Status: %d\n' % req.status)
497 ui.debug(b'Status: %d\n' % req.status)
498 # lfs-test-server and hg serve return headers in different
498 # lfs-test-server and hg serve return headers in different
499 # order
499 # order
500 headers = pycompat.bytestr(req.info()).strip()
500 headers = pycompat.bytestr(req.info()).strip()
501 ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
501 ui.debug(b'%s\n' % b'\n'.join(sorted(headers.splitlines())))
502
502
503 if action == b'download':
503 if action == b'download':
504 # If downloading blobs, store downloaded data to local
504 # If downloading blobs, store downloaded data to local
505 # blobstore
505 # blobstore
506 localstore.download(oid, req)
506 localstore.download(oid, req)
507 else:
507 else:
508 while True:
508 while True:
509 data = req.read(1048576)
509 data = req.read(1048576)
510 if not data:
510 if not data:
511 break
511 break
512 response += data
512 response += data
513 if response:
513 if response:
514 ui.debug(b'lfs %s response: %s' % (action, response))
514 ui.debug(b'lfs %s response: %s' % (action, response))
515 except util.urlerr.httperror as ex:
515 except util.urlerr.httperror as ex:
516 if self.ui.debugflag:
516 if self.ui.debugflag:
517 self.ui.debug(
517 self.ui.debug(
518 b'%s: %s\n' % (oid, ex.read())
518 b'%s: %s\n' % (oid, ex.read())
519 ) # XXX: also bytes?
519 ) # XXX: also bytes?
520 raise LfsRemoteError(
520 raise LfsRemoteError(
521 _(b'LFS HTTP error: %s (oid=%s, action=%s)')
521 _(b'LFS HTTP error: %s (oid=%s, action=%s)')
522 % (stringutil.forcebytestr(ex), oid, action)
522 % (stringutil.forcebytestr(ex), oid, action)
523 )
523 )
524 except util.urlerr.urlerror as ex:
524 except util.urlerr.urlerror as ex:
525 hint = _(b'attempted connection to %s') % pycompat.bytesurl(
525 hint = _(b'attempted connection to %s') % pycompat.bytesurl(
526 util.urllibcompat.getfullurl(request)
526 util.urllibcompat.getfullurl(request)
527 )
527 )
528 raise LfsRemoteError(
528 raise LfsRemoteError(
529 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
529 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
530 )
530 )
531
531
532 def _batch(self, pointers, localstore, action):
532 def _batch(self, pointers, localstore, action):
533 if action not in [b'upload', b'download']:
533 if action not in [b'upload', b'download']:
534 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
534 raise error.ProgrammingError(b'invalid Git-LFS action: %s' % action)
535
535
536 response = self._batchrequest(pointers, action)
536 response = self._batchrequest(pointers, action)
537 objects = self._extractobjects(response, pointers, action)
537 objects = self._extractobjects(response, pointers, action)
538 total = sum(x.get(b'size', 0) for x in objects)
538 total = sum(x.get(b'size', 0) for x in objects)
539 sizes = {}
539 sizes = {}
540 for obj in objects:
540 for obj in objects:
541 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
541 sizes[obj.get(b'oid')] = obj.get(b'size', 0)
542 topic = {
542 topic = {
543 b'upload': _(b'lfs uploading'),
543 b'upload': _(b'lfs uploading'),
544 b'download': _(b'lfs downloading'),
544 b'download': _(b'lfs downloading'),
545 }[action]
545 }[action]
546 if len(objects) > 1:
546 if len(objects) > 1:
547 self.ui.note(
547 self.ui.note(
548 _(b'lfs: need to transfer %d objects (%s)\n')
548 _(b'lfs: need to transfer %d objects (%s)\n')
549 % (len(objects), util.bytecount(total))
549 % (len(objects), util.bytecount(total))
550 )
550 )
551
551
552 def transfer(chunk):
552 def transfer(chunk):
553 for obj in chunk:
553 for obj in chunk:
554 objsize = obj.get(b'size', 0)
554 objsize = obj.get(b'size', 0)
555 if self.ui.verbose:
555 if self.ui.verbose:
556 if action == b'download':
556 if action == b'download':
557 msg = _(b'lfs: downloading %s (%s)\n')
557 msg = _(b'lfs: downloading %s (%s)\n')
558 elif action == b'upload':
558 elif action == b'upload':
559 msg = _(b'lfs: uploading %s (%s)\n')
559 msg = _(b'lfs: uploading %s (%s)\n')
560 self.ui.note(
560 self.ui.note(
561 msg % (obj.get(b'oid'), util.bytecount(objsize))
561 msg % (obj.get(b'oid'), util.bytecount(objsize))
562 )
562 )
563 retry = self.retry
563 retry = self.retry
564 while True:
564 while True:
565 try:
565 try:
566 self._basictransfer(obj, action, localstore)
566 self._basictransfer(obj, action, localstore)
567 yield 1, obj.get(b'oid')
567 yield 1, obj.get(b'oid')
568 break
568 break
569 except socket.error as ex:
569 except socket.error as ex:
570 if retry > 0:
570 if retry > 0:
571 self.ui.note(
571 self.ui.note(
572 _(b'lfs: failed: %r (remaining retry %d)\n')
572 _(b'lfs: failed: %r (remaining retry %d)\n')
573 % (stringutil.forcebytestr(ex), retry)
573 % (stringutil.forcebytestr(ex), retry)
574 )
574 )
575 retry -= 1
575 retry -= 1
576 continue
576 continue
577 raise
577 raise
578
578
579 # Until https multiplexing gets sorted out
579 # Until https multiplexing gets sorted out
580 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
580 if self.ui.configbool(b'experimental', b'lfs.worker-enable'):
581 oids = worker.worker(
581 oids = worker.worker(
582 self.ui,
582 self.ui,
583 0.1,
583 0.1,
584 transfer,
584 transfer,
585 (),
585 (),
586 sorted(objects, key=lambda o: o.get(b'oid')),
586 sorted(objects, key=lambda o: o.get(b'oid')),
587 )
587 )
588 else:
588 else:
589 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
589 oids = transfer(sorted(objects, key=lambda o: o.get(b'oid')))
590
590
591 with self.ui.makeprogress(topic, total=total) as progress:
591 with self.ui.makeprogress(topic, total=total) as progress:
592 progress.update(0)
592 progress.update(0)
593 processed = 0
593 processed = 0
594 blobs = 0
594 blobs = 0
595 for _one, oid in oids:
595 for _one, oid in oids:
596 processed += sizes[oid]
596 processed += sizes[oid]
597 blobs += 1
597 blobs += 1
598 progress.update(processed)
598 progress.update(processed)
599 self.ui.note(_(b'lfs: processed: %s\n') % oid)
599 self.ui.note(_(b'lfs: processed: %s\n') % oid)
600
600
601 if blobs > 0:
601 if blobs > 0:
602 if action == b'upload':
602 if action == b'upload':
603 self.ui.status(
603 self.ui.status(
604 _(b'lfs: uploaded %d files (%s)\n')
604 _(b'lfs: uploaded %d files (%s)\n')
605 % (blobs, util.bytecount(processed))
605 % (blobs, util.bytecount(processed))
606 )
606 )
607 elif action == b'download':
607 elif action == b'download':
608 self.ui.status(
608 self.ui.status(
609 _(b'lfs: downloaded %d files (%s)\n')
609 _(b'lfs: downloaded %d files (%s)\n')
610 % (blobs, util.bytecount(processed))
610 % (blobs, util.bytecount(processed))
611 )
611 )
612
612
613 def __del__(self):
613 def __del__(self):
614 # copied from mercurial/httppeer.py
614 # copied from mercurial/httppeer.py
615 urlopener = getattr(self, 'urlopener', None)
615 urlopener = getattr(self, 'urlopener', None)
616 if urlopener:
616 if urlopener:
617 for h in urlopener.handlers:
617 for h in urlopener.handlers:
618 h.close()
618 h.close()
619 getattr(h, "close_all", lambda: None)()
619 getattr(h, "close_all", lambda: None)()
620
620
621
621
622 class _dummyremote(object):
622 class _dummyremote(object):
623 """Dummy store storing blobs to temp directory."""
623 """Dummy store storing blobs to temp directory."""
624
624
625 def __init__(self, repo, url):
625 def __init__(self, repo, url):
626 fullpath = repo.vfs.join(b'lfs', url.path)
626 fullpath = repo.vfs.join(b'lfs', url.path)
627 self.vfs = lfsvfs(fullpath)
627 self.vfs = lfsvfs(fullpath)
628
628
629 def writebatch(self, pointers, fromstore):
629 def writebatch(self, pointers, fromstore):
630 for p in _deduplicate(pointers):
630 for p in _deduplicate(pointers):
631 content = fromstore.read(p.oid(), verify=True)
631 content = fromstore.read(p.oid(), verify=True)
632 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
632 with self.vfs(p.oid(), b'wb', atomictemp=True) as fp:
633 fp.write(content)
633 fp.write(content)
634
634
635 def readbatch(self, pointers, tostore):
635 def readbatch(self, pointers, tostore):
636 for p in _deduplicate(pointers):
636 for p in _deduplicate(pointers):
637 with self.vfs(p.oid(), b'rb') as fp:
637 with self.vfs(p.oid(), b'rb') as fp:
638 tostore.download(p.oid(), fp)
638 tostore.download(p.oid(), fp)
639
639
640
640
641 class _nullremote(object):
641 class _nullremote(object):
642 """Null store storing blobs to /dev/null."""
642 """Null store storing blobs to /dev/null."""
643
643
644 def __init__(self, repo, url):
644 def __init__(self, repo, url):
645 pass
645 pass
646
646
647 def writebatch(self, pointers, fromstore):
647 def writebatch(self, pointers, fromstore):
648 pass
648 pass
649
649
650 def readbatch(self, pointers, tostore):
650 def readbatch(self, pointers, tostore):
651 pass
651 pass
652
652
653
653
654 class _promptremote(object):
654 class _promptremote(object):
655 """Prompt user to set lfs.url when accessed."""
655 """Prompt user to set lfs.url when accessed."""
656
656
657 def __init__(self, repo, url):
657 def __init__(self, repo, url):
658 pass
658 pass
659
659
660 def writebatch(self, pointers, fromstore, ui=None):
660 def writebatch(self, pointers, fromstore, ui=None):
661 self._prompt()
661 self._prompt()
662
662
663 def readbatch(self, pointers, tostore, ui=None):
663 def readbatch(self, pointers, tostore, ui=None):
664 self._prompt()
664 self._prompt()
665
665
666 def _prompt(self):
666 def _prompt(self):
667 raise error.Abort(_(b'lfs.url needs to be configured'))
667 raise error.Abort(_(b'lfs.url needs to be configured'))
668
668
669
669
670 _storemap = {
670 _storemap = {
671 b'https': _gitlfsremote,
671 b'https': _gitlfsremote,
672 b'http': _gitlfsremote,
672 b'http': _gitlfsremote,
673 b'file': _dummyremote,
673 b'file': _dummyremote,
674 b'null': _nullremote,
674 b'null': _nullremote,
675 None: _promptremote,
675 None: _promptremote,
676 }
676 }
677
677
678
678
679 def _deduplicate(pointers):
679 def _deduplicate(pointers):
680 """Remove any duplicate oids that exist in the list"""
680 """Remove any duplicate oids that exist in the list"""
681 reduced = util.sortdict()
681 reduced = util.sortdict()
682 for p in pointers:
682 for p in pointers:
683 reduced[p.oid()] = p
683 reduced[p.oid()] = p
684 return reduced.values()
684 return reduced.values()
685
685
686
686
687 def _verify(oid, content):
687 def _verify(oid, content):
688 realoid = node.hex(hashlib.sha256(content).digest())
688 realoid = node.hex(hashlib.sha256(content).digest())
689 if realoid != oid:
689 if realoid != oid:
690 raise LfsCorruptionError(
690 raise LfsCorruptionError(
691 _(b'detected corrupt lfs object: %s') % oid,
691 _(b'detected corrupt lfs object: %s') % oid,
692 hint=_(b'run hg verify'),
692 hint=_(b'run hg verify'),
693 )
693 )
694
694
695
695
696 def remote(repo, remote=None):
696 def remote(repo, remote=None):
697 """remotestore factory. return a store in _storemap depending on config
697 """remotestore factory. return a store in _storemap depending on config
698
698
699 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
699 If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
700 infer the endpoint, based on the remote repository using the same path
700 infer the endpoint, based on the remote repository using the same path
701 adjustments as git. As an extension, 'http' is supported as well so that
701 adjustments as git. As an extension, 'http' is supported as well so that
702 ``hg serve`` works out of the box.
702 ``hg serve`` works out of the box.
703
703
704 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
704 https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
705 """
705 """
706 lfsurl = repo.ui.config(b'lfs', b'url')
706 lfsurl = repo.ui.config(b'lfs', b'url')
707 url = util.url(lfsurl or b'')
707 url = util.url(lfsurl or b'')
708 if lfsurl is None:
708 if lfsurl is None:
709 if remote:
709 if remote:
710 path = remote
710 path = remote
711 elif util.safehasattr(repo, b'_subtoppath'):
711 elif util.safehasattr(repo, b'_subtoppath'):
712 # The pull command sets this during the optional update phase, which
712 # The pull command sets this during the optional update phase, which
713 # tells exactly where the pull originated, whether 'paths.default'
713 # tells exactly where the pull originated, whether 'paths.default'
714 # or explicit.
714 # or explicit.
715 path = repo._subtoppath
715 path = repo._subtoppath
716 else:
716 else:
717 # TODO: investigate 'paths.remote:lfsurl' style path customization,
717 # TODO: investigate 'paths.remote:lfsurl' style path customization,
718 # and fall back to inferring from 'paths.remote' if unspecified.
718 # and fall back to inferring from 'paths.remote' if unspecified.
719 path = repo.ui.config(b'paths', b'default') or b''
719 path = repo.ui.config(b'paths', b'default') or b''
720
720
721 defaulturl = util.url(path)
721 defaulturl = util.url(path)
722
722
723 # TODO: support local paths as well.
723 # TODO: support local paths as well.
724 # TODO: consider the ssh -> https transformation that git applies
724 # TODO: consider the ssh -> https transformation that git applies
725 if defaulturl.scheme in (b'http', b'https'):
725 if defaulturl.scheme in (b'http', b'https'):
726 if defaulturl.path and defaulturl.path[:-1] != b'/':
726 if defaulturl.path and defaulturl.path[:-1] != b'/':
727 defaulturl.path += b'/'
727 defaulturl.path += b'/'
728 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
728 defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'
729
729
730 url = util.url(bytes(defaulturl))
730 url = util.url(bytes(defaulturl))
731 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
731 repo.ui.note(_(b'lfs: assuming remote store: %s\n') % url)
732
732
733 scheme = url.scheme
733 scheme = url.scheme
734 if scheme not in _storemap:
734 if scheme not in _storemap:
735 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
735 raise error.Abort(_(b'lfs: unknown url scheme: %s') % scheme)
736 return _storemap[scheme](repo, url)
736 return _storemap[scheme](repo, url)
737
737
738
738
739 class LfsRemoteError(error.StorageError):
739 class LfsRemoteError(error.StorageError):
740 pass
740 pass
741
741
742
742
743 class LfsCorruptionError(error.Abort):
743 class LfsCorruptionError(error.Abort):
744 """Raised when a corrupt blob is detected, aborting an operation
744 """Raised when a corrupt blob is detected, aborting an operation
745
745
746 It exists to allow specialized handling on the server side."""
746 It exists to allow specialized handling on the server side."""
@@ -1,267 +1,259
1 # __init__.py - Startup and module loading logic for Mercurial.
1 # __init__.py - Startup and module loading logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import sys
10 import sys
11
11
12 # Allow 'from mercurial import demandimport' to keep working.
12 # Allow 'from mercurial import demandimport' to keep working.
13 import hgdemandimport
13 import hgdemandimport
14
14
15 demandimport = hgdemandimport
15 demandimport = hgdemandimport
16
16
17 __all__ = []
17 __all__ = []
18
18
19 # Python 3 uses a custom module loader that transforms source code between
19 # Python 3 uses a custom module loader that transforms source code between
20 # source file reading and compilation. This is done by registering a custom
20 # source file reading and compilation. This is done by registering a custom
21 # finder that changes the spec for Mercurial modules to use a custom loader.
21 # finder that changes the spec for Mercurial modules to use a custom loader.
22 if sys.version_info[0] >= 3:
22 if sys.version_info[0] >= 3:
23 import importlib
23 import importlib
24 import importlib.abc
24 import importlib.abc
25 import io
25 import io
26 import token
26 import token
27 import tokenize
27 import tokenize
28
28
29 class hgpathentryfinder(importlib.abc.MetaPathFinder):
29 class hgpathentryfinder(importlib.abc.MetaPathFinder):
30 """A sys.meta_path finder that uses a custom module loader."""
30 """A sys.meta_path finder that uses a custom module loader."""
31
31
32 def find_spec(self, fullname, path, target=None):
32 def find_spec(self, fullname, path, target=None):
33 # Only handle Mercurial-related modules.
33 # Only handle Mercurial-related modules.
34 if not fullname.startswith(('mercurial.', 'hgext.')):
34 if not fullname.startswith(('mercurial.', 'hgext.')):
35 return None
35 return None
36 # don't try to parse binary
36 # don't try to parse binary
37 if fullname.startswith('mercurial.cext.'):
37 if fullname.startswith('mercurial.cext.'):
38 return None
38 return None
39 # third-party packages are expected to be dual-version clean
39 # third-party packages are expected to be dual-version clean
40 if fullname.startswith('mercurial.thirdparty'):
40 if fullname.startswith('mercurial.thirdparty'):
41 return None
41 return None
42 # zstd is already dual-version clean, don't try and mangle it
42 # zstd is already dual-version clean, don't try and mangle it
43 if fullname.startswith('mercurial.zstd'):
43 if fullname.startswith('mercurial.zstd'):
44 return None
44 return None
45 # rustext is built for the right python version,
45 # rustext is built for the right python version,
46 # don't try and mangle it
46 # don't try and mangle it
47 if fullname.startswith('mercurial.rustext'):
47 if fullname.startswith('mercurial.rustext'):
48 return None
48 return None
49 # pywatchman is already dual-version clean, don't try and mangle it
49 # pywatchman is already dual-version clean, don't try and mangle it
50 if fullname.startswith('hgext.fsmonitor.pywatchman'):
50 if fullname.startswith('hgext.fsmonitor.pywatchman'):
51 return None
51 return None
52
52
53 # Try to find the module using other registered finders.
53 # Try to find the module using other registered finders.
54 spec = None
54 spec = None
55 for finder in sys.meta_path:
55 for finder in sys.meta_path:
56 if finder == self:
56 if finder == self:
57 continue
57 continue
58
58
59 # Originally the API was a `find_module` method, but it was
59 # Originally the API was a `find_module` method, but it was
60 # renamed to `find_spec` in python 3.4, with a new `target`
60 # renamed to `find_spec` in python 3.4, with a new `target`
61 # argument.
61 # argument.
62 find_spec_method = getattr(finder, 'find_spec', None)
62 find_spec_method = getattr(finder, 'find_spec', None)
63 if find_spec_method:
63 if find_spec_method:
64 spec = find_spec_method(fullname, path, target=target)
64 spec = find_spec_method(fullname, path, target=target)
65 else:
65 else:
66 spec = finder.find_module(fullname)
66 spec = finder.find_module(fullname)
67 if spec is not None:
67 if spec is not None:
68 spec = importlib.util.spec_from_loader(fullname, spec)
68 spec = importlib.util.spec_from_loader(fullname, spec)
69 if spec:
69 if spec:
70 break
70 break
71
71
72 # This is a Mercurial-related module but we couldn't find it
72 # This is a Mercurial-related module but we couldn't find it
73 # using the previously-registered finders. This likely means
73 # using the previously-registered finders. This likely means
74 # the module doesn't exist.
74 # the module doesn't exist.
75 if not spec:
75 if not spec:
76 return None
76 return None
77
77
78 # TODO need to support loaders from alternate specs, like zip
78 # TODO need to support loaders from alternate specs, like zip
79 # loaders.
79 # loaders.
80 loader = hgloader(spec.name, spec.origin)
80 loader = hgloader(spec.name, spec.origin)
81 # Can't use util.safehasattr here because that would require
81 # Can't use util.safehasattr here because that would require
82 # importing util, and we're in import code.
82 # importing util, and we're in import code.
83 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
83 if hasattr(spec.loader, 'loader'): # hasattr-py3-only
84 # This is a nested loader (maybe a lazy loader?)
84 # This is a nested loader (maybe a lazy loader?)
85 spec.loader.loader = loader
85 spec.loader.loader = loader
86 else:
86 else:
87 spec.loader = loader
87 spec.loader = loader
88 return spec
88 return spec
89
89
90 def replacetokens(tokens, fullname):
90 def replacetokens(tokens, fullname):
91 """Transform a stream of tokens from raw to Python 3.
91 """Transform a stream of tokens from raw to Python 3.
92
92
93 It is called by the custom module loading machinery to rewrite
93 It is called by the custom module loading machinery to rewrite
94 source/tokens between source decoding and compilation.
94 source/tokens between source decoding and compilation.
95
95
96 Returns a generator of possibly rewritten tokens.
96 Returns a generator of possibly rewritten tokens.
97
97
98 The input token list may be mutated as part of processing. However,
98 The input token list may be mutated as part of processing. However,
99 its changes do not necessarily match the output token stream.
99 its changes do not necessarily match the output token stream.
100
100
101 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
101 REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
102 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
102 OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
103 """
103 """
104 # The following utility functions access the tokens list and i index of
104 # The following utility functions access the tokens list and i index of
105 # the for i, t enumerate(tokens) loop below
105 # the for i, t enumerate(tokens) loop below
106 def _isop(j, *o):
106 def _isop(j, *o):
107 """Assert that tokens[j] is an OP with one of the given values"""
107 """Assert that tokens[j] is an OP with one of the given values"""
108 try:
108 try:
109 return tokens[j].type == token.OP and tokens[j].string in o
109 return tokens[j].type == token.OP and tokens[j].string in o
110 except IndexError:
110 except IndexError:
111 return False
111 return False
112
112
113 def _findargnofcall(n):
113 def _findargnofcall(n):
114 """Find arg n of a call expression (start at 0)
114 """Find arg n of a call expression (start at 0)
115
115
116 Returns index of the first token of that argument, or None if
116 Returns index of the first token of that argument, or None if
117 there is not that many arguments.
117 there is not that many arguments.
118
118
119 Assumes that token[i + 1] is '('.
119 Assumes that token[i + 1] is '('.
120
120
121 """
121 """
122 nested = 0
122 nested = 0
123 for j in range(i + 2, len(tokens)):
123 for j in range(i + 2, len(tokens)):
124 if _isop(j, ')', ']', '}'):
124 if _isop(j, ')', ']', '}'):
125 # end of call, tuple, subscription or dict / set
125 # end of call, tuple, subscription or dict / set
126 nested -= 1
126 nested -= 1
127 if nested < 0:
127 if nested < 0:
128 return None
128 return None
129 elif n == 0:
129 elif n == 0:
130 # this is the starting position of arg
130 # this is the starting position of arg
131 return j
131 return j
132 elif _isop(j, '(', '[', '{'):
132 elif _isop(j, '(', '[', '{'):
133 nested += 1
133 nested += 1
134 elif _isop(j, ',') and nested == 0:
134 elif _isop(j, ',') and nested == 0:
135 n -= 1
135 n -= 1
136
136
137 return None
137 return None
138
138
139 def _ensureunicode(j):
139 def _ensureunicode(j):
140 """Make sure the token at j is a unicode string
140 """Make sure the token at j is a unicode string
141
141
142 This rewrites a string token to include the unicode literal prefix
142 This rewrites a string token to include the unicode literal prefix
143 so the string transformer won't add the byte prefix.
143 so the string transformer won't add the byte prefix.
144
144
145 Ignores tokens that are not strings. Assumes bounds checking has
145 Ignores tokens that are not strings. Assumes bounds checking has
146 already been done.
146 already been done.
147
147
148 """
148 """
149 st = tokens[j]
149 st = tokens[j]
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
151 tokens[j] = st._replace(string='u%s' % st.string)
151 tokens[j] = st._replace(string='u%s' % st.string)
152
152
153 for i, t in enumerate(tokens):
153 for i, t in enumerate(tokens):
154 # This looks like a function call.
154 # This looks like a function call.
155 if t.type == token.NAME and _isop(i + 1, '('):
155 if t.type == token.NAME and _isop(i + 1, '('):
156 fn = t.string
156 fn = t.string
157
157
158 # *attr() builtins don't accept byte strings to 2nd argument.
158 # *attr() builtins don't accept byte strings to 2nd argument.
159 if fn in (
159 if fn in (
160 'getattr',
160 'getattr',
161 'setattr',
161 'setattr',
162 'hasattr',
162 'hasattr',
163 'safehasattr',
163 'safehasattr',
164 ) and not _isop(i - 1, '.'):
164 ) and not _isop(i - 1, '.'):
165 arg1idx = _findargnofcall(1)
165 arg1idx = _findargnofcall(1)
166 if arg1idx is not None:
166 if arg1idx is not None:
167 _ensureunicode(arg1idx)
167 _ensureunicode(arg1idx)
168
168
169 # .encode() and .decode() on str/bytes/unicode don't accept
170 # byte strings on Python 3.
171 elif fn in ('encode', 'decode') and _isop(i - 1, '.'):
172 for argn in range(2):
173 argidx = _findargnofcall(argn)
174 if argidx is not None:
175 _ensureunicode(argidx)
176
177 # It changes iteritems/values to items/values as they are not
169 # It changes iteritems/values to items/values as they are not
178 # present in Python 3 world.
170 # present in Python 3 world.
179 elif fn in ('iteritems', 'itervalues') and not (
171 elif fn in ('iteritems', 'itervalues') and not (
180 tokens[i - 1].type == token.NAME
172 tokens[i - 1].type == token.NAME
181 and tokens[i - 1].string == 'def'
173 and tokens[i - 1].string == 'def'
182 ):
174 ):
183 yield t._replace(string=fn[4:])
175 yield t._replace(string=fn[4:])
184 continue
176 continue
185
177
186 # Emit unmodified token.
178 # Emit unmodified token.
187 yield t
179 yield t
188
180
# Header prepended to cached bytecode files. This MUST be changed whenever
# ``replacetokens`` or any other mechanism affecting module-loading
# semantics changes; otherwise bytecode cached under the old
# transformation could be loaded without the new one applied.
BYTECODEHEADER = b'HG\x00\x13'
194
186
class hgloader(importlib.machinery.SourceFileLoader):
    """Custom module loader that transforms source code.

    When source is converted to a code object, certain patterns are
    rewritten in the token stream to be Python 3 compatible, letting the
    codebase stay natively Python 2 without excessive ugliness.

    Because the transformation changes module-loading semantics, the
    built-in bytecode cache (which only keys on the Python/bytecode
    version) is not sufficient on its own: the Mercurial transformations
    can change at any time, so cached bytecode must be checked against the
    current transformation code.  We therefore wrap ``get_data`` and
    ``set_data`` — called when ``SourceFileLoader`` reads and writes
    bytecode cache files — to prepend an ``HG<VERSION>`` header (a literal
    ``HG`` plus 2 binary version bytes).  Bumping the version in
    ``BYTECODEHEADER`` whenever semantics change invalidates stale caches.
    """

    def get_data(self, path):
        """Read *path*; validate and strip our header on bytecode files."""
        payload = super(hgloader, self).get_data(path)

        if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            return payload

        # ``SourceFileLoader.get_code()`` interprets OSError as "the
        # cached file is bad", which is exactly what we want for a
        # missing or version-mismatched Mercurial transformation header.
        if payload[0:2] != b'HG':
            raise OSError('no hg header')
        if payload[0:4] != BYTECODEHEADER:
            raise OSError('hg header version mismatch')

        return payload[4:]

    def set_data(self, path, data, *args, **kwargs):
        """Prepend our header before a bytecode cache file is written."""
        if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
            data = BYTECODEHEADER + data

        return super(hgloader, self).set_data(path, data, *args, **kwargs)

    def source_to_code(self, data, path):
        """Perform token transformation before compilation."""
        stream = io.BytesIO(data)
        tokens = tokenize.tokenize(stream.readline)
        data = tokenize.untokenize(replacetokens(list(tokens), self.name))
        # Python's built-in importer strips frames from exceptions raised
        # for this code, and that mechanism isn't extensible, so our frame
        # will be blamed for import failures.  Frame-stripping workarounds
        # exist but are too hacky/ugly to be worth it.
        return super(hgloader, self).source_to_code(data, path)
260
252
# Register our custom importer as a side effect of loading this module.
# That way any entry point can import mercurial.* modules without having
# to perform the registration itself.
if not any(isinstance(entry, hgpathentryfinder) for entry in sys.meta_path):
    # meta_path is consulted before any implicit finders and before
    # sys.path, so insert at the front.
    sys.meta_path.insert(0, hgpathentryfinder())
@@ -1,453 +1,453
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import inspect
14 import inspect
15 import os
15 import os
16 import shlex
16 import shlex
17 import sys
17 import sys
18 import tempfile
18 import tempfile
19
19
20 ispy3 = sys.version_info[0] >= 3
20 ispy3 = sys.version_info[0] >= 3
21 ispypy = r'__pypy__' in sys.builtin_module_names
21 ispypy = r'__pypy__' in sys.builtin_module_names
22
22
23 if not ispy3:
23 if not ispy3:
24 import cookielib
24 import cookielib
25 import cPickle as pickle
25 import cPickle as pickle
26 import httplib
26 import httplib
27 import Queue as queue
27 import Queue as queue
28 import SocketServer as socketserver
28 import SocketServer as socketserver
29 import xmlrpclib
29 import xmlrpclib
30
30
31 from .thirdparty.concurrent import futures
31 from .thirdparty.concurrent import futures
32
32
33 def future_set_exception_info(f, exc_info):
33 def future_set_exception_info(f, exc_info):
34 f.set_exception_info(*exc_info)
34 f.set_exception_info(*exc_info)
35
35
36
36
37 else:
37 else:
38 import concurrent.futures as futures
38 import concurrent.futures as futures
39 import http.cookiejar as cookielib
39 import http.cookiejar as cookielib
40 import http.client as httplib
40 import http.client as httplib
41 import pickle
41 import pickle
42 import queue as queue
42 import queue as queue
43 import socketserver
43 import socketserver
44 import xmlrpc.client as xmlrpclib
44 import xmlrpc.client as xmlrpclib
45
45
46 def future_set_exception_info(f, exc_info):
46 def future_set_exception_info(f, exc_info):
47 f.set_exception(exc_info[0])
47 f.set_exception(exc_info[0])
48
48
49
49
def identity(a):
    """Return the argument unchanged (a no-op conversion function)."""
    return a
52
52
53
53
54 def _rapply(f, xs):
54 def _rapply(f, xs):
55 if xs is None:
55 if xs is None:
56 # assume None means non-value of optional data
56 # assume None means non-value of optional data
57 return xs
57 return xs
58 if isinstance(xs, (list, set, tuple)):
58 if isinstance(xs, (list, set, tuple)):
59 return type(xs)(_rapply(f, x) for x in xs)
59 return type(xs)(_rapply(f, x) for x in xs)
60 if isinstance(xs, dict):
60 if isinstance(xs, dict):
61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
61 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
62 return f(xs)
62 return f(xs)
63
63
64
64
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(identity, xs) is xs
    True
    """
    if f is identity:
        # Shortcut: identity converts nothing, so return the original
        # object as-is (mainly a Python 2 fast path).
        return xs
    return _rapply(f, xs)
87
87
88
88
89 if ispy3:
89 if ispy3:
90 import builtins
90 import builtins
91 import functools
91 import functools
92 import io
92 import io
93 import struct
93 import struct
94
94
95 fsencode = os.fsencode
95 fsencode = os.fsencode
96 fsdecode = os.fsdecode
96 fsdecode = os.fsdecode
97 oscurdir = os.curdir.encode('ascii')
97 oscurdir = os.curdir.encode('ascii')
98 oslinesep = os.linesep.encode('ascii')
98 oslinesep = os.linesep.encode('ascii')
99 osname = os.name.encode('ascii')
99 osname = os.name.encode('ascii')
100 ospathsep = os.pathsep.encode('ascii')
100 ospathsep = os.pathsep.encode('ascii')
101 ospardir = os.pardir.encode('ascii')
101 ospardir = os.pardir.encode('ascii')
102 ossep = os.sep.encode('ascii')
102 ossep = os.sep.encode('ascii')
103 osaltsep = os.altsep
103 osaltsep = os.altsep
104 if osaltsep:
104 if osaltsep:
105 osaltsep = osaltsep.encode('ascii')
105 osaltsep = osaltsep.encode('ascii')
106
106
107 sysplatform = sys.platform.encode('ascii')
107 sysplatform = sys.platform.encode('ascii')
108 sysexecutable = sys.executable
108 sysexecutable = sys.executable
109 if sysexecutable:
109 if sysexecutable:
110 sysexecutable = os.fsencode(sysexecutable)
110 sysexecutable = os.fsencode(sysexecutable)
111 bytesio = io.BytesIO
111 bytesio = io.BytesIO
112 # TODO deprecate stringio name, as it is a lie on Python 3.
112 # TODO deprecate stringio name, as it is a lie on Python 3.
113 stringio = bytesio
113 stringio = bytesio
114
114
def maplist(*args):
    """map() that always returns a fully materialized list (py2 semantics)."""
    return list(map(*args))


def rangelist(*args):
    """range() that always returns a fully materialized list (py2 semantics)."""
    return list(range(*args))


def ziplist(*args):
    """zip() that always returns a fully materialized list (py2 semantics)."""
    return list(zip(*args))
123
123
124 rawinput = input
124 rawinput = input
125 getargspec = inspect.getfullargspec
125 getargspec = inspect.getfullargspec
126
126
127 long = int
127 long = int
128
128
129 # TODO: .buffer might not exist if std streams were replaced; we'll need
129 # TODO: .buffer might not exist if std streams were replaced; we'll need
130 # a silly wrapper to make a bytes stream backed by a unicode one.
130 # a silly wrapper to make a bytes stream backed by a unicode one.
131 stdin = sys.stdin.buffer
131 stdin = sys.stdin.buffer
132 stdout = sys.stdout.buffer
132 stdout = sys.stdout.buffer
133 stderr = sys.stderr.buffer
133 stderr = sys.stderr.buffer
134
134
135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
135 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
136 # we can use os.fsencode() to get back bytes argv.
136 # we can use os.fsencode() to get back bytes argv.
137 #
137 #
138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
138 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
139 #
139 #
140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
140 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
141 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
142 if getattr(sys, 'argv', None) is not None:
142 if getattr(sys, 'argv', None) is not None:
143 sysargv = list(map(os.fsencode, sys.argv))
143 sysargv = list(map(os.fsencode, sys.argv))
144
144
145 bytechr = struct.Struct(r'>B').pack
145 bytechr = struct.Struct(r'>B').pack
146 byterepr = b'%r'.__mod__
146 byterepr = b'%r'.__mod__
147
147
class bytestr(bytes):
    """A bytes which mostly acts as a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    ('', 'foo', 'ascii', '1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    __bytes__() should be called if provided:

    >>> class bytesable(object):
    ...     def __bytes__(self):
    ...         return b'bytes'
    >>> bytestr(bytesable())
    'bytes'

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t) # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # bytes is immutable, so an existing bytestr can be reused as-is.
        if isinstance(s, bytestr):
            return s
        if not isinstance(
            s, (bytes, bytearray)
        ) and not hasattr(  # hasattr-py3-only
            s, u'__bytes__'
        ):
            # Fall back to the str() form, ascii-only by design: the
            # encoding of an arbitrary non-ascii str is unknown.
            s = str(s).encode('ascii')
        return bytes.__new__(cls, s)

    def __getitem__(self, key):
        # bytes indexing yields an int on Python 3; convert it back to a
        # 1-byte bytes object for Python 2 str semantics.
        item = bytes.__getitem__(self, key)
        if not isinstance(item, bytes):
            item = bytechr(item)
        return item

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))

    def __repr__(self):
        # Drop the leading b'' so reprs read like Python 2 strs.
        return bytes.__repr__(self)[1:]
223
223
def iterbytestr(s):
    """Iterate bytes as if it were a str object of Python 2"""
    # Each int from bytes iteration is mapped back to a 1-byte bytes.
    return map(bytechr, s)
227
227
def maybebytestr(s):
    """Promote bytes to bytestr; pass any other type through unchanged."""
    if not isinstance(s, bytes):
        return s
    return bytestr(s)
233
233
def sysbytes(s):
    """Convert an internal str (e.g. keyword, __doc__) back to bytes

    This never raises UnicodeEncodeError, but only ASCII characters
    can be round-trip by sysstr(sysbytes(s)).
    """
    return s.encode('utf-8')
241
241
def sysstr(s):
    """Return a keyword str to be passed to Python functions such as
    getattr() and str.encode()

    This never raises UnicodeDecodeError. Non-ascii characters are
    considered invalid and mapped to arbitrary but unique code points
    such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
    """
    if isinstance(s, builtins.str):
        # Already a native str; nothing to decode.
        return s
    return s.decode('latin-1')
253
253
def strurl(url):
    """Converts a bytes url back to str"""
    if not isinstance(url, bytes):
        return url
    return url.decode('ascii')
259
259
def bytesurl(url):
    """Converts a str url to bytes by encoding in ascii"""
    if not isinstance(url, str):
        return url
    return url.encode('ascii')
265
265
def raisewithtb(exc, tb):
    """Raise exception with the given traceback"""
    raise exc.with_traceback(tb)
269
269
def getdoc(obj):
    """Get docstring as bytes; may be None so gettext() won't confuse it
    with _('')"""
    doc = getattr(obj, u'__doc__', None)
    if doc is None:
        return None
    return sysbytes(doc)
277
277
def _wrapattrfunc(f):
    """Wrap an attribute builtin so it accepts bytes attribute names."""

    @functools.wraps(f)
    def w(object, name, *args):
        # Attribute names must be native str; convert via sysstr().
        return f(object, sysstr(name), *args)

    return w
284
284
285 # these wrappers are automagically imported by hgloader
285 # these wrappers are automagically imported by hgloader
286 delattr = _wrapattrfunc(builtins.delattr)
286 delattr = _wrapattrfunc(builtins.delattr)
287 getattr = _wrapattrfunc(builtins.getattr)
287 getattr = _wrapattrfunc(builtins.getattr)
288 hasattr = _wrapattrfunc(builtins.hasattr)
288 hasattr = _wrapattrfunc(builtins.hasattr)
289 setattr = _wrapattrfunc(builtins.setattr)
289 setattr = _wrapattrfunc(builtins.setattr)
290 xrange = builtins.range
290 xrange = builtins.range
291 unicode = str
291 unicode = str
292
292
def open(name, mode=b'r', buffering=-1, encoding=None):
    """builtins.open() wrapper accepting a bytes mode string."""
    return builtins.open(name, sysstr(mode), buffering, encoding)
295
295
296 safehasattr = _wrapattrfunc(builtins.hasattr)
296 safehasattr = _wrapattrfunc(builtins.hasattr)
297
297
298 def _getoptbwrapper(orig, args, shortlist, namelist):
298 def _getoptbwrapper(orig, args, shortlist, namelist):
299 """
299 """
300 Takes bytes arguments, converts them to unicode, pass them to
300 Takes bytes arguments, converts them to unicode, pass them to
301 getopt.getopt(), convert the returned values back to bytes and then
301 getopt.getopt(), convert the returned values back to bytes and then
302 return them for Python 3 compatibility as getopt.getopt() don't accepts
302 return them for Python 3 compatibility as getopt.getopt() don't accepts
303 bytes on Python 3.
303 bytes on Python 3.
304 """
304 """
305 args = [a.decode('latin-1') for a in args]
305 args = [a.decode('latin-1') for a in args]
306 shortlist = shortlist.decode('latin-1')
306 shortlist = shortlist.decode('latin-1')
307 namelist = [a.decode('latin-1') for a in namelist]
307 namelist = [a.decode('latin-1') for a in namelist]
308 opts, args = orig(args, shortlist, namelist)
308 opts, args = orig(args, shortlist, namelist)
309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
309 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
310 args = [a.encode('latin-1') for a in args]
310 args = [a.encode('latin-1') for a in args]
311 return opts, args
311 return opts, args
312
312
def strkwargs(dic):
    """
    Convert the keys of *dic* from bytes to str (unicode) so the dict can
    be passed as **kwargs on Python 3, which rejects bytes keyword names.
    """
    # NOTE: .iteritems() is rewritten to .items() by the hgloader token
    # transform at import time; do not "fix" it here.
    return dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
321
321
def byteskwargs(dic):
    """
    Convert str (unicode) dict keys back to bytes, undoing strkwargs()
    after the dict was passed as **kwargs on Python 3.
    """
    # NOTE: .iteritems() is rewritten to .items() by the hgloader token
    # transform at import time; do not "fix" it here.
    return dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
329
329
330 # TODO: handle shlex.shlex().
330 # TODO: handle shlex.shlex().
def shlexsplit(s, comments=False, posix=True):
    """
    Bytes-friendly wrapper around shlex.split(), which only accepts str
    on Python 3: decode the input, split, and re-encode each token.
    """
    words = shlex.split(s.decode('latin-1'), comments, posix)
    return [w.encode('latin-1') for w in words]
339
339
340 shlexquote = shlex.quote
340 shlexquote = shlex.quote
341
341
342 else:
342 else:
343 import cStringIO
343 import cStringIO
344 import pipes
344 import pipes
345
345
346 xrange = xrange
346 xrange = xrange
347 unicode = unicode
347 unicode = unicode
348 bytechr = chr
348 bytechr = chr
349 byterepr = repr
349 byterepr = repr
350 bytestr = str
350 bytestr = str
351 iterbytestr = iter
351 iterbytestr = iter
352 maybebytestr = identity
352 maybebytestr = identity
353 sysbytes = identity
353 sysbytes = identity
354 sysstr = identity
354 sysstr = identity
355 strurl = identity
355 strurl = identity
356 bytesurl = identity
356 bytesurl = identity
357 open = open
357 open = open
358 delattr = delattr
358 delattr = delattr
359 getattr = getattr
359 getattr = getattr
360 hasattr = hasattr
360 hasattr = hasattr
361 setattr = setattr
361 setattr = setattr
362
362
363 # this can't be parsed on Python 3
363 # this can't be parsed on Python 3
364 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
364 exec(b'def raisewithtb(exc, tb):\n' b' raise exc, None, tb\n')
365
365
def fsencode(filename):
    """
    Partial backport from os.py in Python 3, which only accepts bytes.
    In Python 2, our paths should only ever be bytes, a unicode path
    indicates a bug.
    """
    if not isinstance(filename, str):
        raise TypeError(r"expect str, not %s" % type(filename).__name__)
    return filename
376
376
377 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
377 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
378 # better not to touch Python 2 part as it's already working fine.
378 # better not to touch Python 2 part as it's already working fine.
379 fsdecode = identity
379 fsdecode = identity
380
380
def getdoc(obj):
    """Return obj.__doc__, or None when the object has no docstring."""
    return getattr(obj, '__doc__', None)
383
383
# Unique sentinel distinguishing "attribute absent" from any real value.
_notset = object()

def safehasattr(thing, attr):
    # Like hasattr(), implemented via getattr() with a sentinel default.
    return getattr(thing, attr, _notset) is not _notset
388
388
389 def _getoptbwrapper(orig, args, shortlist, namelist):
389 def _getoptbwrapper(orig, args, shortlist, namelist):
390 return orig(args, shortlist, namelist)
390 return orig(args, shortlist, namelist)
391
391
392 strkwargs = identity
392 strkwargs = identity
393 byteskwargs = identity
393 byteskwargs = identity
394
394
395 oscurdir = os.curdir
395 oscurdir = os.curdir
396 oslinesep = os.linesep
396 oslinesep = os.linesep
397 osname = os.name
397 osname = os.name
398 ospathsep = os.pathsep
398 ospathsep = os.pathsep
399 ospardir = os.pardir
399 ospardir = os.pardir
400 ossep = os.sep
400 ossep = os.sep
401 osaltsep = os.altsep
401 osaltsep = os.altsep
402 long = long
402 long = long
403 stdin = sys.stdin
403 stdin = sys.stdin
404 stdout = sys.stdout
404 stdout = sys.stdout
405 stderr = sys.stderr
405 stderr = sys.stderr
406 if getattr(sys, 'argv', None) is not None:
406 if getattr(sys, 'argv', None) is not None:
407 sysargv = sys.argv
407 sysargv = sys.argv
408 sysplatform = sys.platform
408 sysplatform = sys.platform
409 sysexecutable = sys.executable
409 sysexecutable = sys.executable
410 shlexsplit = shlex.split
410 shlexsplit = shlex.split
411 shlexquote = pipes.quote
411 shlexquote = pipes.quote
412 bytesio = cStringIO.StringIO
412 bytesio = cStringIO.StringIO
413 stringio = bytesio
413 stringio = bytesio
414 maplist = map
414 maplist = map
415 rangelist = range
415 rangelist = range
416 ziplist = zip
416 ziplist = zip
417 rawinput = raw_input
417 rawinput = raw_input
418 getargspec = inspect.getargspec
418 getargspec = inspect.getargspec
419
419
420 isjython = sysplatform.startswith(b'java')
420 isjython = sysplatform.startswith(b'java')
421
421
422 isdarwin = sysplatform.startswith(b'darwin')
422 isdarwin = sysplatform.startswith(b'darwin')
423 islinux = sysplatform.startswith(b'linux')
423 islinux = sysplatform.startswith(b'linux')
424 isposix = osname == b'posix'
424 isposix = osname == b'posix'
425 iswindows = osname == b'nt'
425 iswindows = osname == b'nt'
426
426
427
427
def getoptb(args, shortlist, namelist):
    """getopt.getopt() wrapper that accepts bytes arguments on Python 3."""
    return _getoptbwrapper(getopt.getopt, args, shortlist, namelist)
430
430
431
431
def gnugetoptb(args, shortlist, namelist):
    """getopt.gnu_getopt() wrapper that accepts bytes arguments on Python 3."""
    return _getoptbwrapper(getopt.gnu_getopt, args, shortlist, namelist)
434
434
435
435
def mkdtemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkdtemp() with bytes defaults for suffix/prefix."""
    return tempfile.mkdtemp(suffix, prefix, dir)
438
438
439
439
440 # text=True is not supported; use util.from/tonativeeol() instead
440 # text=True is not supported; use util.from/tonativeeol() instead
def mkstemp(suffix=b'', prefix=b'tmp', dir=None):
    """tempfile.mkstemp() with bytes defaults.

    text=True is not supported; use util.from/tonativeeol() instead.
    """
    return tempfile.mkstemp(suffix, prefix, dir)
443
443
444
444
445 # mode must include 'b'ytes as encoding= is not supported
445 # mode must include 'b'ytes as encoding= is not supported
def namedtempfile(
    mode=b'w+b', bufsize=-1, suffix=b'', prefix=b'tmp', dir=None, delete=True
):
    """tempfile.NamedTemporaryFile() accepting a bytes mode.

    encoding= is not supported, so the mode must include 'b'ytes.
    """
    mode = sysstr(mode)
    assert r'b' in mode
    return tempfile.NamedTemporaryFile(
        mode, bufsize, suffix=suffix, prefix=prefix, dir=dir, delete=delete
    )
General Comments 0
You need to be logged in to leave comments. Login now