##// END OF EJS Templates
py3: add a bytes version of urllib.parse.urlencode() to pycompat.py...
py3: add a bytes version of urllib.parse.urlencode() to pycompat.py urllib.parse.urlencode() returns unicodes on Python 3. This commit adds a method which will take its output and encode it to bytes so that we can use bytes consistently.

File last commit:

r31842:c130d092 default
r31842:c130d092 default
Show More
pycompat.py
409 lines | 12.2 KiB | text/x-python | PythonLexer
timeless
pycompat: add empty and queue to handle py3 divergence...
r28818 # pycompat.py - portability shim for python 3
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Mercurial portability shim for python 3.
This contains aliases to hide python version-specific details from the core.
"""
from __future__ import absolute_import
Pulkit Goyal
py3: make a bytes version of getopt.getopt()...
r30578 import getopt
Pulkit Goyal
py3: add a bytes version of os.name...
r30302 import os
Pulkit Goyal
py3: have a bytes version of shlex.split()...
r30678 import shlex
Pulkit Goyal
pycompat: make pycompat demandimport friendly...
r29584 import sys
Yuya Nishihara
pycompat: provide 'ispy3' constant...
r30030 ispy3 = (sys.version_info[0] >= 3)
if not ispy3:
Pulkit Goyal
py3: conditionalize cPickle import by adding in util...
r29324 import cPickle as pickle
Pulkit Goyal
py3: conditionalize httplib import...
r29455 import httplib
Pulkit Goyal
pycompat: make pycompat demandimport friendly...
r29584 import Queue as _queue
Pulkit Goyal
py3: conditionalize SocketServer import...
r29433 import SocketServer as socketserver
Pulkit Goyal
py3: conditionalize xmlrpclib import...
r29432 import xmlrpclib
Pulkit Goyal
pycompat: make pycompat demandimport friendly...
r29584 else:
import http.client as httplib
import pickle
import queue as _queue
import socketserver
Pulkit Goyal
py3: conditionalize xmlrpclib import...
r29432 import xmlrpc.client as xmlrpclib
Pulkit Goyal
py3: conditionalize the urlparse import...
r29431
Yuya Nishihara
pycompat: introduce identity function as a compat stub...
def identity(a):
    """Return the argument unchanged

    Serves as a no-op stand-in wherever one Python version needs a real
    conversion function and the other does not.
    """
    return a
Yuya Nishihara
pycompat: provide 'ispy3' constant...
r30030 if ispy3:
Yuya Nishihara
py3: move xrange alias next to import lines...
r29797 import builtins
Yuya Nishihara
py3: provide (del|get|has|set)attr wrappers that accepts bytes...
r29799 import functools
Yuya Nishihara
pycompat: move imports of cStringIO/io to where they are used...
r31372 import io
Martin von Zweigbergk
py3: optimize py3 compat.bytechr using Struct.pack...
r31424 import struct
Yuya Nishihara
pycompat: move imports of cStringIO/io to where they are used...
r31372
Martijn Pieters
py3: add an os.fsencode backport to ease path handling
r30119 fsencode = os.fsencode
Pulkit Goyal
py3: add os.fsdecode() as pycompat.fsdecode()...
r30300 fsdecode = os.fsdecode
Pulkit Goyal
py3: add a bytes version of os.name...
r30302 # A bytes version of os.name.
Yuya Nishihara
pycompat: provide bytes os.linesep
r31775 oslinesep = os.linesep.encode('ascii')
Pulkit Goyal
py3: add a bytes version of os.name...
r30302 osname = os.name.encode('ascii')
Pulkit Goyal
py3: have pycompat.ospathsep and pycompat.ossep...
r30303 ospathsep = os.pathsep.encode('ascii')
ossep = os.sep.encode('ascii')
Pulkit Goyal
py3: have a bytes version of os.altsep...
r30623 osaltsep = os.altsep
if osaltsep:
osaltsep = osaltsep.encode('ascii')
Pulkit Goyal
py3: add os.getcwdb() to have bytes path...
r30500 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
# returns bytes.
getcwd = os.getcwdb
Pulkit Goyal
py3: have a bytes version of sys.platform...
r30624 sysplatform = sys.platform.encode('ascii')
Pulkit Goyal
py3: have bytes version of sys.executable...
r30668 sysexecutable = sys.executable
if sysexecutable:
sysexecutable = os.fsencode(sysexecutable)
Pulkit Goyal
pycompat: default to BytesIO instead of StringIO
r31359 stringio = io.BytesIO
Augie Fackler
pycompat: add maplist alias for old map behavior
r31501 maplist = lambda *args: list(map(*args))
Yuya Nishihara
py3: document why os.fsencode() can be used to get back bytes argv...
r30334
Yuya Nishihara
py3: provide bytes stdin/out/err through util module...
r30472 # TODO: .buffer might not exist if std streams were replaced; we'll need
# a silly wrapper to make a bytes stream backed by a unicode one.
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer
Yuya Nishihara
py3: document why os.fsencode() can be used to get back bytes argv...
r30334 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
# we can use os.fsencode() to get back bytes argv.
#
# https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
#
# TODO: On Windows, the native argv is wchar_t, so we'll need a different
# workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
Augie Fackler
pycompat: verify sys.argv exists before forwarding it (issue5493)...
r31277 if getattr(sys, 'argv', None) is not None:
sysargv = list(map(os.fsencode, sys.argv))
Yuya Nishihara
py3: move xrange alias next to import lines...
r29797
Martin von Zweigbergk
py3: optimize py3 compat.bytechr using Struct.pack...
r31424 bytechr = struct.Struct('>B').pack
Yuya Nishihara
py3: factor out bytechr() function...
r31253
Yuya Nishihara
pycompat: add bytestr wrapper which mostly acts as a Python 2 str...
class bytestr(bytes):
    """A bytes subclass that mostly behaves like a Python 2 str

    >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
    (b'', b'foo', b'ascii', b'1')
    >>> s = bytestr(b'foo')
    >>> assert s is bytestr(s)

    There's no implicit conversion from non-ascii str as its encoding is
    unknown:

    >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: ...

    Comparison between bytestr and bytes should work:

    >>> assert bytestr(b'foo') == b'foo'
    >>> assert b'foo' == bytestr(b'foo')
    >>> assert b'f' in bytestr(b'foo')
    >>> assert bytestr(b'f') in b'foo'

    Sliced elements should be bytes, not integer:

    >>> s[1], s[:2]
    (b'o', b'fo')
    >>> list(s), list(reversed(s))
    ([b'f', b'o', b'o'], [b'o', b'o', b'f'])

    As bytestr type isn't propagated across operations, you need to cast
    bytes to bytestr explicitly:

    >>> s = bytestr(b'foo').upper()
    >>> t = bytestr(s)
    >>> s[0], t[0]
    (70, b'F')

    Be careful to not pass a bytestr object to a function which expects
    bytearray-like behavior.

    >>> t = bytes(t)  # cast to bytes
    >>> assert type(t) is bytes
    """

    def __new__(cls, s=b''):
        # An existing bytestr is handed back as-is, so identity is kept.
        if isinstance(s, bytestr):
            return s
        if isinstance(s, (bytes, bytearray)):
            raw = s
        else:
            # Anything else goes through str(); non-ASCII text raises.
            raw = str(s).encode(u'ascii')
        return bytes.__new__(cls, raw)

    def __getitem__(self, key):
        item = bytes.__getitem__(self, key)
        if isinstance(item, bytes):
            # Slices already come back as bytes.
            return item
        # Integer indexing yields an int; turn it into a 1-byte string.
        return bytechr(item)

    def __iter__(self):
        return iterbytestr(bytes.__iter__(self))
Yuya Nishihara
pycompat: add helper to iterate each char in bytes
def iterbytestr(s):
    """Yield each element of ``s`` as a length-1 bytes, like Python 2 str

    Every item produced by iterating ``s`` is passed through bytechr().
    """
    return map(bytechr, s)
Yuya Nishihara
pycompat: add helper to iterate each char in bytes
r31382
Yuya Nishihara
py3: have registrar process docstrings in bytes...
def sysbytes(s):
    """Turn an internal str (e.g. keyword, __doc__) back into bytes

    The text is encoded as UTF-8. Only ASCII characters survive a
    sysstr(sysbytes(s)) round trip, because sysstr() decodes as latin-1.
    """
    encoded = s.encode(u'utf-8')
    return encoded
Yuya Nishihara
pycompat: extract function that converts attribute or encoding name to str...
def sysstr(s):
    """Return a native str usable as a keyword for Python functions such
    as getattr() and str.encode()

    A value that is already a str is returned unchanged. Anything else is
    decoded as latin-1, which maps every byte to a distinct code point, so
    decoding never fails and distinct inputs stay distinct.
    """
    if not isinstance(s, builtins.str):
        return s.decode(u'latin-1')
    return s
Yuya Nishihara
py3: provide (del|get|has|set)attr wrappers that accepts bytes...
def _wrapattrfunc(f):
    """Wrap an attribute function so its name argument goes through sysstr()

    ``f`` is called as ``f(object, name, *args)``; the wrapper converts
    ``name`` with sysstr() first and forwards everything else untouched.
    """
    @functools.wraps(f)
    def wrapper(object, name, *args):
        strname = sysstr(name)
        return f(object, strname, *args)
    return wrapper
Yuya Nishihara
py3: import builtin wrappers automagically by code transformer...
r29800 # these wrappers are automagically imported by hgloader
Yuya Nishihara
py3: provide (del|get|has|set)attr wrappers that accepts bytes...
r29799 delattr = _wrapattrfunc(builtins.delattr)
getattr = _wrapattrfunc(builtins.getattr)
hasattr = _wrapattrfunc(builtins.hasattr)
setattr = _wrapattrfunc(builtins.setattr)
Yuya Nishihara
py3: import builtin wrappers automagically by code transformer...
r29800 xrange = builtins.range
Yuya Nishihara
py3: provide (del|get|has|set)attr wrappers that accepts bytes...
r29799
Pulkit Goyal
py3: add pycompat.open and replace open() calls...
def open(name, mode='r', buffering=-1):
    """Open ``name`` via builtins.open() after passing ``mode`` to sysstr()

    ``name`` and ``buffering`` are forwarded unchanged.
    """
    strmode = sysstr(mode)
    return builtins.open(name, strmode, buffering)
Pulkit Goyal
py3: make a bytes version of getopt.getopt()...
r30578 # getopt.getopt() on Python 3 deals with unicodes internally so we cannot
# pass bytes there. Passing unicodes will result in unicodes as return
# values which we need to convert again to bytes.
def getoptb(args, shortlist, namelist):
    """Bytes-in, bytes-out front end for getopt.getopt()

    All inputs are decoded as latin-1 before the call, and the parsed
    options and remaining arguments are encoded back the same way.
    Returns ``(opts, args)`` where ``opts`` is a list of
    ``(option, value)`` pairs.
    """
    uargs = [arg.decode('latin-1') for arg in args]
    ushort = shortlist.decode('latin-1')
    ulong = [name.decode('latin-1') for name in namelist]
    uopts, urest = getopt.getopt(uargs, ushort, ulong)
    opts = [(flag.encode('latin-1'), value.encode('latin-1'))
            for flag, value in uopts]
    rest = [arg.encode('latin-1') for arg in urest]
    return opts, rest
Pulkit Goyal
py3: utility functions to convert keys of kwargs to bytes/unicodes...
r30579 # Keys of keyword arguments need to be strings, which are unicode on
# Python 3. This function takes keyword arguments and converts the keys to str.
def strkwargs(dic):
    """Return a copy of ``dic`` whose bytes keys are decoded to str

    Keys are decoded as latin-1; values are kept as they are. The result
    is a new dict and the input is not modified.
    """
    # Use items(), not iteritems(): this is the Python 3 definition and
    # Python 3 dicts have no iteritems() method.
    return dict((k.decode('latin-1'), v) for k, v in dic.items())
# keys of keyword arguments need to be unicode while passing into
# a function. This function helps us to convert those keys back to bytes
# again as we need to deal with bytes.
def byteskwargs(dic):
    """Return a copy of ``dic`` whose str keys are encoded to bytes

    Keys are encoded as latin-1; values are kept as they are. The result
    is a new dict and the input is not modified.
    """
    # Use items(), not iteritems(): this is the Python 3 definition and
    # Python 3 dicts have no iteritems() method.
    return dict((k.encode('latin-1'), v) for k, v in dic.items())
Pulkit Goyal
py3: have a bytes version of shlex.split()...
r30678 # shlex.split() accepts unicodes on Python 3. This function takes bytes
# argument, convert it into unicodes, pass into shlex.split(), convert the
# returned value to bytes and return that.
# TODO: handle shlex.shlex().
def shlexsplit(s):
    """Split the bytes ``s`` with shlex.split() and return a list of bytes

    The input is decoded as latin-1 for shlex, and each resulting word is
    encoded back as latin-1.
    """
    text = s.decode('latin-1')
    return [word.encode('latin-1') for word in shlex.split(text)]
Yuya Nishihara
pycompat: extract function that converts attribute or encoding name to str...
r30032 else:
Yuya Nishihara
pycompat: move imports of cStringIO/io to where they are used...
r31372 import cStringIO
Yuya Nishihara
py3: factor out bytechr() function...
r31253 bytechr = chr
Yuya Nishihara
pycompat: add bytestr wrapper which mostly acts as a Python 2 str...
r31439 bytestr = str
Yuya Nishihara
pycompat: add helper to iterate each char in bytes
r31382 iterbytestr = iter
Yuya Nishihara
py3: have registrar process docstrings in bytes...
r31820 sysbytes = identity
Yuya Nishihara
pycompat: introduce identity function as a compat stub...
r31774 sysstr = identity
Yuya Nishihara
pycompat: extract function that converts attribute or encoding name to str...
r30032
Martijn Pieters
pycompat: only accept a bytestring filepath in Python 2
r30133 # Partial backport from os.py in Python 3, which only accepts bytes.
# In Python 2, our paths should only ever be bytes, a unicode path
# indicates a bug.
def fsencode(filename):
    """Return ``filename`` unchanged if it is a ``str``

    Raises TypeError for any other type.
    """
    if not isinstance(filename, str):
        raise TypeError(
            "expect str, not %s" % type(filename).__name__)
    return filename
Martijn Pieters
py3: add an os.fsencode backport to ease path handling
r30119
Pulkit Goyal
py3: add os.fsdecode() as pycompat.fsdecode()...
r30300 # In Python 2, fsdecode() is very likely to receive bytes, so it is
# better to leave the Python 2 side untouched, as it already works fine.
Yuya Nishihara
pycompat: introduce identity function as a compat stub...
r31774 fsdecode = identity
Pulkit Goyal
py3: add os.fsdecode() as pycompat.fsdecode()...
r30300
Pulkit Goyal
py3: make a bytes version of getopt.getopt()...
def getoptb(args, shortlist, namelist):
    """Parse ``args`` by delegating directly to getopt.getopt()

    No conversion is done here; the arguments and the result are passed
    through unchanged.
    """
    parsed = getopt.getopt(args, shortlist, namelist)
    return parsed
Yuya Nishihara
pycompat: introduce identity function as a compat stub...
r31774 strkwargs = identity
byteskwargs = identity
Pulkit Goyal
py3: utility functions to convert keys of kwargs to bytes/unicodes...
r30579
Yuya Nishihara
pycompat: provide bytes os.linesep
r31775 oslinesep = os.linesep
Pulkit Goyal
py3: add a bytes version of os.name...
r30302 osname = os.name
Pulkit Goyal
py3: have pycompat.ospathsep and pycompat.ossep...
r30303 ospathsep = os.pathsep
ossep = os.sep
Pulkit Goyal
py3: have a bytes version of os.altsep...
r30623 osaltsep = os.altsep
Yuya Nishihara
py3: provide bytes stdin/out/err through util module...
r30472 stdin = sys.stdin
stdout = sys.stdout
stderr = sys.stderr
Augie Fackler
pycompat: verify sys.argv exists before forwarding it (issue5493)...
r31277 if getattr(sys, 'argv', None) is not None:
sysargv = sys.argv
Pulkit Goyal
py3: have a bytes version of sys.platform...
r30624 sysplatform = sys.platform
Pulkit Goyal
py3: add os.getcwdb() to have bytes path...
r30500 getcwd = os.getcwd
Pulkit Goyal
py3: have bytes version of sys.executable...
r30668 sysexecutable = sys.executable
Pulkit Goyal
py3: have a bytes version of shlex.split()...
r30678 shlexsplit = shlex.split
Yuya Nishihara
pycompat: move imports of cStringIO/io to where they are used...
r31372 stringio = cStringIO.StringIO
Augie Fackler
pycompat: add maplist alias for old map behavior
r31501 maplist = map
Pulkit Goyal
py3: add a bytes version of os.name...
r30302
timeless
pycompat: add empty and queue to handle py3 divergence...
r28818 empty = _queue.Empty
queue = _queue.Queue
timeless
pycompat: alias xrange to range in py3
r28834
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
class _pycompatstub(object):
    """Namespace object whose attributes are resolved on first access

    Aliases are registered up front as (origin, attribute name) pairs.
    The real object is fetched from ``origin`` the first time the alias is
    read, then stored in the instance ``__dict__``.
    """

    def __init__(self):
        # alias name -> (origin object, attribute name on that origin)
        self._aliases = {}

    def _registeraliases(self, origin, items):
        """Add items that will be populated at the first access

        Each alias is the item name with underscores removed and
        lowercased; it maps to the attribute ``item`` of ``origin``.
        """
        underscore = sysstr('_')
        nothing = sysstr('')
        for item in map(sysstr, items):
            alias = item.replace(underscore, nothing).lower()
            self._aliases[alias] = (origin, item)

    def _registeralias(self, origin, attr, name):
        """Alias ``origin``.``attr`` as ``name``"""
        key = sysstr(name)
        self._aliases[key] = (origin, sysstr(attr))

    def __getattr__(self, name):
        # Only reached when normal lookup fails, i.e. before the alias
        # has been stored in __dict__ below.
        try:
            origin, item = self._aliases[name]
        except KeyError:
            raise AttributeError(name)
        obj = getattr(origin, item)
        self.__dict__[name] = obj
        return obj
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 httpserver = _pycompatstub()
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 urlreq = _pycompatstub()
urlerr = _pycompatstub()
Yuya Nishihara
pycompat: provide 'ispy3' constant...
r30030 if not ispy3:
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 import BaseHTTPServer
import CGIHTTPServer
import SimpleHTTPServer
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 import urllib2
import urllib
Gregory Szorc
pycompat: define urlreq.urlparse and urlreq.unparse aliases...
r31569 import urlparse
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 urlreq._registeraliases(urllib, (
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "addclosehook",
"addinfourl",
"ftpwrapper",
"pathname2url",
"quote",
"splitattr",
"splitpasswd",
"splitport",
"splituser",
"unquote",
"url2pathname",
"urlencode",
))
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 urlreq._registeraliases(urllib2, (
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "AbstractHTTPHandler",
"BaseHandler",
"build_opener",
"FileHandler",
"FTPHandler",
"HTTPBasicAuthHandler",
"HTTPDigestAuthHandler",
"HTTPHandler",
"HTTPPasswordMgrWithDefaultRealm",
"HTTPSHandler",
"install_opener",
"ProxyHandler",
"Request",
"urlopen",
))
Gregory Szorc
pycompat: define urlreq.urlparse and urlreq.unparse aliases...
r31569 urlreq._registeraliases(urlparse, (
"urlparse",
"urlunparse",
))
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 urlerr._registeraliases(urllib2, (
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "HTTPError",
"URLError",
))
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 httpserver._registeraliases(BaseHTTPServer, (
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 "HTTPServer",
"BaseHTTPRequestHandler",
))
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 httpserver._registeraliases(SimpleHTTPServer, (
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 "SimpleHTTPRequestHandler",
))
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 httpserver._registeraliases(CGIHTTPServer, (
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 "CGIHTTPRequestHandler",
))
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 else:
Gregory Szorc
pycompat: alias urllib symbols directly...
r31399 import urllib.parse
urlreq._registeraliases(urllib.parse, (
"splitattr",
"splitpasswd",
"splitport",
"splituser",
Gregory Szorc
pycompat: define urlreq.urlparse and urlreq.unparse aliases...
r31569 "urlparse",
"urlunparse",
Gregory Szorc
pycompat: alias urllib symbols directly...
r31399 ))
Gregory Szorc
pycompat: alias urlreq.unquote to unquote_to_bytes...
r31566 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 import urllib.request
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 urlreq._registeraliases(urllib.request, (
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "AbstractHTTPHandler",
"BaseHandler",
"build_opener",
"FileHandler",
"FTPHandler",
"ftpwrapper",
"HTTPHandler",
"HTTPSHandler",
"install_opener",
"pathname2url",
"HTTPBasicAuthHandler",
"HTTPDigestAuthHandler",
Gregory Szorc
pycompat: add HTTPPasswordMgrWithDefaultRealm to Python 3 block...
r29414 "HTTPPasswordMgrWithDefaultRealm",
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "ProxyHandler",
"Request",
"url2pathname",
"urlopen",
))
Gregory Szorc
pycompat: alias urllib symbols directly...
r31399 import urllib.response
urlreq._registeraliases(urllib.response, (
"addclosehook",
"addinfourl",
))
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 import urllib.error
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 urlerr._registeraliases(urllib.error, (
timeless
pycompat: add util.urlerr util.urlreq classes for py3 compat...
r28882 "HTTPError",
"URLError",
))
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 import http.server
Yuya Nishihara
pycompat: delay loading modules registered to stub...
r29801 httpserver._registeraliases(http.server, (
Pulkit Goyal
py3: conditionalize BaseHTTPServer, SimpleHTTPServer and CGIHTTPServer import...
r29566 "HTTPServer",
"BaseHTTPRequestHandler",
"SimpleHTTPRequestHandler",
"CGIHTTPRequestHandler",
))
Gregory Szorc
pycompat: custom implementation of urllib.parse.quote()...
r31400
# urllib.parse.quote() accepts both str and bytes, decodes bytes
# (if necessary), and returns str. This is wonky. We provide a custom
# implementation that only accepts bytes and emits bytes.
def quote(s, safe=r'/'):
    """Percent-quote the bytes ``s`` and return the result as bytes

    Quoting is done by urllib.parse.quote_from_bytes(), which returns
    str; that result is encoded as strict ASCII. ``safe`` is forwarded
    as the characters left unquoted.
    """
    quoted = urllib.parse.quote_from_bytes(s, safe=safe)
    return quoted.encode('ascii', 'strict')
Pulkit Goyal
py3: add a bytes version of urllib.parse.urlencode() to pycompat.py...
r31842 # urllib.parse.urlencode() returns str. We use this function to make
# sure we return bytes.
def urlencode(query, doseq=False):
    """Run urllib.parse.urlencode() and return its output as bytes

    ``query`` and ``doseq`` are forwarded unchanged; the str result is
    encoded as ASCII.
    """
    encoded = urllib.parse.urlencode(query, doseq=doseq)
    return encoded.encode('ascii')
Gregory Szorc
pycompat: custom implementation of urllib.parse.quote()...
r31400 urlreq.quote = quote
Pulkit Goyal
py3: add a bytes version of urllib.parse.urlencode() to pycompat.py...
r31842 urlreq.urlencode = urlencode