# HG changeset patch # User Augie Fackler # Date 2016-06-27 15:53:50 # Node ID 456609cbd840e43fc4aba3fb904b808b94174323 # Parent 9e8d258708bb5dd8bbc51664314566287bfe3298 httpclient: update to 54868ef054d2 of httpplus As of that revision, httpplus fully supports Python 3, including mimicing all the subtle behavior changes around headers in Python 3's http.client. diff --git a/mercurial/httpclient/__init__.py b/mercurial/httpclient/__init__.py --- a/mercurial/httpclient/__init__.py +++ b/mercurial/httpclient/__init__.py @@ -40,13 +40,15 @@ from __future__ import absolute_import # Many functions in this file have too many arguments. # pylint: disable=R0913 - +import email +import email.message import errno import inspect import logging -import rfc822 import select import socket +import ssl +import sys try: import cStringIO as io @@ -62,15 +64,14 @@ except ImportError: from . import ( _readers, - socketutil, ) logger = logging.getLogger(__name__) __all__ = ['HTTPConnection', 'HTTPResponse'] -HTTP_VER_1_0 = 'HTTP/1.0' -HTTP_VER_1_1 = 'HTTP/1.1' +HTTP_VER_1_0 = b'HTTP/1.0' +HTTP_VER_1_1 = b'HTTP/1.1' OUTGOING_BUFFER_SIZE = 1 << 15 INCOMING_BUFFER_SIZE = 1 << 20 @@ -84,7 +85,7 @@ XFER_ENCODING_CHUNKED = 'chunked' CONNECTION_CLOSE = 'close' -EOL = '\r\n' +EOL = b'\r\n' _END_HEADERS = EOL * 2 # Based on some searching around, 1 second seems like a reasonable @@ -92,6 +93,57 @@ EOL = '\r\n' TIMEOUT_ASSUME_CONTINUE = 1 TIMEOUT_DEFAULT = None +if sys.version_info > (3, 0): + _unicode = str +else: + _unicode = unicode + +def _ensurebytes(data): + if not isinstance(data, (_unicode, bytes)): + data = str(data) + if not isinstance(data, bytes): + try: + return data.encode('latin-1') + except UnicodeEncodeError as err: + raise UnicodeEncodeError( + err.encoding, + err.object, + err.start, + err.end, + '%r is not valid Latin-1 Use .encode("utf-8") ' + 'if sending as utf-8 is desired.' % ( + data[err.start:err.end],)) + return data + +class _CompatMessage(email.message.Message): + """Workaround for rfc822.Message and email.message.Message API diffs.""" + + @classmethod + def from_string(cls, s): + if sys.version_info > (3, 0): + # Python 3 can't decode headers from bytes, so we have to + # trust RFC 2616 and decode the headers as iso-8859-1 + # bytes. + s = s.decode('iso-8859-1') + headers = email.message_from_string(s, _class=_CompatMessage) + # Fix multi-line headers to match httplib's behavior from + # Python 2.x, since email.message.Message handles them in + # slightly different ways. + if sys.version_info < (3, 0): + new = [] + for h, v in headers._headers: + if '\r\n' in v: + v = '\n'.join([' ' + x.lstrip() for x in v.split('\r\n')])[1:] + new.append((h, v)) + headers._headers = new + return headers + + def getheaders(self, key): + return self.get_all(key) + + def getheader(self, key, default=None): + return self.get(key, failobj=default) + class HTTPResponse(object): """Response from an HTTP server. @@ -102,11 +154,11 @@ class HTTPResponse(object): def __init__(self, sock, timeout, method): self.sock = sock self.method = method - self.raw_response = '' + self.raw_response = b'' self._headers_len = 0 self.headers = None self.will_close = False - self.status_line = '' + self.status_line = b'' self.status = None self.continued = False self.http_version = None @@ -142,6 +194,10 @@ class HTTPResponse(object): return self.headers.getheader(header, default=default) def getheaders(self): + if sys.version_info < (3, 0): + return [(k.lower(), v) for k, v in self.headers.items()] + # Starting in Python 3, headers aren't lowercased before being + # returned here. return self.headers.items() def readline(self): @@ -152,14 +208,14 @@ class HTTPResponse(object): """ blocks = [] while True: - self._reader.readto('\n', blocks) + self._reader.readto(b'\n', blocks) - if blocks and blocks[-1][-1] == '\n' or self.complete(): + if blocks and blocks[-1][-1:] == b'\n' or self.complete(): break self._select() - return ''.join(blocks) + return b''.join(blocks) def read(self, length=None): """Read data from the response body.""" @@ -186,8 +242,8 @@ class HTTPResponse(object): raise HTTPTimeoutException('timeout reading data') try: data = self.sock.recv(INCOMING_BUFFER_SIZE) - except socket.sslerror as e: - if e.args[0] != socket.SSL_ERROR_WANT_READ: + except ssl.SSLError as e: + if e.args[0] != ssl.SSL_ERROR_WANT_READ: raise logger.debug('SSL_ERROR_WANT_READ in _select, should retry later') return True @@ -214,7 +270,7 @@ class HTTPResponse(object): self.raw_response += data # This is a bogus server with bad line endings if self._eol not in self.raw_response: - for bad_eol in ('\n', '\r'): + for bad_eol in (b'\n', b'\r'): if (bad_eol in self.raw_response # verify that bad_eol is not the end of the incoming data # as this could be a response line that just got @@ -231,8 +287,8 @@ class HTTPResponse(object): # handle 100-continue response hdrs, body = self.raw_response.split(self._end_headers, 1) - unused_http_ver, status = hdrs.split(' ', 1) - if status.startswith('100'): + unused_http_ver, status = hdrs.split(b' ', 1) + if status.startswith(b'100'): self.raw_response = body self.continued = True logger.debug('continue seen, setting body to %r', body) @@ -246,14 +302,14 @@ class HTTPResponse(object): self.status_line, hdrs = hdrs.split(self._eol, 1) else: self.status_line = hdrs - hdrs = '' + hdrs = b'' # TODO HTTP < 1.0 support (self.http_version, self.status, - self.reason) = self.status_line.split(' ', 2) + self.reason) = self.status_line.split(b' ', 2) self.status = int(self.status) if self._eol != EOL: - hdrs = hdrs.replace(self._eol, '\r\n') - headers = rfc822.Message(io.StringIO(hdrs)) + hdrs = hdrs.replace(self._eol, b'\r\n') + headers = _CompatMessage.from_string(hdrs) content_len = None if HDR_CONTENT_LENGTH in headers: content_len = int(headers[HDR_CONTENT_LENGTH]) @@ -270,8 +326,8 @@ class HTTPResponse(object): # HEAD responses are forbidden from returning a body, and # it's implausible for a CONNECT response to use # close-is-end logic for an OK response. - if (self.method == 'HEAD' or - (self.method == 'CONNECT' and content_len is None)): + if (self.method == b'HEAD' or + (self.method == b'CONNECT' and content_len is None)): content_len = 0 if content_len is not None: logger.debug('using a content-length reader with length %d', @@ -305,7 +361,7 @@ def _foldheaders(headers): >>> _foldheaders({'Accept-Encoding': 'wat'}) {'accept-encoding': ('Accept-Encoding', 'wat')} """ - return dict((k.lower(), (k, v)) for k, v in headers.iteritems()) + return dict((k.lower(), (k, v)) for k, v in headers.items()) try: inspect.signature @@ -391,15 +447,16 @@ class HTTPConnection(object): Any extra keyword arguments to this function will be provided to the ssl_wrap_socket method. If no ssl """ - if port is None and host.count(':') == 1 or ']:' in host: - host, port = host.rsplit(':', 1) + host = _ensurebytes(host) + if port is None and host.count(b':') == 1 or b']:' in host: + host, port = host.rsplit(b':', 1) port = int(port) - if '[' in host: + if b'[' in host: host = host[1:-1] if ssl_wrap_socket is not None: _wrap_socket = ssl_wrap_socket else: - _wrap_socket = socketutil.wrap_socket + _wrap_socket = ssl.wrap_socket call_wrap_socket = None handlesubar = _handlesarg(_wrap_socket, 'server_hostname') if handlesubar is True: @@ -430,8 +487,6 @@ class HTTPConnection(object): elif port is None: port = (use_ssl and 443 or 80) self.port = port - if use_ssl and not socketutil.have_ssl: - raise Exception('ssl requested but unavailable on this Python') self.ssl = use_ssl self.ssl_opts = ssl_opts self._ssl_validator = ssl_validator @@ -461,15 +516,15 @@ class HTTPConnection(object): if self._proxy_host is not None: logger.info('Connecting to http proxy %s:%s', self._proxy_host, self._proxy_port) - sock = socketutil.create_connection((self._proxy_host, - self._proxy_port)) + sock = socket.create_connection((self._proxy_host, + self._proxy_port)) if self.ssl: - data = self._buildheaders('CONNECT', '%s:%d' % (self.host, - self.port), + data = self._buildheaders(b'CONNECT', b'%s:%d' % (self.host, + self.port), proxy_headers, HTTP_VER_1_0) sock.send(data) sock.setblocking(0) - r = self.response_class(sock, self.timeout, 'CONNECT') + r = self.response_class(sock, self.timeout, b'CONNECT') timeout_exc = HTTPTimeoutException( 'Timed out waiting for CONNECT response from proxy') while not r.complete(): @@ -494,7 +549,7 @@ class HTTPConnection(object): logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.', self.host, self.port) else: - sock = socketutil.create_connection((self.host, self.port)) + sock = socket.create_connection((self.host, self.port)) if self.ssl: # This is the default, but in the case of proxied SSL # requests the proxy logic above will have cleared @@ -515,25 +570,26 @@ class HTTPConnection(object): hdrhost = self.host else: # include nonstandard port in header - if ':' in self.host: # must be IPv6 - hdrhost = '[%s]:%d' % (self.host, self.port) + if b':' in self.host: # must be IPv6 + hdrhost = b'[%s]:%d' % (self.host, self.port) else: - hdrhost = '%s:%d' % (self.host, self.port) + hdrhost = b'%s:%d' % (self.host, self.port) if self._proxy_host and not self.ssl: # When talking to a regular http proxy we must send the # full URI, but in all other cases we must not (although # technically RFC 2616 says servers must accept our # request if we screw up, experimentally few do that # correctly.) - assert path[0] == '/', 'path must start with a /' - path = 'http://%s%s' % (hdrhost, path) - outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)] - headers['host'] = ('Host', hdrhost) + assert path[0:1] == b'/', 'path must start with a /' + path = b'http://%s%s' % (hdrhost, path) + outgoing = [b'%s %s %s%s' % (method, path, http_ver, EOL)] + headers[b'host'] = (b'Host', hdrhost) headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity') - for hdr, val in headers.itervalues(): - outgoing.append('%s: %s%s' % (hdr, val, EOL)) + for hdr, val in sorted((_ensurebytes(h), _ensurebytes(v)) + for h, v in headers.values()): + outgoing.append(b'%s: %s%s' % (hdr, val, EOL)) outgoing.append(EOL) - return ''.join(outgoing) + return b''.join(outgoing) def close(self): """Close the connection to the server. @@ -586,6 +642,8 @@ class HTTPConnection(object): available. Use the `getresponse()` method to retrieve the response. """ + method = _ensurebytes(method) + path = _ensurebytes(path) if self.busy(): raise httplib.CannotSendRequest( 'Can not send another request before ' @@ -594,11 +652,26 @@ class HTTPConnection(object): logger.info('sending %s request for %s to %s on port %s', method, path, self.host, self.port) + hdrs = _foldheaders(headers) - if hdrs.get('expect', ('', ''))[1].lower() == '100-continue': + # Figure out headers that have to be computed from the request + # body. + chunked = False + if body and HDR_CONTENT_LENGTH not in hdrs: + if getattr(body, '__len__', False): + hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, + b'%d' % len(body)) + elif getattr(body, 'read', False): + hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING, + XFER_ENCODING_CHUNKED) + chunked = True + else: + raise BadRequestData('body has no __len__() nor read()') + # Figure out expect-continue header + if hdrs.get('expect', ('', ''))[1].lower() == b'100-continue': expect_continue = True elif expect_continue: - hdrs['expect'] = ('Expect', '100-Continue') + hdrs['expect'] = (b'Expect', b'100-Continue') # httplib compatibility: if the user specified a # proxy-authorization header, that's actually intended for a # proxy CONNECT action, not the real request, but only if @@ -608,25 +681,15 @@ class HTTPConnection(object): pa = hdrs.pop('proxy-authorization', None) if pa is not None: pheaders['proxy-authorization'] = pa - - chunked = False - if body and HDR_CONTENT_LENGTH not in hdrs: - if getattr(body, '__len__', False): - hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body)) - elif getattr(body, 'read', False): - hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING, - XFER_ENCODING_CHUNKED) - chunked = True - else: - raise BadRequestData('body has no __len__() nor read()') + # Build header data + outgoing_headers = self._buildheaders( + method, path, hdrs, self.http_version) # If we're reusing the underlying socket, there are some # conditions where we'll want to retry, so make a note of the # state of self.sock fresh_socket = self.sock is None self._connect(pheaders) - outgoing_headers = self._buildheaders( - method, path, hdrs, self.http_version) response = None first = True @@ -666,8 +729,8 @@ class HTTPConnection(object): try: try: data = r[0].recv(INCOMING_BUFFER_SIZE) - except socket.sslerror as e: - if e.args[0] != socket.SSL_ERROR_WANT_READ: + except ssl.SSLError as e: + if e.args[0] != ssl.SSL_ERROR_WANT_READ: raise logger.debug('SSL_ERROR_WANT_READ while sending ' 'data, retrying...') @@ -736,16 +799,20 @@ class HTTPConnection(object): continue if len(data) < OUTGOING_BUFFER_SIZE: if chunked: - body = '0' + EOL + EOL + body = b'0' + EOL + EOL else: body = None if chunked: - out = hex(len(data))[2:] + EOL + data + EOL + # This encode is okay because we know + # hex() is building us only 0-9 and a-f + # digits. + asciilen = hex(len(data))[2:].encode('ascii') + out = asciilen + EOL + data + EOL else: out = data amt = w[0].send(out) except socket.error as e: - if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl: + if e[0] == ssl.SSL_ERROR_WANT_WRITE and self.ssl: # This means that SSL hasn't flushed its buffer into # the socket yet. # TODO: find a way to block on ssl flushing its buffer @@ -764,6 +831,7 @@ class HTTPConnection(object): body = out[amt:] else: outgoing_headers = out[amt:] + # End of request-sending loop. # close if the server response said to or responded before eating # the whole request diff --git a/mercurial/httpclient/_readers.py b/mercurial/httpclient/_readers.py --- a/mercurial/httpclient/_readers.py +++ b/mercurial/httpclient/_readers.py @@ -98,7 +98,7 @@ class AbstractReader(object): need -= len(b) if need == 0: break - result = ''.join(blocks) + result = b''.join(blocks) assert len(result) == amt or (self._finished and len(result) < amt) return result diff --git a/mercurial/httpclient/socketutil.py b/mercurial/httpclient/socketutil.py deleted file mode 100644 --- a/mercurial/httpclient/socketutil.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2010, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. - -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""Abstraction to simplify socket use for Python < 2.6 - -This will attempt to use the ssl module and the new -socket.create_connection method, but fall back to the old -methods if those are unavailable. -""" -from __future__ import absolute_import - -import logging -import socket - -logger = logging.getLogger(__name__) - -try: - import ssl - # make demandimporters load the module - ssl.wrap_socket # pylint: disable=W0104 - have_ssl = True -except ImportError: - import httplib - import urllib2 - have_ssl = getattr(urllib2, 'HTTPSHandler', False) - ssl = False - - -try: - create_connection = socket.create_connection -except AttributeError: - def create_connection(address): - """Backport of socket.create_connection from Python 2.6.""" - host, port = address - msg = "getaddrinfo returns an empty list" - sock = None - for res in socket.getaddrinfo(host, port, 0, - socket.SOCK_STREAM): - af, socktype, proto, unused_canonname, sa = res - try: - sock = socket.socket(af, socktype, proto) - logger.info("connect: (%s, %s)", host, port) - sock.connect(sa) - except socket.error as msg: - logger.info('connect fail: %s %s', host, port) - if sock: - sock.close() - sock = None - continue - break - if not sock: - raise socket.error(msg) - return sock - -if ssl: - wrap_socket = ssl.wrap_socket - CERT_NONE = ssl.CERT_NONE - CERT_OPTIONAL = ssl.CERT_OPTIONAL - CERT_REQUIRED = ssl.CERT_REQUIRED -else: - class FakeSocket(httplib.FakeSocket): - """Socket wrapper that supports SSL.""" - - # Silence lint about this goofy backport class - # pylint: disable=W0232,E1101,R0903,R0913,C0111 - - # backport the behavior from Python 2.6, which is to busy wait - # on the socket instead of anything nice. Sigh. - # See http://bugs.python.org/issue3890 for more info. - def recv(self, buflen=1024, flags=0): - """ssl-aware wrapper around socket.recv - """ - if flags != 0: - raise ValueError( - "non-zero flags not allowed in calls to recv() on %s" % - self.__class__) - while True: - try: - return self._ssl.read(buflen) - except socket.sslerror as x: - if x.args[0] == socket.SSL_ERROR_WANT_READ: - continue - else: - raise x - - _PROTOCOL_SSLv23 = 2 - - CERT_NONE = 0 - CERT_OPTIONAL = 1 - CERT_REQUIRED = 2 - - # Disable unused-argument because we're making a dumb wrapper - # that's like an upstream method. - # - # pylint: disable=W0613,R0913 - def wrap_socket(sock, keyfile=None, certfile=None, - server_side=False, cert_reqs=CERT_NONE, - ssl_version=_PROTOCOL_SSLv23, ca_certs=None, - do_handshake_on_connect=True, - suppress_ragged_eofs=True, - server_hostname=None): - """Backport of ssl.wrap_socket from Python 2.6.""" - if cert_reqs != CERT_NONE and ca_certs: - raise CertificateValidationUnsupported( - 'SSL certificate validation requires the ssl module' - '(included in Python 2.6 and later.)') - sslob = socket.ssl(sock) - # borrow httplib's workaround for no ssl.wrap_socket - sock = FakeSocket(sock, sslob) - return sock - # pylint: enable=W0613,R0913 - - -class CertificateValidationUnsupported(Exception): - """Exception raised when cert validation is requested but unavailable.""" -# no-check-code diff --git a/tests/test-check-code.t b/tests/test-check-code.t --- a/tests/test-check-code.t +++ b/tests/test-check-code.t @@ -17,4 +17,3 @@ New errors are not allowed. Warnings are Skipping i18n/polib.py it has no-che?k-code (glob) Skipping mercurial/httpclient/__init__.py it has no-che?k-code (glob) Skipping mercurial/httpclient/_readers.py it has no-che?k-code (glob) - Skipping mercurial/httpclient/socketutil.py it has no-che?k-code (glob)