Show More
__init__.py
910 lines
| 37.0 KiB
| text/x-python
|
PythonLexer
Augie Fackler
|
r14243 | # Copyright 2010, Google Inc. | ||
# All rights reserved. | ||||
# | ||||
# Redistribution and use in source and binary forms, with or without | ||||
# modification, are permitted provided that the following conditions are | ||||
# met: | ||||
# | ||||
# * Redistributions of source code must retain the above copyright | ||||
# notice, this list of conditions and the following disclaimer. | ||||
# * Redistributions in binary form must reproduce the above | ||||
# copyright notice, this list of conditions and the following disclaimer | ||||
# in the documentation and/or other materials provided with the | ||||
# distribution. | ||||
# * Neither the name of Google Inc. nor the names of its | ||||
# contributors may be used to endorse or promote products derived from | ||||
# this software without specific prior written permission. | ||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
"""Improved HTTP/1.1 client library | ||||
This library contains an HTTPConnection which is similar to the one in | ||||
httplib, but has several additional features: | ||||
* supports keepalives natively | ||||
* uses select() to block for incoming data | ||||
* notices when the server responds early to a request | ||||
* implements ssl inline instead of in a different class | ||||
""" | ||||
Augie Fackler
|
r27601 | from __future__ import absolute_import | ||
Augie Fackler
|
r14243 | |||
Augie Fackler
|
r19182 | # Many functions in this file have too many arguments. | ||
# pylint: disable=R0913 | ||||
Augie Fackler
|
r29442 | import email | ||
import email.message | ||||
Augie Fackler
|
r14243 | import errno | ||
Augie Fackler
|
r29131 | import inspect | ||
Augie Fackler
|
r14243 | import logging | ||
import select | ||||
import socket | ||||
Augie Fackler
|
r29442 | import ssl | ||
import sys | ||||
Augie Fackler
|
r14243 | |||
Augie Fackler
|
r29131 | try: | ||
import cStringIO as io | ||||
io.StringIO | ||||
except ImportError: | ||||
import io | ||||
try: | ||||
import httplib | ||||
httplib.HTTPException | ||||
except ImportError: | ||||
import http.client as httplib | ||||
Augie Fackler
|
r27601 | from . import ( | ||
_readers, | ||||
Augie Fackler
|
r29131 | ) | ||
Augie Fackler
|
r14243 | |||
logger = logging.getLogger(__name__) | ||||
__all__ = ['HTTPConnection', 'HTTPResponse'] | ||||
Augie Fackler
|
# Protocol version tokens as they appear in request and status lines.
HTTP_VER_1_0 = b'HTTP/1.0'
HTTP_VER_1_1 = b'HTTP/1.1'

# Socket buffer sizes, in bytes.
OUTGOING_BUFFER_SIZE = 32 * 1024
INCOMING_BUFFER_SIZE = 1024 * 1024

# Lowercased header names and header values this module inspects.
HDR_ACCEPT_ENCODING = 'accept-encoding'
HDR_CONNECTION_CTRL = 'connection'
HDR_CONTENT_LENGTH = 'content-length'
HDR_XFER_ENCODING = 'transfer-encoding'

XFER_ENCODING_CHUNKED = 'chunked'
CONNECTION_CLOSE = 'close'

# Line terminator, and the blank line that ends a header block.
EOL = b'\r\n'
_END_HEADERS = EOL + EOL

# Based on some searching around, 1 second seems like a reasonable
# default here.
TIMEOUT_ASSUME_CONTINUE = 1
TIMEOUT_DEFAULT = None
Augie Fackler
|
# Name of the text (non-bytes) string type on the running interpreter.
if sys.version_info > (3, 0):
    _unicode = str
else:
    _unicode = unicode  # noqa: F821 -- only evaluated on Python 2


def _ensurebytes(data):
    """Return *data* as bytes suitable for writing to the socket.

    Args:
      data: bytes (returned unchanged), bytearray (copied to bytes),
          text (encoded as Latin-1), or any other object (converted
          with str() first, then encoded if the result is text).

    Returns:
      A bytes object.

    Raises:
      UnicodeEncodeError: if text contains characters outside Latin-1.
          The reason names the offending slice and suggests encoding
          to UTF-8 explicitly.
    """
    if isinstance(data, bytes):
        return data
    if isinstance(data, bytearray):
        # str(bytearray) would produce the repr text on Python 3, which
        # is never what a caller wants on the wire.
        return bytes(data)
    if not isinstance(data, _unicode):
        data = str(data)
    if isinstance(data, bytes):
        # Python 2: str() already produced a byte string.
        return data
    try:
        return data.encode('latin-1')
    except UnicodeEncodeError as err:
        raise UnicodeEncodeError(
            err.encoding,
            err.object,
            err.start,
            err.end,
            '%r is not valid Latin-1. Use .encode("utf-8") '
            'if sending as utf-8 is desired.' % (
                data[err.start:err.end],))
class _CompatMessage(email.message.Message):
    """email.message.Message with the rfc822.Message accessors callers use."""

    @classmethod
    def from_string(cls, s):
        """Parse a raw header block into a _CompatMessage.

        On Python 3 the input is bytes and is decoded as iso-8859-1
        before parsing; on Python 2 it is parsed as-is and folded
        (multi-line) header values are rewritten afterwards.
        """
        on_py3 = sys.version_info > (3, 0)
        if on_py3:
            # Python 3 can't decode headers from bytes, so we have to
            # trust RFC 2616 and decode the headers as iso-8859-1
            # bytes.
            s = s.decode('iso-8859-1')
        headers = email.message_from_string(s, _class=_CompatMessage)
        if sys.version_info < (3, 0):
            # Fix multi-line headers to match httplib's behavior from
            # Python 2.x, since email.message.Message handles them in
            # slightly different ways.
            headers._headers = [
                (name,
                 '\n'.join([' ' + part.lstrip()
                            for part in value.split('\r\n')])[1:]
                 if '\r\n' in value else value)
                for name, value in headers._headers]
        return headers

    def getheaders(self, key):
        """Return every value stored for header *key* (via get_all)."""
        return self.get_all(key)

    def getheader(self, key, default=None):
        """Return the first value for header *key*, or *default*."""
        return self.get(key, failobj=default)
Augie Fackler
|
r14243 | |||
class HTTPResponse(object):
    """Response from an HTTP server.

    The response will continue to load as available. If you need the
    complete response before continuing, check the .complete() method.
    """

    def __init__(self, sock, timeout, method):
        """Prepare to parse a response arriving on *sock*.

        Args:
          sock: Socket the response is read from.
          timeout: Timeout passed to select() while waiting for data.
          method: Request method (bytes) that produced this response;
              HEAD and CONNECT affect how the body length is chosen.
        """
        self.sock = sock
        self.method = method
        # Bytes received so far; only used until the headers are parsed.
        self.raw_response = b''
        self._headers_len = 0
        self.headers = None
        self.will_close = False
        self.status_line = b''
        self.status = None
        # Set once a "100 Continue" interim response has been consumed.
        self.continued = False
        self.http_version = None
        self.reason = None
        # Body reader, chosen once the headers have been parsed.
        self._reader = None

        self._read_location = 0
        # Line terminator in use; may be downgraded for bogus servers.
        self._eol = EOL

        self._timeout = timeout

    @property
    def _end_headers(self):
        """Byte sequence that terminates the header block."""
        return self._eol * 2

    def complete(self):
        """Returns true if this response is completely loaded.

        Note that if this is a connection where complete means the
        socket is closed, this will nearly always return False, even
        in cases where all the data has actually been loaded.
        """
        if self._reader:
            return self._reader.done()
        # No reader yet means the headers have not been parsed.
        return False

    def _close(self):
        """Tell the body reader, if there is one, that the socket closed."""
        if self._reader is not None:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._close()

    def getheader(self, header, default=None):
        """Return the value of *header*, or *default* if it is absent."""
        return self.headers.getheader(header, default=default)

    def getheaders(self):
        """Return the response headers as (name, value) pairs.

        On Python 2 the names are lowercased; on Python 3 they are
        returned as received.
        """
        if sys.version_info < (3, 0):
            return [(k.lower(), v) for k, v in self.headers.items()]
        # Starting in Python 3, headers aren't lowercased before being
        # returned here.
        return self.headers.items()

    def readline(self):
        """Read a single line from the response body.

        This may block until either a line ending is found or the
        response is complete.
        """
        blocks = []
        while True:
            self._reader.readto(b'\n', blocks)

            if blocks and blocks[-1][-1:] == b'\n' or self.complete():
                break

            self._select()

        return b''.join(blocks)

    def read(self, length=None):
        """Read data from the response body.

        Args:
          length: Number of bytes wanted. A falsy value (None or 0)
              waits for the response to complete and returns whatever
              the reader then has available.

        Closes the socket once the response is complete if the
        connection is marked as closing.
        """
        # if length is None, unbounded read
        while (not self.complete()  # never select on a finished read
               and (not length  # unbounded, so we wait for complete()
                    or length > self._reader.available_data)):
            self._select()
        if not length:
            length = self._reader.available_data
        r = self._reader.read(length)
        if self.complete() and self.will_close:
            self.sock.close()
        return r

    def _select(self):
        """Wait for the socket to become readable and load what arrives.

        Returns:
          True if data was loaded or SSL asked for a retry, False if
          the peer closed the socket.

        Raises:
          HTTPTimeoutException: if select() times out while the
              response is still incomplete.
        """
        r, unused_write, unused_err = select.select(
            [self.sock], [], [], self._timeout)
        if not r:
            # socket was not readable. If the response is not
            # complete, raise a timeout.
            if not self.complete():
                logger.info('timed out with timeout of %s', self._timeout)
                raise HTTPTimeoutException('timeout reading data')
        try:
            data = self.sock.recv(INCOMING_BUFFER_SIZE)
        except ssl.SSLError as e:
            if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                raise
            logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
            return True
        logger.debug('response read %d data during _select', len(data))
        # If the socket was readable and no data was read, that means
        # the socket was closed. Inform the reader (if any) so it can
        # raise an exception if this is an invalid situation.
        if not data:
            if self._reader:
                # We're a friend of the reader class here.
                # pylint: disable=W0212
                self._reader._close()
            return False
        else:
            self._load_response(data)
            return True

    # This method gets replaced by _load later, which confuses pylint.
    def _load_response(self, data):  # pylint: disable=E0202
        """Accumulate header bytes and, once complete, parse them.

        After the headers are parsed a body reader is selected and this
        method is replaced on the instance by that reader's _load, so
        later data goes straight to the reader.

        Raises:
          ValueError: if the status line has no status code or the
              code or Content-Length is not an integer.
        """
        # Being here implies we're not at the end of the headers yet,
        # since at the end of this method if headers were completely
        # loaded we replace this method with the load() method of the
        # reader we created.
        self.raw_response += data
        # This is a bogus server with bad line endings
        if self._eol not in self.raw_response:
            for bad_eol in (b'\n', b'\r'):
                if (bad_eol in self.raw_response
                    # verify that bad_eol is not the end of the incoming data
                    # as this could be a response line that just got
                    # split between \r and \n.
                    and (self.raw_response.index(bad_eol) <
                         (len(self.raw_response) - 1))):
                    logger.info('bogus line endings detected, '
                                'using %r for EOL', bad_eol)
                    self._eol = bad_eol
                    break
        # exit early if not at end of headers
        if self._end_headers not in self.raw_response or self.headers:
            return

        hdrs, body = self.raw_response.split(self._end_headers, 1)
        # handle 100-continue response
        unused_http_ver, unused_sep, status = hdrs.partition(b' ')
        if status.startswith(b'100'):
            self.raw_response = body
            self.continued = True
            logger.debug('continue seen, setting body to %r', body)
            return

        # arriving here means we should parse response headers
        # as all headers have arrived completely
        del self.raw_response
        if self._eol in hdrs:
            self.status_line, hdrs = hdrs.split(self._eol, 1)
        else:
            self.status_line = hdrs
            hdrs = b''
        # TODO HTTP < 1.0 support
        parts = self.status_line.split(b' ', 2)
        if len(parts) == 2:
            # Some servers omit the reason phrase together with the
            # space before it ("HTTP/1.1 200"); treat it as empty
            # rather than failing to unpack.
            parts.append(b'')
        self.http_version, status, self.reason = parts
        self.status = int(status)
        if self._eol != EOL:
            # The header parser expects canonical line endings.
            hdrs = hdrs.replace(self._eol, b'\r\n')
        headers = _CompatMessage.from_string(hdrs)
        content_len = None
        if HDR_CONTENT_LENGTH in headers:
            content_len = int(headers[HDR_CONTENT_LENGTH])
        if self.http_version == HTTP_VER_1_0:
            self.will_close = True
        elif HDR_CONNECTION_CTRL in headers:
            self.will_close = (
                headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
        if (HDR_XFER_ENCODING in headers
            and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
            self._reader = _readers.ChunkedReader(self._eol)
            logger.debug('using a chunked reader')
        else:
            # HEAD responses are forbidden from returning a body, and
            # it's implausible for a CONNECT response to use
            # close-is-end logic for an OK response.
            if (self.method == b'HEAD' or
                (self.method == b'CONNECT' and content_len is None)):
                content_len = 0
            if content_len is not None:
                logger.debug('using a content-length reader with length %d',
                             content_len)
                self._reader = _readers.ContentLengthReader(content_len)
            else:
                # Response body had no length specified and is not
                # chunked, so the end of the body will only be
                # identifiable by the termination of the socket by the
                # server. My interpretation of the spec means that we
                # are correct in hitting this case if
                # transfer-encoding, content-length, and
                # connection-control were left unspecified.
                self._reader = _readers.CloseIsEndReader()
                logger.debug('using a close-is-end reader')
                self.will_close = True

        if body:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._load(body)
        logger.debug('headers complete')
        self.headers = headers
        # We're a friend of the reader class here.
        # pylint: disable=W0212
        self._load_response = self._reader._load
Augie Fackler
|
r14243 | |||
Augie Fackler
|
def _foldheaders(headers):
    """Index *headers* by lowercased name so values can be overwritten.

    Each entry maps the lowercased name to an (original name, value)
    pair, so the original spelling is kept for sending.

    >>> _foldheaders({'Accept-Encoding': 'wat'})
    {'accept-encoding': ('Accept-Encoding', 'wat')}
    """
    folded = {}
    for name, value in headers.items():
        folded[name.lower()] = (name, value)
    return folded
Augie Fackler
|
r27601 | |||
Augie Fackler
|
try:
    # Probe for the Python 3 introspection API; AttributeError selects
    # the getargspec-based fallback below.
    inspect.signature

    def _handlesarg(func, arg):
        """Try to determine if func accepts arg.

        Returns:
          True if func has a parameter named arg.
          'unknown' if func takes **kwargs (so it may or may not accept
          arg) or if its signature cannot be introspected at all.
          False otherwise.
        """
        try:
            params = inspect.signature(func).parameters
        except (TypeError, ValueError):
            # Raised for callables without a retrievable signature
            # (for example some builtins); we cannot tell either way.
            return 'unknown'
        if arg in params:
            return True
        for param in params.values():
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                return 'unknown'
        return False
except AttributeError:
    def _handlesarg(func, arg):
        """Try to determine if func accepts arg.

        Returns:
          True if func has a parameter named arg.
          'unknown' if func takes **kwargs (so it may or may not accept
          arg) or if its signature cannot be introspected at all.
          False otherwise.
        """
        try:
            spec = inspect.getargspec(func)
        except TypeError:
            # getargspec only understands Python functions.
            return 'unknown'
        if arg in spec.args:
            return True
        if spec.keywords:
            return 'unknown'
        return False
Augie Fackler
|
r14243 | |||
class HTTPConnection(object): | ||||
"""Connection to a single http server. | ||||
Supports 100-continue and keepalives natively. Uses select() for | ||||
non-blocking socket operations. | ||||
""" | ||||
http_version = HTTP_VER_1_1 | ||||
response_class = HTTPResponse | ||||
def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
             timeout=TIMEOUT_DEFAULT,
             continue_timeout=TIMEOUT_ASSUME_CONTINUE,
             proxy_hostport=None, proxy_headers=None,
             ssl_wrap_socket=None, **ssl_opts):
    """Create a new HTTPConnection.

    Args:
      host: The host to which we'll connect. May carry the port as
            "host:port" or "[ipv6]:port".
      port: Optional. The port over which we'll connect. Default 80 for
            non-ssl, 443 for ssl.
      use_ssl: Optional. Whether to use ssl. Defaults to False if port is
               not 443, true if port is 443.
      ssl_validator: a function(socket) to validate the ssl cert
      timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
      continue_timeout: Optional. Timeout for waiting on an expected
               "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
      proxy_hostport: Optional. Tuple of (host, port) to use as an http
               proxy for the connection. Default is to not use a proxy.
      proxy_headers: Optional dict of header keys and values to send to
               a proxy when using CONNECT. Providing this option and
               not proxy_hostport raises ValueError.
      ssl_wrap_socket: Optional function to use for wrapping
               sockets. If unspecified, ssl.wrap_socket is used.

    Any extra keyword arguments are stored and passed to the socket
    wrapping function when the connection is wrapped for ssl.
    """
    host = _ensurebytes(host)
    port_in_host = (port is None and host.count(b':') == 1) or b']:' in host
    if port_in_host:
        host, port = host.rsplit(b':', 1)
        port = int(port)
        if b'[' in host:
            # Drop the brackets around an IPv6 literal.
            host = host[1:-1]

    _wrap_socket = (ssl_wrap_socket if ssl_wrap_socket is not None
                    else ssl.wrap_socket)

    # The wrapper may spell the SNI argument "server_hostname" or
    # "serverhostname", or accept neither; adapt so the rest of the
    # class can always pass server_hostname=.
    handlesubar = _handlesarg(_wrap_socket, 'server_hostname')
    handlesnobar = _handlesarg(_wrap_socket, 'serverhostname')
    if handlesubar is True:
        # supports server_hostname
        call_wrap_socket = _wrap_socket
    elif handlesnobar is True:
        # supports serverhostname
        def call_wrap_socket(sock, server_hostname=None, **opts):
            return _wrap_socket(sock, serverhostname=server_hostname,
                                **opts)
    elif handlesubar is False and handlesnobar is False:
        # does not support either
        def call_wrap_socket(sock, server_hostname=None, **opts):
            return _wrap_socket(sock, **opts)
    else:
        # we assume it takes **args
        def call_wrap_socket(sock, **opts):
            if 'server_hostname' in opts:
                opts['serverhostname'] = opts['server_hostname']
            return _wrap_socket(sock, **opts)
    self._ssl_wrap_socket = call_wrap_socket

    # Fill in whichever of use_ssl / port was left unspecified.
    if use_ssl is None:
        if port is None:
            use_ssl = False
            port = 80
        else:
            use_ssl = (port == 443)
    elif port is None:
        port = 443 if use_ssl else 80

    self.port = port
    self.ssl = use_ssl
    self.ssl_opts = ssl_opts
    self._ssl_validator = ssl_validator
    self.host = host
    self.sock = None
    self._current_response = None
    self._current_response_taken = False

    if proxy_hostport is not None:
        self._proxy_host, self._proxy_port = proxy_hostport
        self._proxy_headers = _foldheaders(proxy_headers or {})
    else:
        self._proxy_host = self._proxy_port = None
        if proxy_headers:
            raise ValueError(
                'proxy_headers may not be specified unless '
                'proxy_hostport is also specified.')
        self._proxy_headers = {}

    self.timeout = timeout
    self.continue_timeout = continue_timeout
Augie Fackler
|
def _connect(self, proxy_headers):
    """Connect to the host and port specified in __init__.

    Does nothing if a socket is already open. When a proxy is
    configured the TCP connection goes to the proxy; for ssl a CONNECT
    tunnel is negotiated first and then wrapped. The resulting socket
    is left in non-blocking mode and stored on self.sock.

    Args:
      proxy_headers: Folded headers to send with the CONNECT request.

    Raises:
      HTTPTimeoutException: if the proxy does not answer the CONNECT
          request in time.
      HTTPProxyConnectFailedException: if the proxy answers CONNECT
          with a status other than 200.
    """
    if self.sock:
        return
    if self._proxy_host is not None:
        logger.info('Connecting to http proxy %s:%s',
                    self._proxy_host, self._proxy_port)
        sock = socket.create_connection((self._proxy_host,
                                         self._proxy_port))
    else:
        sock = socket.create_connection((self.host, self.port))

    established = False
    try:
        if self._proxy_host is not None and self.ssl:
            # An IPv6 literal must be bracketed so the port separator
            # in the CONNECT target is unambiguous.
            target_host = self.host
            if b':' in target_host and not target_host.startswith(b'['):
                target_host = b'[%s]' % target_host
            data = self._buildheaders(
                b'CONNECT', b'%s:%d' % (target_host, self.port),
                proxy_headers, HTTP_VER_1_0)
            # sendall: send() may write only part of the request.
            sock.sendall(data)
            sock.setblocking(0)
            r = self.response_class(sock, self.timeout, b'CONNECT')
            timeout_exc = HTTPTimeoutException(
                'Timed out waiting for CONNECT response from proxy')
            while not r.complete():
                try:
                    # We're a friend of the response class, so let
                    # us use the private attribute.
                    # pylint: disable=W0212
                    got_data = r._select()
                except HTTPTimeoutException:
                    # _select can raise the timeout itself; report
                    # it with the CONNECT-specific message.
                    raise timeout_exc
                if not got_data and not r.complete():
                    raise timeout_exc
            if r.status != 200:
                raise HTTPProxyConnectFailedException(
                    'Proxy connection failed: %d %s' % (r.status,
                                                        r.read()))
            logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
                        self.host, self.port)
        if self.ssl:
            # This is the default, but in the case of proxied SSL
            # requests the proxy logic above will have cleared
            # blocking mode, so re-enable it just to be safe.
            sock.setblocking(1)
            logger.debug('wrapping socket for ssl with options %r',
                         self.ssl_opts)
            sock = self._ssl_wrap_socket(sock, server_hostname=self.host,
                                         **self.ssl_opts)
            if self._ssl_validator:
                self._ssl_validator(sock)
        sock.setblocking(0)
        established = True
    finally:
        if not established:
            # Do not leak the socket when tunnelling, wrapping or
            # validation fails.
            sock.close()
    self.sock = sock
Augie Fackler
|
def _buildheaders(self, method, path, headers, http_ver):
    """Serialize the request line and headers into bytes.

    Args:
      method: Request method, as bytes.
      path: Request target, as bytes. When talking to a plain http
          proxy it must start with "/" and is expanded to a full URI.
      headers: Folded headers mapping lowercased name to
          (name, value). This dict is modified: the Host and
          accept-encoding entries are overwritten.
      http_ver: Protocol version token for the request line, as bytes.

    Returns:
      The request head, ending with the blank line, as bytes.
    """
    host = self.host
    if b':' in host and not host.startswith(b'['):
        # An IPv6 literal is always bracketed in the Host header.
        host = b'[%s]' % host
    default_port = 443 if self.ssl else 80
    if self.port == default_port:
        # default port for protocol, so leave it out
        hdrhost = host
    else:
        # include nonstandard port in header
        hdrhost = b'%s:%d' % (host, self.port)
    if self._proxy_host and not self.ssl:
        # When talking to a regular http proxy we must send the
        # full URI, but in all other cases we must not (although
        # technically RFC 2616 says servers must accept our
        # request if we screw up, experimentally few do that
        # correctly.)
        assert path[0:1] == b'/', 'path must start with a /'
        path = b'http://%s%s' % (hdrhost, path)
    outgoing = [b'%s %s %s%s' % (method, path, http_ver, EOL)]
    # Folded keys may be text or bytes; drop the bytes spelling before
    # setting the text one so exactly one entry per header survives on
    # Python 3 (on Python 2 both spellings are the same key).
    headers.pop(b'host', None)
    headers['host'] = (b'Host', hdrhost)
    headers.pop(HDR_ACCEPT_ENCODING.encode('ascii'), None)
    headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
    for hdr, val in sorted((_ensurebytes(h), _ensurebytes(v))
                           for h, v in headers.values()):
        outgoing.append(b'%s: %s%s' % (hdr, val, EOL))
    outgoing.append(EOL)
    return b''.join(outgoing)
Augie Fackler
|
r14243 | |||
def close(self):
    """Close the connection to the server.

    Does nothing when there is no open socket. Otherwise the socket
    is closed, self.sock is reset to None and the closure is logged.
    """
    sock = self.sock
    if sock is not None:
        sock.close()
        self.sock = None
        logger.info('closed connection to %s on %s', self.host, self.port)
def busy(self):
    """Report whether a request/response exchange is still in flight.

    A pending response counts as busy even when the request itself has
    been fully sent. Once the caller has taken the response, the
    connection becomes free again as soon as that response either is
    marked to close the socket or has completed; in both cases the
    bookkeeping for the finished response is dropped here.

    Returns:
      True if the connection cannot accept a new request yet,
      False otherwise.
    """
    pending = self._current_response
    if pending is None:
        return False
    if not self._current_response_taken:
        # Nobody has fetched the response yet, so we are still busy.
        return True
    if pending.will_close:
        # The response owns (and will close) the socket; disown it.
        self.sock = None
        self._current_response = None
        return False
    if pending.complete():
        self._current_response = None
        return False
    return True
Augie Fackler
|
def _reconnect(self, where, pheaders):
    """Drop the current socket and connect again.

    Args:
      where: short label for the phase that triggered the reconnect;
        only used in the log message.
      pheaders: headers handed through to ``self._connect``.
    """
    logger.info('reconnecting during %s', where)
    self.close()
    self._connect(pheaders)
Augie Fackler
|
r19182 | |||
Augie Fackler
|
def request(self, method, path, body=None, headers={},
            expect_continue=False):
    """Send a request to the server.

    For increased flexibility, this does not return the response
    object. Future versions of HTTPConnection that juggle multiple
    sockets will be able to send (for example) 5 requests all at
    once, and then let the requests arrive as data is
    available. Use the `getresponse()` method to retrieve the
    response.

    Args:
      method: HTTP method; converted to bytes.
      path: request path; converted to bytes.
      body: optional request body. Either an object with ``__len__``
        (sent with a Content-Length) or one with ``read()`` (sent
        chunked), unless the caller already supplied a
        Content-Length header.
      headers: request headers supplied by the caller.
      expect_continue: if true, send ``Expect: 100-Continue`` and wait
        up to ``self.continue_timeout`` for the server before sending
        the body.

    Raises:
      httplib.CannotSendRequest: a previous response is still pending.
      BadRequestData: body has neither ``__len__`` nor ``read``.
      HTTPTimeoutException: the socket was not ready within the timeout.
      HTTPStateError: the connection closed mid-request without a
        complete response.
    """
    method = _ensurebytes(method)
    path = _ensurebytes(path)
    if self.busy():
        raise httplib.CannotSendRequest(
            'Can not send another request before '
            'current response is read!')
    self._current_response_taken = False

    logger.info('sending %s request for %s to %s on port %s',
                method, path, self.host, self.port)

    # The send loop below consumes ``body`` piece by piece, so keep a
    # reference to what the caller handed us in case the whole request
    # has to be replayed on a fresh socket.
    original_body = body

    hdrs = _foldheaders(headers)
    # Figure out headers that have to be computed from the request
    # body.
    chunked = False
    if body and HDR_CONTENT_LENGTH not in hdrs:
        if getattr(body, '__len__', False):
            hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH,
                                        b'%d' % len(body))
        elif getattr(body, 'read', False):
            hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
                                       XFER_ENCODING_CHUNKED)
            chunked = True
        else:
            raise BadRequestData('body has no __len__() nor read()')
    # Figure out expect-continue header
    if hdrs.get('expect', ('', ''))[1].lower() == b'100-continue':
        expect_continue = True
    elif expect_continue:
        hdrs['expect'] = (b'Expect', b'100-Continue')
    # httplib compatibility: if the user specified a
    # proxy-authorization header, that's actually intended for a
    # proxy CONNECT action, not the real request, but only if
    # we're going to use a proxy.
    pheaders = dict(self._proxy_headers)
    if self._proxy_host and self.ssl:
        pa = hdrs.pop('proxy-authorization', None)
        if pa is not None:
            pheaders['proxy-authorization'] = pa
    # Build header data
    outgoing_headers = self._buildheaders(
        method, path, hdrs, self.http_version)

    # If we're reusing the underlying socket, there are some
    # conditions where we'll want to retry, so make a note of the
    # state of self.sock
    fresh_socket = self.sock is None
    self._connect(pheaders)
    response = None
    first = True

    while ((outgoing_headers or body)
           and not (response and response.complete())):
        select_timeout = self.timeout
        out = outgoing_headers or body
        blocking_on_continue = False
        if expect_continue and not outgoing_headers and not (
                response and (response.headers or response.continued)):
            logger.info(
                'waiting up to %s seconds for'
                ' continue response from server',
                self.continue_timeout)
            select_timeout = self.continue_timeout
            blocking_on_continue = True
            out = False
        if out:
            w = [self.sock]
        else:
            w = []
        r, w, x = select.select([self.sock], w, [], select_timeout)
        # if we were expecting a 100 continue and it's been long
        # enough, just go ahead and assume it's ok. This is the
        # recommended behavior from the RFC.
        if r == w == x == []:
            if blocking_on_continue:
                expect_continue = False
                logger.info('no response to continue expectation from '
                            'server, optimistically sending request body')
            else:
                raise HTTPTimeoutException('timeout sending data')
        was_first = first

        # incoming data
        if r:
            try:
                try:
                    data = r[0].recv(INCOMING_BUFFER_SIZE)
                except ssl.SSLError as e:
                    if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                        raise
                    logger.debug('SSL_ERROR_WANT_READ while sending '
                                 'data, retrying...')
                    continue
                if not data:
                    logger.info('socket appears closed in read')
                    self.sock = None
                    self._current_response = None
                    if response is not None:
                        # We're a friend of the response class, so let
                        # us use the private attribute.
                        # pylint: disable=W0212
                        response._close()
                    # This if/elif ladder is a bit subtle,
                    # comments in each branch should help.
                    if response is not None and response.complete():
                        # Server responded completely and then
                        # closed the socket. We should just shut
                        # things down and let the caller get their
                        # response.
                        logger.info('Got an early response, '
                                    'aborting remaining request.')
                        break
                    elif was_first and response is None:
                        # Most likely a keepalive that got killed
                        # on the server's end. Commonly happens
                        # after getting a really large response
                        # from the server.
                        logger.info(
                            'Connection appeared closed in read on first'
                            ' request loop iteration, will retry.')
                        self._reconnect('read', pheaders)
                        continue
                    else:
                        # We didn't just send the first data hunk,
                        # and either have a partial response or no
                        # response at all. There's really nothing
                        # meaningful we can do here.
                        raise HTTPStateError(
                            'Connection appears closed after '
                            'some request data was written, but the '
                            'response was missing or incomplete!')
                logger.debug('read %d bytes in request()', len(data))
                if response is None:
                    response = self.response_class(
                        r[0], self.timeout, method)
                # We're a friend of the response class, so let us
                # use the private attribute.
                # pylint: disable=W0212
                response._load_response(data)
                # Jump to the next select() call so we load more
                # data if the server is still sending us content.
                continue
            except socket.error as e:
                # Exceptions are not subscriptable on Python 3, so go
                # through .args to get at the error code.
                if e.args[0] != errno.EPIPE and not was_first:
                    raise

        # outgoing data
        if w and out:
            try:
                if getattr(out, 'read', False):
                    # pylint guesses the type of out incorrectly here
                    # pylint: disable=E1103
                    data = out.read(OUTGOING_BUFFER_SIZE)
                    if not data:
                        continue
                    if len(data) < OUTGOING_BUFFER_SIZE:
                        # Short read: treat the file-like body as
                        # exhausted. For chunked transfers, queue the
                        # terminating zero-length chunk as the body.
                        if chunked:
                            body = b'0' + EOL + EOL
                        else:
                            body = None
                    if chunked:
                        # This encode is okay because we know
                        # hex() is building us only 0-9 and a-f
                        # digits.
                        asciilen = hex(len(data))[2:].encode('ascii')
                        out = asciilen + EOL + data + EOL
                    else:
                        out = data
                amt = w[0].send(out)
            except socket.error as e:
                code = e.args[0]
                if code == ssl.SSL_ERROR_WANT_WRITE and self.ssl:
                    # This means that SSL hasn't flushed its buffer into
                    # the socket yet.
                    # TODO: find a way to block on ssl flushing its buffer
                    # similar to selecting on a raw socket.
                    continue
                if code == errno.EWOULDBLOCK or code == errno.EAGAIN:
                    continue
                elif (code not in (errno.ECONNRESET, errno.EPIPE)
                      and not first):
                    raise
                self._reconnect('write', pheaders)
                amt = self.sock.send(out)
            logger.debug('sent %d', amt)
            first = False
            # Whatever was not written stays queued. A chunk read from a
            # file-like body is not ``body`` itself, so its unsent tail
            # is parked in outgoing_headers and goes out first next time.
            if out is body:
                body = out[amt:]
            else:
                outgoing_headers = out[amt:]
    # End of request-sending loop.

    # close if the server response said to or responded before eating
    # the whole request
    if response is None:
        response = self.response_class(self.sock, self.timeout, method)
        if not fresh_socket:
            if not response._select():
                # This means the response failed to get any response
                # data at all, and in all probability the socket was
                # closed before the server even saw our request. Try
                # the request again on a fresh socket.
                logger.debug('response._select() failed during request().'
                             ' Assuming request needs to be retried.')
                self.sock = None
                # By now the loop above has consumed ``body``. A sized
                # body can be replayed from the caller's original
                # object; a file-like body has already been read and
                # cannot be rewound here, so for that case we keep
                # passing whatever is left.
                if getattr(original_body, 'read', False):
                    retry_body = body
                else:
                    retry_body = original_body
                # Call this method explicitly to re-try the
                # request. We don't use self.request() because
                # some tools (notably Mercurial) expect to be able
                # to subclass and redefine request(), and they
                # don't have the same argspec as we do.
                #
                # TODO restructure sending of requests to avoid
                # this recursion
                return HTTPConnection.request(
                    self, method, path, body=retry_body, headers=headers,
                    expect_continue=expect_continue)
    data_left = bool(outgoing_headers or body)
    if data_left:
        logger.info('stopped sending request early, '
                    'will close the socket to be safe.')
        response.will_close = True
    if response.will_close:
        # The socket will be closed by the response, so we disown
        # the socket
        self.sock = None
    self._current_response = response
def getresponse(self):
    """Return the response object for the most recent request.

    Keeps pumping the response until its headers have been parsed,
    then updates the connection's bookkeeping depending on whether the
    response will close the socket, is already complete, or is still
    being read by the caller.

    Raises:
      httplib.ResponseNotReady: there is no pending response.
      HTTPRemoteClosedError: no more data could be read before the
        headers were complete.
    """
    resp = self._current_response
    if resp is None:
        raise httplib.ResponseNotReady()
    while resp.headers is None:
        # We're a friend of the response class, so let us use the
        # private attribute.
        # pylint: disable=W0212
        if resp._select():
            continue
        if not resp.complete():
            raise _readers.HTTPRemoteClosedError()
    if resp.will_close:
        # The response takes the socket with it.
        self.sock = None
        self._current_response = None
    elif resp.complete():
        self._current_response = None
    else:
        # Caller still has body data to read; busy() checks this flag.
        self._current_response_taken = True
    return resp
class HTTPTimeoutException(httplib.HTTPException):
    """Raised when waiting on the server took longer than the timeout."""
class BadRequestData(httplib.HTTPException):
    """Raised when a request body offers neither __len__ nor read."""
class HTTPProxyConnectFailedException(httplib.HTTPException):
    """Raised when the connection to the HTTP proxy could not be made."""
Augie Fackler
|
r14376 | |||
Augie Fackler
|
r15218 | |||
Augie Fackler
|
class HTTPStateError(httplib.HTTPException):
    """Raised when the connection ends up in an invalid internal state."""
Augie Fackler
|
r15218 | |||
Augie Fackler
|
# Re-export this exception type from _readers under this module's
# namespace, because it has to be part of the public API.
HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
Augie Fackler
|
r14243 | # no-check-code | ||