##// END OF EJS Templates
httpclient: don't use mutable default argument value...
Pierre-Yves David -
r31411:a53f2d4c default
parent child Browse files
Show More
@@ -1,910 +1,912
1 # Copyright 2010, Google Inc.
1 # Copyright 2010, Google Inc.
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # Redistribution and use in source and binary forms, with or without
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
5 # modification, are permitted provided that the following conditions are
6 # met:
6 # met:
7 #
7 #
8 # * Redistributions of source code must retain the above copyright
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
12 # in the documentation and/or other materials provided with the
13 # distribution.
13 # distribution.
14 # * Neither the name of Google Inc. nor the names of its
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
16 # this software without specific prior written permission.
17
17
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 """Improved HTTP/1.1 client library
29 """Improved HTTP/1.1 client library
30
30
31 This library contains an HTTPConnection which is similar to the one in
31 This library contains an HTTPConnection which is similar to the one in
32 httplib, but has several additional features:
32 httplib, but has several additional features:
33
33
34 * supports keepalives natively
34 * supports keepalives natively
35 * uses select() to block for incoming data
35 * uses select() to block for incoming data
36 * notices when the server responds early to a request
36 * notices when the server responds early to a request
37 * implements ssl inline instead of in a different class
37 * implements ssl inline instead of in a different class
38 """
38 """
39 from __future__ import absolute_import
39 from __future__ import absolute_import
40
40
41 # Many functions in this file have too many arguments.
41 # Many functions in this file have too many arguments.
42 # pylint: disable=R0913
42 # pylint: disable=R0913
43 import email
43 import email
44 import email.message
44 import email.message
45 import errno
45 import errno
46 import inspect
46 import inspect
47 import logging
47 import logging
48 import select
48 import select
49 import socket
49 import socket
50 import ssl
50 import ssl
51 import sys
51 import sys
52
52
53 try:
53 try:
54 import cStringIO as io
54 import cStringIO as io
55 io.StringIO
55 io.StringIO
56 except ImportError:
56 except ImportError:
57 import io
57 import io
58
58
59 try:
59 try:
60 import httplib
60 import httplib
61 httplib.HTTPException
61 httplib.HTTPException
62 except ImportError:
62 except ImportError:
63 import http.client as httplib
63 import http.client as httplib
64
64
65 from . import (
65 from . import (
66 _readers,
66 _readers,
67 )
67 )
68
68
69 logger = logging.getLogger(__name__)
69 logger = logging.getLogger(__name__)
70
70
71 __all__ = ['HTTPConnection', 'HTTPResponse']
71 __all__ = ['HTTPConnection', 'HTTPResponse']
72
72
# HTTP version tokens, as bytes, as they appear in request/status lines.
HTTP_VER_1_0 = b'HTTP/1.0'
HTTP_VER_1_1 = b'HTTP/1.1'

# Buffer sizes in bytes: 32 KiB outgoing, 1 MiB incoming.
OUTGOING_BUFFER_SIZE = 1 << 15
INCOMING_BUFFER_SIZE = 1 << 20

# Lower-cased header names used for lookups in parsed headers.
HDR_ACCEPT_ENCODING = 'accept-encoding'
HDR_CONNECTION_CTRL = 'connection'
HDR_CONTENT_LENGTH = 'content-length'
HDR_XFER_ENCODING = 'transfer-encoding'

# Value of the transfer-encoding header that selects chunked decoding.
XFER_ENCODING_CHUNKED = 'chunked'

# Value of the connection header that announces the peer will close.
CONNECTION_CLOSE = 'close'

# Standard HTTP line terminator, and the blank line that ends a header
# block.
EOL = b'\r\n'
_END_HEADERS = EOL * 2

# Based on some searching around, 1 second seems like a reasonable
# default here.
TIMEOUT_ASSUME_CONTINUE = 1
# None means no timeout is applied by default.
TIMEOUT_DEFAULT = None
95
95
96 if sys.version_info > (3, 0):
96 if sys.version_info > (3, 0):
97 _unicode = str
97 _unicode = str
98 else:
98 else:
99 _unicode = unicode
99 _unicode = unicode
100
100
101 def _ensurebytes(data):
101 def _ensurebytes(data):
102 if not isinstance(data, (_unicode, bytes)):
102 if not isinstance(data, (_unicode, bytes)):
103 data = str(data)
103 data = str(data)
104 if not isinstance(data, bytes):
104 if not isinstance(data, bytes):
105 try:
105 try:
106 return data.encode('latin-1')
106 return data.encode('latin-1')
107 except UnicodeEncodeError as err:
107 except UnicodeEncodeError as err:
108 raise UnicodeEncodeError(
108 raise UnicodeEncodeError(
109 err.encoding,
109 err.encoding,
110 err.object,
110 err.object,
111 err.start,
111 err.start,
112 err.end,
112 err.end,
113 '%r is not valid Latin-1 Use .encode("utf-8") '
113 '%r is not valid Latin-1 Use .encode("utf-8") '
114 'if sending as utf-8 is desired.' % (
114 'if sending as utf-8 is desired.' % (
115 data[err.start:err.end],))
115 data[err.start:err.end],))
116 return data
116 return data
117
117
118 class _CompatMessage(email.message.Message):
118 class _CompatMessage(email.message.Message):
119 """Workaround for rfc822.Message and email.message.Message API diffs."""
119 """Workaround for rfc822.Message and email.message.Message API diffs."""
120
120
121 @classmethod
121 @classmethod
122 def from_string(cls, s):
122 def from_string(cls, s):
123 if sys.version_info > (3, 0):
123 if sys.version_info > (3, 0):
124 # Python 3 can't decode headers from bytes, so we have to
124 # Python 3 can't decode headers from bytes, so we have to
125 # trust RFC 2616 and decode the headers as iso-8859-1
125 # trust RFC 2616 and decode the headers as iso-8859-1
126 # bytes.
126 # bytes.
127 s = s.decode('iso-8859-1')
127 s = s.decode('iso-8859-1')
128 headers = email.message_from_string(s, _class=_CompatMessage)
128 headers = email.message_from_string(s, _class=_CompatMessage)
129 # Fix multi-line headers to match httplib's behavior from
129 # Fix multi-line headers to match httplib's behavior from
130 # Python 2.x, since email.message.Message handles them in
130 # Python 2.x, since email.message.Message handles them in
131 # slightly different ways.
131 # slightly different ways.
132 if sys.version_info < (3, 0):
132 if sys.version_info < (3, 0):
133 new = []
133 new = []
134 for h, v in headers._headers:
134 for h, v in headers._headers:
135 if '\r\n' in v:
135 if '\r\n' in v:
136 v = '\n'.join([' ' + x.lstrip() for x in v.split('\r\n')])[1:]
136 v = '\n'.join([' ' + x.lstrip() for x in v.split('\r\n')])[1:]
137 new.append((h, v))
137 new.append((h, v))
138 headers._headers = new
138 headers._headers = new
139 return headers
139 return headers
140
140
141 def getheaders(self, key):
141 def getheaders(self, key):
142 return self.get_all(key)
142 return self.get_all(key)
143
143
144 def getheader(self, key, default=None):
144 def getheader(self, key, default=None):
145 return self.get(key, failobj=default)
145 return self.get(key, failobj=default)
146
146
147
147
class HTTPResponse(object):
    """Response from an HTTP server.

    The response will continue to load as available. If you need the
    complete response before continuing, check the .complete() method.
    """
    def __init__(self, sock, timeout, method):
        """Create a response that will be loaded from sock.

        Args:
          sock: The socket the response is read from.
          timeout: Timeout passed to select() while waiting for data.
          method: The request method, as bytes. HEAD and CONNECT change
            how the response body length is determined.
        """
        self.sock = sock
        self.method = method
        # Bytes received so far while the headers are still incomplete.
        self.raw_response = b''
        self._headers_len = 0
        # Parsed headers; stays None until the header block is complete.
        self.headers = None
        self.will_close = False
        self.status_line = b''
        self.status = None
        # Set once a "100 Continue" interim response has been seen.
        self.continued = False
        self.http_version = None
        self.reason = None
        # Body reader, chosen once the headers have been parsed.
        self._reader = None

        self._read_location = 0
        # Line terminator in use; replaced if the server is found to
        # send bare \n or \r line endings.
        self._eol = EOL

        self._timeout = timeout

    @property
    def _end_headers(self):
        """The byte sequence marking the end of the header block."""
        return self._eol * 2

    def complete(self):
        """Returns true if this response is completely loaded.

        Note that if this is a connection where complete means the
        socket is closed, this will nearly always return False, even
        in cases where all the data has actually been loaded.
        """
        if self._reader:
            return self._reader.done()
        # Headers have not been parsed yet, so there is no body reader
        # and the response cannot be complete.
        return False

    def _close(self):
        """Tell the body reader, if any, that the connection closed."""
        if self._reader is not None:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._close()

    def getheader(self, header, default=None):
        """Return the value of header, or default if it is absent."""
        return self.headers.getheader(header, default=default)

    def getheaders(self):
        """Return the response headers as (name, value) pairs.

        On Python 2 the names are lowercased; on Python 3 they are
        returned as received.
        """
        if sys.version_info < (3, 0):
            return [(k.lower(), v) for k, v in self.headers.items()]
        # Starting in Python 3, headers aren't lowercased before being
        # returned here.
        return self.headers.items()

    def readline(self):
        """Read a single line from the response body.

        This may block until either a line ending is found or the
        response is complete.
        """
        blocks = []
        while True:
            self._reader.readto(b'\n', blocks)

            if blocks and blocks[-1][-1:] == b'\n' or self.complete():
                break

            self._select()

        return b''.join(blocks)

    def read(self, length=None):
        """Read data from the response body.

        Args:
          length: Number of bytes to read. If None (or 0), wait for the
            response to complete and return everything available.

        Returns:
          Whatever the body reader returns for the requested length.
        """
        # if length is None, unbounded read
        while (not self.complete()  # never select on a finished read
               and (not length  # unbounded, so we wait for complete()
                    or length > self._reader.available_data)):
            self._select()
        if not length:
            length = self._reader.available_data
        r = self._reader.read(length)
        if self.complete() and self.will_close:
            self.sock.close()
        return r

    def _select(self):
        """Wait for the socket to become readable and load what arrives.

        Returns:
          True if data was loaded or the SSL layer asked for a retry,
          False if the socket was readable but yielded no data (the
          peer closed the connection).

        Raises:
          HTTPTimeoutException: if the socket did not become readable
            within the timeout and the response is not complete.
        """
        r, unused_write, unused_err = select.select(
            [self.sock], [], [], self._timeout)
        if not r:
            # socket was not readable. If the response is not
            # complete, raise a timeout.
            if not self.complete():
                logger.info('timed out with timeout of %s', self._timeout)
                raise HTTPTimeoutException('timeout reading data')
        try:
            data = self.sock.recv(INCOMING_BUFFER_SIZE)
        except ssl.SSLError as e:
            if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                raise
            logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
            return True
        logger.debug('response read %d data during _select', len(data))
        # If the socket was readable and no data was read, that means
        # the socket was closed. Inform the reader (if any) so it can
        # raise an exception if this is an invalid situation.
        if not data:
            if self._reader:
                # We're a friend of the reader class here.
                # pylint: disable=W0212
                self._reader._close()
            return False
        else:
            self._load_response(data)
            return True

    # This method gets replaced by _load later, which confuses pylint.
    def _load_response(self, data): # pylint: disable=E0202
        """Accumulate header bytes and parse them once they are complete.

        Once the full header block has arrived this sets the status,
        headers and body reader, hands any body bytes already received
        to the reader, and rebinds self._load_response to the reader's
        _load so later data bypasses this method.

        Args:
          data: Bytes just received from the socket.
        """
        # Being here implies we're not at the end of the headers yet,
        # since at the end of this method if headers were completely
        # loaded we replace this method with the load() method of the
        # reader we created.
        self.raw_response += data
        # This is a bogus server with bad line endings
        if self._eol not in self.raw_response:
            for bad_eol in (b'\n', b'\r'):
                if (bad_eol in self.raw_response
                    # verify that bad_eol is not the end of the incoming data
                    # as this could be a response line that just got
                    # split between \r and \n.
                    and (self.raw_response.index(bad_eol) <
                         (len(self.raw_response) - 1))):
                    logger.info('bogus line endings detected, '
                                'using %r for EOL', bad_eol)
                    self._eol = bad_eol
                    break
        # exit early if not at end of headers
        if self._end_headers not in self.raw_response or self.headers:
            return

        # handle 100-continue response
        hdrs, body = self.raw_response.split(self._end_headers, 1)
        unused_http_ver, status = hdrs.split(b' ', 1)
        if status.startswith(b'100'):
            # Drop the interim response; whatever followed it is kept
            # and parsed when more data arrives.
            self.raw_response = body
            self.continued = True
            logger.debug('continue seen, setting body to %r', body)
            return

        # arriving here means we should parse response headers
        # as all headers have arrived completely
        del self.raw_response
        if self._eol in hdrs:
            self.status_line, hdrs = hdrs.split(self._eol, 1)
        else:
            self.status_line = hdrs
            hdrs = b''
        # TODO HTTP < 1.0 support
        status_parts = self.status_line.split(b' ', 2)
        if len(status_parts) == 2:
            # The server sent no reason phrase; treat it as empty
            # instead of failing to unpack.
            status_parts.append(b'')
        self.http_version, self.status, self.reason = status_parts
        self.status = int(self.status)
        if self._eol != EOL:
            # Normalize bogus line endings before parsing the headers.
            hdrs = hdrs.replace(self._eol, b'\r\n')
        headers = _CompatMessage.from_string(hdrs)
        content_len = None
        if HDR_CONTENT_LENGTH in headers:
            content_len = int(headers[HDR_CONTENT_LENGTH])
        if self.http_version == HTTP_VER_1_0:
            self.will_close = True
        elif HDR_CONNECTION_CTRL in headers:
            self.will_close = (
                headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
        if (HDR_XFER_ENCODING in headers
            and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
            self._reader = _readers.ChunkedReader(self._eol)
            logger.debug('using a chunked reader')
        else:
            # HEAD responses are forbidden from returning a body, and
            # it's implausible for a CONNECT response to use
            # close-is-end logic for an OK response.
            if (self.method == b'HEAD' or
                (self.method == b'CONNECT' and content_len is None)):
                content_len = 0
            if content_len is not None:
                logger.debug('using a content-length reader with length %d',
                             content_len)
                self._reader = _readers.ContentLengthReader(content_len)
            else:
                # Response body had no length specified and is not
                # chunked, so the end of the body will only be
                # identifiable by the termination of the socket by the
                # server. My interpretation of the spec means that we
                # are correct in hitting this case if
                # transfer-encoding, content-length, and
                # connection-control were left unspecified.
                self._reader = _readers.CloseIsEndReader()
                logger.debug('using a close-is-end reader')
                self.will_close = True

        if body:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._load(body)
        logger.debug('headers complete')
        self.headers = headers
        # We're a friend of the reader class here.
        # pylint: disable=W0212
        self._load_response = self._reader._load
357
357
358 def _foldheaders(headers):
358 def _foldheaders(headers):
359 """Given some headers, rework them so we can safely overwrite values.
359 """Given some headers, rework them so we can safely overwrite values.
360
360
361 >>> _foldheaders({'Accept-Encoding': 'wat'})
361 >>> _foldheaders({'Accept-Encoding': 'wat'})
362 {'accept-encoding': ('Accept-Encoding', 'wat')}
362 {'accept-encoding': ('Accept-Encoding', 'wat')}
363 """
363 """
364 return dict((k.lower(), (k, v)) for k, v in headers.items())
364 return dict((k.lower(), (k, v)) for k, v in headers.items())
365
365
366 try:
366 try:
367 inspect.signature
367 inspect.signature
368 def _handlesarg(func, arg):
368 def _handlesarg(func, arg):
369 """ Try to determine if func accepts arg
369 """ Try to determine if func accepts arg
370
370
371 If it takes arg, return True
371 If it takes arg, return True
372 If it happens to take **args, then it could do anything:
372 If it happens to take **args, then it could do anything:
373 * It could throw a different TypeError, just for fun
373 * It could throw a different TypeError, just for fun
374 * It could throw an ArgumentError or anything else
374 * It could throw an ArgumentError or anything else
375 * It could choose not to throw an Exception at all
375 * It could choose not to throw an Exception at all
376 ... return 'unknown'
376 ... return 'unknown'
377
377
378 Otherwise, return False
378 Otherwise, return False
379 """
379 """
380 params = inspect.signature(func).parameters
380 params = inspect.signature(func).parameters
381 if arg in params:
381 if arg in params:
382 return True
382 return True
383 for p in params:
383 for p in params:
384 if params[p].kind == inspect._ParameterKind.VAR_KEYWORD:
384 if params[p].kind == inspect._ParameterKind.VAR_KEYWORD:
385 return 'unknown'
385 return 'unknown'
386 return False
386 return False
387 except AttributeError:
387 except AttributeError:
388 def _handlesarg(func, arg):
388 def _handlesarg(func, arg):
389 """ Try to determine if func accepts arg
389 """ Try to determine if func accepts arg
390
390
391 If it takes arg, return True
391 If it takes arg, return True
392 If it happens to take **args, then it could do anything:
392 If it happens to take **args, then it could do anything:
393 * It could throw a different TypeError, just for fun
393 * It could throw a different TypeError, just for fun
394 * It could throw an ArgumentError or anything else
394 * It could throw an ArgumentError or anything else
395 * It could choose not to throw an Exception at all
395 * It could choose not to throw an Exception at all
396 ... return 'unknown'
396 ... return 'unknown'
397
397
398 Otherwise, return False
398 Otherwise, return False
399 """
399 """
400 spec = inspect.getargspec(func)
400 spec = inspect.getargspec(func)
401 if arg in spec.args:
401 if arg in spec.args:
402 return True
402 return True
403 if spec.keywords:
403 if spec.keywords:
404 return 'unknown'
404 return 'unknown'
405 return False
405 return False
406
406
407 class HTTPConnection(object):
407 class HTTPConnection(object):
408 """Connection to a single http server.
408 """Connection to a single http server.
409
409
410 Supports 100-continue and keepalives natively. Uses select() for
410 Supports 100-continue and keepalives natively. Uses select() for
411 non-blocking socket operations.
411 non-blocking socket operations.
412 """
412 """
413 http_version = HTTP_VER_1_1
413 http_version = HTTP_VER_1_1
414 response_class = HTTPResponse
414 response_class = HTTPResponse
415
415
416 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
416 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
417 timeout=TIMEOUT_DEFAULT,
417 timeout=TIMEOUT_DEFAULT,
418 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
418 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
419 proxy_hostport=None, proxy_headers=None,
419 proxy_hostport=None, proxy_headers=None,
420 ssl_wrap_socket=None, **ssl_opts):
420 ssl_wrap_socket=None, **ssl_opts):
421 """Create a new HTTPConnection.
421 """Create a new HTTPConnection.
422
422
423 Args:
423 Args:
424 host: The host to which we'll connect.
424 host: The host to which we'll connect.
425 port: Optional. The port over which we'll connect. Default 80 for
425 port: Optional. The port over which we'll connect. Default 80 for
426 non-ssl, 443 for ssl.
426 non-ssl, 443 for ssl.
427 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
427 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
428 not 443, true if port is 443.
428 not 443, true if port is 443.
429 ssl_validator: a function(socket) to validate the ssl cert
429 ssl_validator: a function(socket) to validate the ssl cert
430 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
430 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
431 continue_timeout: Optional. Timeout for waiting on an expected
431 continue_timeout: Optional. Timeout for waiting on an expected
432 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
432 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
433 proxy_hostport: Optional. Tuple of (host, port) to use as an http
433 proxy_hostport: Optional. Tuple of (host, port) to use as an http
434 proxy for the connection. Default is to not use a proxy.
434 proxy for the connection. Default is to not use a proxy.
435 proxy_headers: Optional dict of header keys and values to send to
435 proxy_headers: Optional dict of header keys and values to send to
436 a proxy when using CONNECT. For compatibility with
436 a proxy when using CONNECT. For compatibility with
437 httplib, the Proxy-Authorization header may be
437 httplib, the Proxy-Authorization header may be
438 specified in headers for request(), which will clobber
438 specified in headers for request(), which will clobber
439 any such header specified here if specified. Providing
439 any such header specified here if specified. Providing
440 this option and not proxy_hostport will raise an
440 this option and not proxy_hostport will raise an
441 ValueError.
441 ValueError.
442 ssl_wrap_socket: Optional function to use for wrapping
442 ssl_wrap_socket: Optional function to use for wrapping
443 sockets. If unspecified, the one from the ssl module will
443 sockets. If unspecified, the one from the ssl module will
444 be used if available, or something that's compatible with
444 be used if available, or something that's compatible with
445 it if on a Python older than 2.6.
445 it if on a Python older than 2.6.
446
446
447 Any extra keyword arguments to this function will be provided
447 Any extra keyword arguments to this function will be provided
448 to the ssl_wrap_socket method. If no ssl
448 to the ssl_wrap_socket method. If no ssl
449 """
449 """
450 host = _ensurebytes(host)
450 host = _ensurebytes(host)
451 if port is None and host.count(b':') == 1 or b']:' in host:
451 if port is None and host.count(b':') == 1 or b']:' in host:
452 host, port = host.rsplit(b':', 1)
452 host, port = host.rsplit(b':', 1)
453 port = int(port)
453 port = int(port)
454 if b'[' in host:
454 if b'[' in host:
455 host = host[1:-1]
455 host = host[1:-1]
456 if ssl_wrap_socket is not None:
456 if ssl_wrap_socket is not None:
457 _wrap_socket = ssl_wrap_socket
457 _wrap_socket = ssl_wrap_socket
458 else:
458 else:
459 _wrap_socket = ssl.wrap_socket
459 _wrap_socket = ssl.wrap_socket
460 call_wrap_socket = None
460 call_wrap_socket = None
461 handlesubar = _handlesarg(_wrap_socket, 'server_hostname')
461 handlesubar = _handlesarg(_wrap_socket, 'server_hostname')
462 if handlesubar is True:
462 if handlesubar is True:
463 # supports server_hostname
463 # supports server_hostname
464 call_wrap_socket = _wrap_socket
464 call_wrap_socket = _wrap_socket
465 handlesnobar = _handlesarg(_wrap_socket, 'serverhostname')
465 handlesnobar = _handlesarg(_wrap_socket, 'serverhostname')
466 if handlesnobar is True and handlesubar is not True:
466 if handlesnobar is True and handlesubar is not True:
467 # supports serverhostname
467 # supports serverhostname
468 def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
468 def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
469 return _wrap_socket(sock, serverhostname=server_hostname,
469 return _wrap_socket(sock, serverhostname=server_hostname,
470 **ssl_opts)
470 **ssl_opts)
471 if handlesubar is False and handlesnobar is False:
471 if handlesubar is False and handlesnobar is False:
472 # does not support either
472 # does not support either
473 def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
473 def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
474 return _wrap_socket(sock, **ssl_opts)
474 return _wrap_socket(sock, **ssl_opts)
475 if call_wrap_socket is None:
475 if call_wrap_socket is None:
476 # we assume it takes **args
476 # we assume it takes **args
477 def call_wrap_socket(sock, **ssl_opts):
477 def call_wrap_socket(sock, **ssl_opts):
478 if 'server_hostname' in ssl_opts:
478 if 'server_hostname' in ssl_opts:
479 ssl_opts['serverhostname'] = ssl_opts['server_hostname']
479 ssl_opts['serverhostname'] = ssl_opts['server_hostname']
480 return _wrap_socket(sock, **ssl_opts)
480 return _wrap_socket(sock, **ssl_opts)
481 self._ssl_wrap_socket = call_wrap_socket
481 self._ssl_wrap_socket = call_wrap_socket
482 if use_ssl is None and port is None:
482 if use_ssl is None and port is None:
483 use_ssl = False
483 use_ssl = False
484 port = 80
484 port = 80
485 elif use_ssl is None:
485 elif use_ssl is None:
486 use_ssl = (port == 443)
486 use_ssl = (port == 443)
487 elif port is None:
487 elif port is None:
488 port = (use_ssl and 443 or 80)
488 port = (use_ssl and 443 or 80)
489 self.port = port
489 self.port = port
490 self.ssl = use_ssl
490 self.ssl = use_ssl
491 self.ssl_opts = ssl_opts
491 self.ssl_opts = ssl_opts
492 self._ssl_validator = ssl_validator
492 self._ssl_validator = ssl_validator
493 self.host = host
493 self.host = host
494 self.sock = None
494 self.sock = None
495 self._current_response = None
495 self._current_response = None
496 self._current_response_taken = False
496 self._current_response_taken = False
497 if proxy_hostport is None:
497 if proxy_hostport is None:
498 self._proxy_host = self._proxy_port = None
498 self._proxy_host = self._proxy_port = None
499 if proxy_headers:
499 if proxy_headers:
500 raise ValueError(
500 raise ValueError(
501 'proxy_headers may not be specified unless '
501 'proxy_headers may not be specified unless '
502 'proxy_hostport is also specified.')
502 'proxy_hostport is also specified.')
503 else:
503 else:
504 self._proxy_headers = {}
504 self._proxy_headers = {}
505 else:
505 else:
506 self._proxy_host, self._proxy_port = proxy_hostport
506 self._proxy_host, self._proxy_port = proxy_hostport
507 self._proxy_headers = _foldheaders(proxy_headers or {})
507 self._proxy_headers = _foldheaders(proxy_headers or {})
508
508
509 self.timeout = timeout
509 self.timeout = timeout
510 self.continue_timeout = continue_timeout
510 self.continue_timeout = continue_timeout
511
511
def _connect(self, proxy_headers):
    """Connect to the host and port specified in __init__.

    This is a no-op if a socket is already open. When a proxy host is
    configured the TCP connection is made to the proxy instead, and
    for SSL connections a CONNECT tunnel to the real host is
    requested (sending proxy_headers) before the socket is wrapped
    for SSL. On success the socket is switched to non-blocking mode
    and stored in self.sock.

    Raises HTTPTimeoutException if no complete CONNECT response is
    received from the proxy, and HTTPProxyConnectFailedException if
    the proxy answers with a status other than 200. If anything fails
    after the TCP connection has been established, the socket is
    closed before the exception propagates.
    """
    if self.sock:
        return
    if self._proxy_host is not None:
        logger.info('Connecting to http proxy %s:%s',
                    self._proxy_host, self._proxy_port)
        sock = socket.create_connection((self._proxy_host,
                                         self._proxy_port))
    else:
        sock = socket.create_connection((self.host, self.port))
    try:
        if self._proxy_host is not None and self.ssl:
            data = self._buildheaders(b'CONNECT', b'%s:%d' % (self.host,
                                                              self.port),
                                      proxy_headers, HTTP_VER_1_0)
            # sendall() rather than send(): send() is allowed to
            # transmit only part of the buffer, which would leave the
            # proxy waiting on a truncated CONNECT request.
            sock.sendall(data)
            sock.setblocking(0)
            r = self.response_class(sock, self.timeout, b'CONNECT')
            timeout_exc = HTTPTimeoutException(
                'Timed out waiting for CONNECT response from proxy')
            while not r.complete():
                try:
                    # We're a friend of the response class, so let
                    # us use the private attribute.
                    # pylint: disable=W0212
                    if not r._select():
                        if not r.complete():
                            raise timeout_exc
                except HTTPTimeoutException:
                    # This raise/except pattern looks goofy, but
                    # _select can raise the timeout as well as the
                    # loop body. I wish it wasn't this convoluted,
                    # but I don't have a better solution
                    # immediately handy.
                    raise timeout_exc
            if r.status != 200:
                raise HTTPProxyConnectFailedException(
                    'Proxy connection failed: %d %s' % (r.status,
                                                        r.read()))
            logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
                        self.host, self.port)
        if self.ssl:
            # This is the default, but in the case of proxied SSL
            # requests the proxy logic above will have cleared
            # blocking mode, so re-enable it just to be safe.
            sock.setblocking(1)
            logger.debug('wrapping socket for ssl with options %r',
                         self.ssl_opts)
            sock = self._ssl_wrap_socket(sock, server_hostname=self.host,
                                         **self.ssl_opts)
            if self._ssl_validator:
                self._ssl_validator(sock)
        sock.setblocking(0)
    except BaseException:
        # Don't leak the half-set-up socket: nothing else holds a
        # reference that would ever close it.
        sock.close()
        raise
    self.sock = sock
566
566
def _buildheaders(self, method, path, headers, http_ver):
    """Build the request line and header block as a single bytestring.

    method, path and http_ver are bytes used to form the request
    line. headers is a dict whose values are (name, value) pairs; it
    is modified in place: the b'host' and HDR_ACCEPT_ENCODING entries
    are always (re)set here. Header lines are emitted sorted by
    (name, value) and the block is terminated by an empty line.

    When talking to a plain-http proxy the path is rewritten to an
    absolute http:// URI; in that case path must start with b'/'.
    """
    # The port may only be omitted from the Host header when it is the
    # default for the scheme actually in use (443 for SSL, 80
    # otherwise). In particular SSL on port 80 must spell the port out.
    if self.ssl:
        default_port = 443
    else:
        default_port = 80
    if b':' in self.host: # must be IPv6
        # IPv6 literals have to be bracketed in Host headers and URIs,
        # whether or not a port follows.
        hosttext = b'[%s]' % self.host
    else:
        hosttext = self.host
    if self.port == default_port:
        # default port for protocol, so leave it out
        hdrhost = hosttext
    else:
        # include nonstandard port in header
        hdrhost = b'%s:%d' % (hosttext, self.port)
    if self._proxy_host and not self.ssl:
        # When talking to a regular http proxy we must send the
        # full URI, but in all other cases we must not (although
        # technically RFC 2616 says servers must accept our
        # request if we screw up, experimentally few do that
        # correctly.)
        assert path[0:1] == b'/', 'path must start with a /'
        path = b'http://%s%s' % (hdrhost, path)
    outgoing = [b'%s %s %s%s' % (method, path, http_ver, EOL)]
    headers[b'host'] = (b'Host', hdrhost)
    headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
    for hdr, val in sorted((_ensurebytes(h), _ensurebytes(v))
                           for h, v in headers.values()):
        outgoing.append(b'%s: %s%s' % (hdr, val, EOL))
    outgoing.append(EOL)
    return b''.join(outgoing)
593
593
def close(self):
    """Close the connection to the server.

    Calling this when no socket is open does nothing. Note that the
    connection can also go away on its own, either because the server
    asked for it or because the kind of response requires it.
    """
    sock = self.sock
    if sock is not None:
        sock.close()
        self.sock = None
        logger.info('closed connection to %s on %s', self.host, self.port)
606
606
def busy(self):
    """Report whether this connection object is currently in use.

    A pending response keeps the connection busy even when the
    request itself has been sent completely. Should HTTPConnection
    one day manage several server connections transparently, this is
    how callers would find out whether one of them is free.

    As a side effect, a response that has been handed to the caller
    and is finished (or will close the socket) is forgotten here, and
    in the will_close case the socket is disowned as well.
    """
    pending = self._current_response
    if pending is None:
        return False
    if not self._current_response_taken:
        # Nobody has picked up the response yet.
        return True
    if pending.will_close:
        self.sock = None
        self._current_response = None
        return False
    if pending.complete():
        self._current_response = None
        return False
    return True
628
628
def _reconnect(self, where, pheaders):
    """Drop the current socket and open a fresh connection.

    where is a short label (request() passes 'read' or 'write') that
    is only used in the log message; pheaders are the proxy headers
    handed on to _connect().
    """
    logger.info('reconnecting during %s', where)
    # Close first so that _connect() sees no open socket and really
    # establishes a new one.
    self.close()
    self._connect(pheaders)
633
633
def request(self, method, path, body=None, headers=None,
            expect_continue=False):
    """Send a request to the server.

    For increased flexibility, this does not return the response
    object. Future versions of HTTPConnection that juggle multiple
    sockets will be able to send (for example) 5 requests all at
    once, and then let the requests arrive as data is
    available. Use the `getresponse()` method to retrieve the
    response.

    method and path are passed through _ensurebytes(). body may be
    None, an object with __len__() (a Content-Length header is
    computed unless one was supplied) or an object with read() (sent
    with chunked transfer encoding). headers is an optional dict of
    request headers; None means no extra headers. If expect_continue
    is true, or an Expect: 100-continue header was supplied, the body
    is held back for up to self.continue_timeout seconds while
    waiting for the server's go-ahead.

    Raises httplib.CannotSendRequest if a previous response is still
    pending, BadRequestData if body has neither __len__() nor read(),
    HTTPTimeoutException if the socket does not become ready within
    self.timeout, and HTTPStateError if the connection is closed
    after part of the request was written without a complete response.
    """
    if headers is None:
        headers = {}
    method = _ensurebytes(method)
    path = _ensurebytes(path)
    if self.busy():
        raise httplib.CannotSendRequest(
            'Can not send another request before '
            'current response is read!')
    self._current_response_taken = False

    logger.info('sending %s request for %s to %s on port %s',
                method, path, self.host, self.port)

    hdrs = _foldheaders(headers)
    # Figure out headers that have to be computed from the request
    # body.
    chunked = False
    if body and HDR_CONTENT_LENGTH not in hdrs:
        if getattr(body, '__len__', False):
            hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH,
                                        b'%d' % len(body))
        elif getattr(body, 'read', False):
            hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
                                       XFER_ENCODING_CHUNKED)
            chunked = True
        else:
            raise BadRequestData('body has no __len__() nor read()')
    # Figure out expect-continue header
    if hdrs.get('expect', ('', ''))[1].lower() == b'100-continue':
        expect_continue = True
    elif expect_continue:
        hdrs['expect'] = (b'Expect', b'100-Continue')
    # httplib compatibility: if the user specified a
    # proxy-authorization header, that's actually intended for a
    # proxy CONNECT action, not the real request, but only if
    # we're going to use a proxy.
    pheaders = dict(self._proxy_headers)
    if self._proxy_host and self.ssl:
        pa = hdrs.pop('proxy-authorization', None)
        if pa is not None:
            pheaders['proxy-authorization'] = pa
    # Build header data
    outgoing_headers = self._buildheaders(
        method, path, hdrs, self.http_version)

    # If we're reusing the underlying socket, there are some
    # conditions where we'll want to retry, so make a note of the
    # state of self.sock
    fresh_socket = self.sock is None
    self._connect(pheaders)
    response = None
    first = True

    while ((outgoing_headers or body)
           and not (response and response.complete())):
        select_timeout = self.timeout
        out = outgoing_headers or body
        blocking_on_continue = False
        if expect_continue and not outgoing_headers and not (
                response and (response.headers or response.continued)):
            logger.info(
                'waiting up to %s seconds for'
                ' continue response from server',
                self.continue_timeout)
            select_timeout = self.continue_timeout
            blocking_on_continue = True
            out = False
        if out:
            w = [self.sock]
        else:
            w = []
        r, w, x = select.select([self.sock], w, [], select_timeout)
        # if we were expecting a 100 continue and it's been long
        # enough, just go ahead and assume it's ok. This is the
        # recommended behavior from the RFC.
        if r == w == x == []:
            if blocking_on_continue:
                expect_continue = False
                logger.info('no response to continue expectation from '
                            'server, optimistically sending request body')
            else:
                raise HTTPTimeoutException('timeout sending data')
        was_first = first

        # incoming data
        if r:
            try:
                try:
                    data = r[0].recv(INCOMING_BUFFER_SIZE)
                except ssl.SSLError as e:
                    if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                        raise
                    logger.debug('SSL_ERROR_WANT_READ while sending '
                                 'data, retrying...')
                    continue
                if not data:
                    logger.info('socket appears closed in read')
                    self.sock = None
                    self._current_response = None
                    if response is not None:
                        # We're a friend of the response class, so let
                        # us use the private attribute.
                        # pylint: disable=W0212
                        response._close()
                    # This if/elif ladder is a bit subtle,
                    # comments in each branch should help.
                    if response is not None and response.complete():
                        # Server responded completely and then
                        # closed the socket. We should just shut
                        # things down and let the caller get their
                        # response.
                        logger.info('Got an early response, '
                                    'aborting remaining request.')
                        break
                    elif was_first and response is None:
                        # Most likely a keepalive that got killed
                        # on the server's end. Commonly happens
                        # after getting a really large response
                        # from the server.
                        logger.info(
                            'Connection appeared closed in read on first'
                            ' request loop iteration, will retry.')
                        self._reconnect('read', pheaders)
                        continue
                    else:
                        # We didn't just send the first data hunk,
                        # and either have a partial response or no
                        # response at all. There's really nothing
                        # meaningful we can do here.
                        raise HTTPStateError(
                            'Connection appears closed after '
                            'some request data was written, but the '
                            'response was missing or incomplete!')
                logger.debug('read %d bytes in request()', len(data))
                if response is None:
                    response = self.response_class(
                        r[0], self.timeout, method)
                # We're a friend of the response class, so let us
                # use the private attribute.
                # pylint: disable=W0212
                response._load_response(data)
                # Jump to the next select() call so we load more
                # data if the server is still sending us content.
                continue
            except socket.error as e:
                # Use e.args[0] rather than e[0]: exception objects
                # are not subscriptable on Python 3.
                if e.args[0] != errno.EPIPE and not was_first:
                    raise

        # outgoing data
        if w and out:
            try:
                if getattr(out, 'read', False):
                    # pylint guesses the type of out incorrectly here
                    # pylint: disable=E1103
                    data = out.read(OUTGOING_BUFFER_SIZE)
                    if not data:
                        continue
                    if len(data) < OUTGOING_BUFFER_SIZE:
                        if chunked:
                            body = b'0' + EOL + EOL
                        else:
                            body = None
                    if chunked:
                        # This encode is okay because we know
                        # hex() is building us only 0-9 and a-f
                        # digits.
                        asciilen = hex(len(data))[2:].encode('ascii')
                        out = asciilen + EOL + data + EOL
                    else:
                        out = data
                amt = w[0].send(out)
            except socket.error as e:
                # Use e.args[0] rather than e[0]: exception objects
                # are not subscriptable on Python 3.
                err = e.args[0]
                if err == ssl.SSL_ERROR_WANT_WRITE and self.ssl:
                    # This means that SSL hasn't flushed its buffer into
                    # the socket yet.
                    # TODO: find a way to block on ssl flushing its buffer
                    # similar to selecting on a raw socket.
                    continue
                if err == errno.EWOULDBLOCK or err == errno.EAGAIN:
                    continue
                elif (err not in (errno.ECONNRESET, errno.EPIPE)
                      and not first):
                    raise
                self._reconnect('write', pheaders)
                amt = self.sock.send(out)
            logger.debug('sent %d', amt)
            first = False
            if out is body:
                body = out[amt:]
            else:
                outgoing_headers = out[amt:]
    # End of request-sending loop.

    # close if the server response said to or responded before eating
    # the whole request
    if response is None:
        response = self.response_class(self.sock, self.timeout, method)
        if not fresh_socket:
            if not response._select():
                # This means the response failed to get any response
                # data at all, and in all probability the socket was
                # closed before the server even saw our request. Try
                # the request again on a fresh socket.
                logger.debug('response._select() failed during request().'
                             ' Assuming request needs to be retried.')
                self.sock = None
                # Call this method explicitly to re-try the
                # request. We don't use self.request() because
                # some tools (notably Mercurial) expect to be able
                # to subclass and redefine request(), and they
                # don't have the same argspec as we do.
                #
                # TODO restructure sending of requests to avoid
                # this recursion
                return HTTPConnection.request(
                    self, method, path, body=body, headers=headers,
                    expect_continue=expect_continue)
    data_left = bool(outgoing_headers or body)
    if data_left:
        logger.info('stopped sending request early, '
                    'will close the socket to be safe.')
        response.will_close = True
    if response.will_close:
        # The socket will be closed by the response, so we disown
        # the socket
        self.sock = None
    self._current_response = response
870
872
def getresponse(self):
    """Return the response object for the most recent request.

    Waits until the response headers are available. Raises
    httplib.ResponseNotReady if there is no pending response and
    HTTPRemoteClosedError (from _readers) if no more data can be
    read before the headers have arrived.
    """
    response = self._current_response
    if response is None:
        raise httplib.ResponseNotReady()
    while response.headers is None:
        # We're a friend of the response class, so let us use the
        # private attribute.
        # pylint: disable=W0212
        if not (response._select() or response.complete()):
            raise _readers.HTTPRemoteClosedError()
    if response.will_close:
        # The response owns (and will close) the socket from here on.
        self.sock = None
    elif not response.complete():
        # Still being read by the caller: remember that it was handed
        # out so busy() can tell when it is done.
        self._current_response_taken = True
        return response
    self._current_response = None
    return response
890
892
891
893
class HTTPTimeoutException(httplib.HTTPException):
    """Raised when waiting on the server (or proxy) took too long."""
894
896
895
897
class BadRequestData(httplib.HTTPException):
    """Raised when a request body offers neither __len__ nor read."""
898
900
899
901
class HTTPProxyConnectFailedException(httplib.HTTPException):
    """Raised when the connection through the HTTP proxy failed."""
902
904
903
905
class HTTPStateError(httplib.HTTPException):
    """Raised when the connection ends up in an invalid internal state."""
906
908
# HTTPRemoteClosedError is defined in _readers but belongs to this
# module's public API, so re-export it under the same name here.
HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
910 # no-check-code
912 # no-check-code
General Comments 0
You need to be logged in to leave comments. Login now