##// END OF EJS Templates
httpclient: don't use mutable default argument value...
Pierre-Yves David -
r31411:a53f2d4c default
parent child Browse files
Show More
@@ -1,910 +1,912
1 1 # Copyright 2010, Google Inc.
2 2 # All rights reserved.
3 3 #
4 4 # Redistribution and use in source and binary forms, with or without
5 5 # modification, are permitted provided that the following conditions are
6 6 # met:
7 7 #
8 8 # * Redistributions of source code must retain the above copyright
9 9 # notice, this list of conditions and the following disclaimer.
10 10 # * Redistributions in binary form must reproduce the above
11 11 # copyright notice, this list of conditions and the following disclaimer
12 12 # in the documentation and/or other materials provided with the
13 13 # distribution.
14 14 # * Neither the name of Google Inc. nor the names of its
15 15 # contributors may be used to endorse or promote products derived from
16 16 # this software without specific prior written permission.
17 17
18 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29 """Improved HTTP/1.1 client library
30 30
31 31 This library contains an HTTPConnection which is similar to the one in
32 32 httplib, but has several additional features:
33 33
34 34 * supports keepalives natively
35 35 * uses select() to block for incoming data
36 36 * notices when the server responds early to a request
37 37 * implements ssl inline instead of in a different class
38 38 """
39 39 from __future__ import absolute_import
40 40
41 41 # Many functions in this file have too many arguments.
42 42 # pylint: disable=R0913
43 43 import email
44 44 import email.message
45 45 import errno
46 46 import inspect
47 47 import logging
48 48 import select
49 49 import socket
50 50 import ssl
51 51 import sys
52 52
53 53 try:
54 54 import cStringIO as io
55 55 io.StringIO
56 56 except ImportError:
57 57 import io
58 58
59 59 try:
60 60 import httplib
61 61 httplib.HTTPException
62 62 except ImportError:
63 63 import http.client as httplib
64 64
65 65 from . import (
66 66 _readers,
67 67 )
68 68
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Names exported by ``from <this module> import *``.
__all__ = ['HTTPConnection', 'HTTPResponse']

# Protocol version tokens as they appear on the wire (request line and
# response status line).
HTTP_VER_1_0 = b'HTTP/1.0'
HTTP_VER_1_1 = b'HTTP/1.1'

# OUTGOING_BUFFER_SIZE is the amount read per call from a file-like
# request body; INCOMING_BUFFER_SIZE is the maximum passed to recv().
OUTGOING_BUFFER_SIZE = 1 << 15
INCOMING_BUFFER_SIZE = 1 << 20

# Lowercased header names, used as lookup keys.
HDR_ACCEPT_ENCODING = 'accept-encoding'
HDR_CONNECTION_CTRL = 'connection'
HDR_CONTENT_LENGTH = 'content-length'
HDR_XFER_ENCODING = 'transfer-encoding'

# Transfer-Encoding value that selects the chunked reader/writer.
XFER_ENCODING_CHUNKED = 'chunked'

# Connection header value meaning the peer closes after this response.
CONNECTION_CLOSE = 'close'

# Standard HTTP line terminator and the blank line that ends a header
# block.
EOL = b'\r\n'
_END_HEADERS = EOL * 2

# Based on some searching around, 1 second seems like a reasonable
# default here.
TIMEOUT_ASSUME_CONTINUE = 1
# None is handed to select(), i.e. block without a timeout.
TIMEOUT_DEFAULT = None
95 95
96 96 if sys.version_info > (3, 0):
97 97 _unicode = str
98 98 else:
99 99 _unicode = unicode
100 100
101 101 def _ensurebytes(data):
102 102 if not isinstance(data, (_unicode, bytes)):
103 103 data = str(data)
104 104 if not isinstance(data, bytes):
105 105 try:
106 106 return data.encode('latin-1')
107 107 except UnicodeEncodeError as err:
108 108 raise UnicodeEncodeError(
109 109 err.encoding,
110 110 err.object,
111 111 err.start,
112 112 err.end,
113 113 '%r is not valid Latin-1 Use .encode("utf-8") '
114 114 'if sending as utf-8 is desired.' % (
115 115 data[err.start:err.end],))
116 116 return data
117 117
118 118 class _CompatMessage(email.message.Message):
119 119 """Workaround for rfc822.Message and email.message.Message API diffs."""
120 120
121 121 @classmethod
122 122 def from_string(cls, s):
123 123 if sys.version_info > (3, 0):
124 124 # Python 3 can't decode headers from bytes, so we have to
125 125 # trust RFC 2616 and decode the headers as iso-8859-1
126 126 # bytes.
127 127 s = s.decode('iso-8859-1')
128 128 headers = email.message_from_string(s, _class=_CompatMessage)
129 129 # Fix multi-line headers to match httplib's behavior from
130 130 # Python 2.x, since email.message.Message handles them in
131 131 # slightly different ways.
132 132 if sys.version_info < (3, 0):
133 133 new = []
134 134 for h, v in headers._headers:
135 135 if '\r\n' in v:
136 136 v = '\n'.join([' ' + x.lstrip() for x in v.split('\r\n')])[1:]
137 137 new.append((h, v))
138 138 headers._headers = new
139 139 return headers
140 140
141 141 def getheaders(self, key):
142 142 return self.get_all(key)
143 143
144 144 def getheader(self, key, default=None):
145 145 return self.get(key, failobj=default)
146 146
147 147
class HTTPResponse(object):
    """Response from an HTTP server.

    The response will continue to load as available. If you need the
    complete response before continuing, check the .complete() method.
    """
    def __init__(self, sock, timeout, method):
        """Create an empty response that will be filled from ``sock``.

        Args:
          sock: the socket the response is read from.
          timeout: timeout handed to select() while waiting for data.
          method: the request method (bytes); HEAD and CONNECT change
                  how the body length is determined.
        """
        self.sock = sock
        self.method = method
        self.raw_response = b''
        self._headers_len = 0
        self.headers = None
        self.will_close = False
        self.status_line = b''
        self.status = None
        self.continued = False
        self.http_version = None
        self.reason = None
        self._reader = None

        self._read_location = 0
        self._eol = EOL

        self._timeout = timeout

    @property
    def _end_headers(self):
        """Byte sequence that terminates the header block."""
        return self._eol * 2

    def complete(self):
        """Returns true if this response is completely loaded.

        Note that if this is a connection where complete means the
        socket is closed, this will nearly always return False, even
        in cases where all the data has actually been loaded.
        """
        if self._reader:
            return self._reader.done()
        # No reader yet means the headers have not been parsed.
        return False

    def _close(self):
        """Tell the body reader, if one exists, that the socket closed."""
        if self._reader is not None:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._close()

    def getheader(self, header, default=None):
        """Return the value of ``header``, or ``default`` if it is absent."""
        return self.headers.getheader(header, default=default)

    def getheaders(self):
        """Return the response headers as (name, value) pairs."""
        if sys.version_info < (3, 0):
            return [(k.lower(), v) for k, v in self.headers.items()]
        # Starting in Python 3, headers aren't lowercased before being
        # returned here.
        return self.headers.items()

    def readline(self):
        """Read a single line from the response body.

        This may block until either a line ending is found or the
        response is complete.
        """
        blocks = []
        while True:
            self._reader.readto(b'\n', blocks)

            if blocks and blocks[-1][-1:] == b'\n' or self.complete():
                break

            self._select()

        return b''.join(blocks)

    def read(self, length=None):
        """Read data from the response body.

        With a falsy ``length`` this waits for the response to be
        complete and returns everything available. Otherwise it waits
        until ``length`` bytes are available or the response is
        complete. The socket is closed once the response is complete
        and marked will_close.
        """
        # if length is None, unbounded read
        while (not self.complete()  # never select on a finished read
               and (not length  # unbounded, so we wait for complete()
                    or length > self._reader.available_data)):
            self._select()
        if not length:
            length = self._reader.available_data
        r = self._reader.read(length)
        if self.complete() and self.will_close:
            self.sock.close()
        return r

    def _select(self):
        """Wait for the socket and feed whatever arrives to the parser.

        Returns:
          True if data was loaded (or SSL asked for a retry), False if
          the socket was closed by the peer.

        Raises:
          HTTPTimeoutException: select() timed out and the response is
            not complete.
        """
        r, unused_write, unused_err = select.select(
            [self.sock], [], [], self._timeout)
        if not r:
            # socket was not readable. If the response is not
            # complete, raise a timeout.
            if not self.complete():
                logger.info('timed out with timeout of %s', self._timeout)
                raise HTTPTimeoutException('timeout reading data')
        try:
            data = self.sock.recv(INCOMING_BUFFER_SIZE)
        except ssl.SSLError as e:
            if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                raise
            logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
            return True
        logger.debug('response read %d data during _select', len(data))
        # If the socket was readable and no data was read, that means
        # the socket was closed. Inform the reader (if any) so it can
        # raise an exception if this is an invalid situation.
        if not data:
            if self._reader:
                # We're a friend of the reader class here.
                # pylint: disable=W0212
                self._reader._close()
            return False
        else:
            self._load_response(data)
            return True

    # This method gets replaced by _load later, which confuses pylint.
    def _load_response(self, data):  # pylint: disable=E0202
        """Accumulate header bytes and, once complete, parse them.

        After the headers are parsed a body reader is chosen and this
        method is replaced on the instance by the reader's _load, so
        later data goes straight to the reader.
        """
        # Being here implies we're not at the end of the headers yet,
        # since at the end of this method if headers were completely
        # loaded we replace this method with the load() method of the
        # reader we created.
        self.raw_response += data
        # This is a bogus server with bad line endings
        if self._eol not in self.raw_response:
            for bad_eol in (b'\n', b'\r'):
                if (bad_eol in self.raw_response
                    # verify that bad_eol is not the end of the incoming data
                    # as this could be a response line that just got
                    # split between \r and \n.
                    and (self.raw_response.index(bad_eol) <
                         (len(self.raw_response) - 1))):
                    logger.info('bogus line endings detected, '
                                'using %r for EOL', bad_eol)
                    self._eol = bad_eol
                    break
        # exit early if not at end of headers
        if self._end_headers not in self.raw_response or self.headers:
            return

        # handle 100-continue response
        hdrs, body = self.raw_response.split(self._end_headers, 1)
        unused_http_ver, status = hdrs.split(b' ', 1)
        if status.startswith(b'100'):
            self.raw_response = body
            self.continued = True
            logger.debug('continue seen, setting body to %r', body)
            return

        # arriving here means we should parse response headers
        # as all headers have arrived completely. hdrs and body from
        # the split above are still valid: raw_response has not changed.
        del self.raw_response
        if self._eol in hdrs:
            self.status_line, hdrs = hdrs.split(self._eol, 1)
        else:
            self.status_line = hdrs
            hdrs = b''
        # TODO HTTP < 1.0 support
        (self.http_version, self.status,
         self.reason) = self.status_line.split(b' ', 2)
        self.status = int(self.status)
        if self._eol != EOL:
            hdrs = hdrs.replace(self._eol, b'\r\n')
        headers = _CompatMessage.from_string(hdrs)
        content_len = None
        if HDR_CONTENT_LENGTH in headers:
            content_len = int(headers[HDR_CONTENT_LENGTH])
        if self.http_version == HTTP_VER_1_0:
            self.will_close = True
        elif HDR_CONNECTION_CTRL in headers:
            self.will_close = (
                headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
        if (HDR_XFER_ENCODING in headers
            and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
            self._reader = _readers.ChunkedReader(self._eol)
            logger.debug('using a chunked reader')
        else:
            # HEAD responses are forbidden from returning a body, and
            # it's implausible for a CONNECT response to use
            # close-is-end logic for an OK response.
            if (self.method == b'HEAD' or
                (self.method == b'CONNECT' and content_len is None)):
                content_len = 0
            if content_len is not None:
                logger.debug('using a content-length reader with length %d',
                             content_len)
                self._reader = _readers.ContentLengthReader(content_len)
            else:
                # Response body had no length specified and is not
                # chunked, so the end of the body will only be
                # identifiable by the termination of the socket by the
                # server. My interpretation of the spec means that we
                # are correct in hitting this case if
                # transfer-encoding, content-length, and
                # connection-control were left unspecified.
                self._reader = _readers.CloseIsEndReader()
                logger.debug('using a close-is-end reader')
                self.will_close = True

        if body:
            # We're a friend of the reader class here.
            # pylint: disable=W0212
            self._reader._load(body)
        logger.debug('headers complete')
        self.headers = headers
        # We're a friend of the reader class here.
        # pylint: disable=W0212
        self._load_response = self._reader._load
357 357
358 358 def _foldheaders(headers):
359 359 """Given some headers, rework them so we can safely overwrite values.
360 360
361 361 >>> _foldheaders({'Accept-Encoding': 'wat'})
362 362 {'accept-encoding': ('Accept-Encoding', 'wat')}
363 363 """
364 364 return dict((k.lower(), (k, v)) for k, v in headers.items())
365 365
366 366 try:
367 367 inspect.signature
368 368 def _handlesarg(func, arg):
369 369 """ Try to determine if func accepts arg
370 370
371 371 If it takes arg, return True
372 372 If it happens to take **args, then it could do anything:
373 373 * It could throw a different TypeError, just for fun
374 374 * It could throw an ArgumentError or anything else
375 375 * It could choose not to throw an Exception at all
376 376 ... return 'unknown'
377 377
378 378 Otherwise, return False
379 379 """
380 380 params = inspect.signature(func).parameters
381 381 if arg in params:
382 382 return True
383 383 for p in params:
384 384 if params[p].kind == inspect._ParameterKind.VAR_KEYWORD:
385 385 return 'unknown'
386 386 return False
387 387 except AttributeError:
388 388 def _handlesarg(func, arg):
389 389 """ Try to determine if func accepts arg
390 390
391 391 If it takes arg, return True
392 392 If it happens to take **args, then it could do anything:
393 393 * It could throw a different TypeError, just for fun
394 394 * It could throw an ArgumentError or anything else
395 395 * It could choose not to throw an Exception at all
396 396 ... return 'unknown'
397 397
398 398 Otherwise, return False
399 399 """
400 400 spec = inspect.getargspec(func)
401 401 if arg in spec.args:
402 402 return True
403 403 if spec.keywords:
404 404 return 'unknown'
405 405 return False
406 406
class HTTPConnection(object):
    """Connection to a single http server.

    Supports 100-continue and keepalives natively. Uses select() for
    non-blocking socket operations.
    """
    http_version = HTTP_VER_1_1
    response_class = HTTPResponse

    def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
                 timeout=TIMEOUT_DEFAULT,
                 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
                 proxy_hostport=None, proxy_headers=None,
                 ssl_wrap_socket=None, **ssl_opts):
        """Create a new HTTPConnection.

        Args:
          host: The host to which we'll connect.
          port: Optional. The port over which we'll connect. Default 80 for
                non-ssl, 443 for ssl.
          use_ssl: Optional. Whether to use ssl. Defaults to False if port is
                   not 443, true if port is 443.
          ssl_validator: a function(socket) to validate the ssl cert
          timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
          continue_timeout: Optional. Timeout for waiting on an expected
                   "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
          proxy_hostport: Optional. Tuple of (host, port) to use as an http
                       proxy for the connection. Default is to not use a proxy.
          proxy_headers: Optional dict of header keys and values to send to
                         a proxy when using CONNECT. For compatibility with
                         httplib, the Proxy-Authorization header may be
                         specified in headers for request(), which will clobber
                         any such header specified here if specified. Providing
                         this option and not proxy_hostport will raise an
                         ValueError.
          ssl_wrap_socket: Optional function to use for wrapping
            sockets. If unspecified, the one from the ssl module will
            be used if available, or something that's compatible with
            it if on a Python older than 2.6.

        Any extra keyword arguments to this function will be provided
        to the ssl_wrap_socket method.
        """
        host = _ensurebytes(host)
        if port is None and host.count(b':') == 1 or b']:' in host:
            host, port = host.rsplit(b':', 1)
            port = int(port)
            if b'[' in host:
                host = host[1:-1]
        if ssl_wrap_socket is not None:
            _wrap_socket = ssl_wrap_socket
        else:
            # NOTE(review): ssl.wrap_socket was removed in Python 3.12;
            # on such interpreters callers have to pass ssl_wrap_socket.
            # Confirm which Python versions must be supported.
            _wrap_socket = ssl.wrap_socket
        # Normalize the wrapper so it can always be called with a
        # server_hostname keyword, whatever spelling it really accepts.
        call_wrap_socket = None
        handlesubar = _handlesarg(_wrap_socket, 'server_hostname')
        if handlesubar is True:
            # supports server_hostname
            call_wrap_socket = _wrap_socket
        handlesnobar = _handlesarg(_wrap_socket, 'serverhostname')
        if handlesnobar is True and handlesubar is not True:
            # supports serverhostname
            def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
                return _wrap_socket(sock, serverhostname=server_hostname,
                                    **ssl_opts)
        if handlesubar is False and handlesnobar is False:
            # does not support either
            def call_wrap_socket(sock, server_hostname=None, **ssl_opts):
                return _wrap_socket(sock, **ssl_opts)
        if call_wrap_socket is None:
            # we assume it takes **args
            def call_wrap_socket(sock, **ssl_opts):
                if 'server_hostname' in ssl_opts:
                    ssl_opts['serverhostname'] = ssl_opts['server_hostname']
                return _wrap_socket(sock, **ssl_opts)
        self._ssl_wrap_socket = call_wrap_socket
        if use_ssl is None and port is None:
            use_ssl = False
            port = 80
        elif use_ssl is None:
            use_ssl = (port == 443)
        elif port is None:
            port = 443 if use_ssl else 80
        self.port = port
        self.ssl = use_ssl
        self.ssl_opts = ssl_opts
        self._ssl_validator = ssl_validator
        self.host = host
        self.sock = None
        self._current_response = None
        self._current_response_taken = False
        if proxy_hostport is None:
            self._proxy_host = self._proxy_port = None
            if proxy_headers:
                raise ValueError(
                    'proxy_headers may not be specified unless '
                    'proxy_hostport is also specified.')
            else:
                self._proxy_headers = {}
        else:
            self._proxy_host, self._proxy_port = proxy_hostport
            self._proxy_headers = _foldheaders(proxy_headers or {})

        self.timeout = timeout
        self.continue_timeout = continue_timeout

    def _connect(self, proxy_headers):
        """Connect to the host and port specified in __init__."""
        if self.sock:
            return
        if self._proxy_host is not None:
            logger.info('Connecting to http proxy %s:%s',
                        self._proxy_host, self._proxy_port)
            sock = socket.create_connection((self._proxy_host,
                                             self._proxy_port))
            if self.ssl:
                data = self._buildheaders(b'CONNECT', b'%s:%d' % (self.host,
                                                                  self.port),
                                          proxy_headers, HTTP_VER_1_0)
                sock.send(data)
                sock.setblocking(0)
                r = self.response_class(sock, self.timeout, b'CONNECT')
                timeout_exc = HTTPTimeoutException(
                    'Timed out waiting for CONNECT response from proxy')
                while not r.complete():
                    try:
                        # We're a friend of the response class, so let
                        # us use the private attribute.
                        # pylint: disable=W0212
                        if not r._select():
                            if not r.complete():
                                raise timeout_exc
                    except HTTPTimeoutException:
                        # This raise/except pattern looks goofy, but
                        # _select can raise the timeout as well as the
                        # loop body. I wish it wasn't this convoluted,
                        # but I don't have a better solution
                        # immediately handy.
                        raise timeout_exc
                if r.status != 200:
                    raise HTTPProxyConnectFailedException(
                        'Proxy connection failed: %d %s' % (r.status,
                                                            r.read()))
                logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
                            self.host, self.port)
        else:
            sock = socket.create_connection((self.host, self.port))
        if self.ssl:
            # This is the default, but in the case of proxied SSL
            # requests the proxy logic above will have cleared
            # blocking mode, so re-enable it just to be safe.
            sock.setblocking(1)
            logger.debug('wrapping socket for ssl with options %r',
                         self.ssl_opts)
            sock = self._ssl_wrap_socket(sock, server_hostname=self.host,
                                         **self.ssl_opts)
            if self._ssl_validator:
                self._ssl_validator(sock)
        sock.setblocking(0)
        self.sock = sock

    def _buildheaders(self, method, path, headers, http_ver):
        """Serialize the request line and headers into bytes.

        ``headers`` is a folded mapping (lowercased name to a
        (name, value) tuple). It is modified in place: the Host and
        accept-encoding entries are always set here.
        """
        if self.ssl and self.port == 443 or self.port == 80:
            # default port for protocol, so leave it out
            hdrhost = self.host
        else:
            # include nonstandard port in header
            if b':' in self.host:  # must be IPv6
                hdrhost = b'[%s]:%d' % (self.host, self.port)
            else:
                hdrhost = b'%s:%d' % (self.host, self.port)
        if self._proxy_host and not self.ssl:
            # When talking to a regular http proxy we must send the
            # full URI, but in all other cases we must not (although
            # technically RFC 2616 says servers must accept our
            # request if we screw up, experimentally few do that
            # correctly.)
            assert path[0:1] == b'/', 'path must start with a /'
            path = b'http://%s%s' % (hdrhost, path)
        outgoing = [b'%s %s %s%s' % (method, path, http_ver, EOL)]
        headers[b'host'] = (b'Host', hdrhost)
        headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
        for hdr, val in sorted((_ensurebytes(h), _ensurebytes(v))
                               for h, v in headers.values()):
            outgoing.append(b'%s: %s%s' % (hdr, val, EOL))
        outgoing.append(EOL)
        return b''.join(outgoing)

    def close(self):
        """Close the connection to the server.

        This is a no-op if the connection is already closed. The
        connection may automatically close if requested by the server
        or required by the nature of a response.
        """
        if self.sock is None:
            return
        self.sock.close()
        self.sock = None
        logger.info('closed connection to %s on %s', self.host, self.port)

    def busy(self):
        """Returns True if this connection object is currently in use.

        If a response is still pending, this will return True, even if
        the request has finished sending. In the future,
        HTTPConnection may transparently juggle multiple connections
        to the server, in which case this will be useful to detect if
        any of those connections is ready for use.
        """
        cr = self._current_response
        if cr is not None:
            if self._current_response_taken:
                if cr.will_close:
                    self.sock = None
                    self._current_response = None
                    return False
                elif cr.complete():
                    self._current_response = None
                    return False
            return True
        return False

    def _reconnect(self, where, pheaders):
        """Close the current socket and connect again.

        ``where`` is only used in the log message; ``pheaders`` are the
        proxy headers passed on to _connect().
        """
        logger.info('reconnecting during %s', where)
        self.close()
        self._connect(pheaders)

    def request(self, method, path, body=None, headers=None,
                expect_continue=False):
        """Send a request to the server.

        For increased flexibility, this does not return the response
        object. Future versions of HTTPConnection that juggle multiple
        sockets will be able to send (for example) 5 requests all at
        once, and then let the requests arrive as data is
        available. Use the `getresponse()` method to retrieve the
        response.
        """
        # Avoid a mutable default argument shared between calls.
        if headers is None:
            headers = {}
        method = _ensurebytes(method)
        path = _ensurebytes(path)
        if self.busy():
            raise httplib.CannotSendRequest(
                'Can not send another request before '
                'current response is read!')
        self._current_response_taken = False

        logger.info('sending %s request for %s to %s on port %s',
                    method, path, self.host, self.port)

        hdrs = _foldheaders(headers)
        # Figure out headers that have to be computed from the request
        # body.
        chunked = False
        if body and HDR_CONTENT_LENGTH not in hdrs:
            if getattr(body, '__len__', False):
                hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH,
                                            b'%d' % len(body))
            elif getattr(body, 'read', False):
                hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
                                           XFER_ENCODING_CHUNKED)
                chunked = True
            else:
                raise BadRequestData('body has no __len__() nor read()')
        # Figure out expect-continue header
        if hdrs.get('expect', ('', ''))[1].lower() == b'100-continue':
            expect_continue = True
        elif expect_continue:
            hdrs['expect'] = (b'Expect', b'100-Continue')
        # httplib compatibility: if the user specified a
        # proxy-authorization header, that's actually intended for a
        # proxy CONNECT action, not the real request, but only if
        # we're going to use a proxy.
        pheaders = dict(self._proxy_headers)
        if self._proxy_host and self.ssl:
            pa = hdrs.pop('proxy-authorization', None)
            if pa is not None:
                pheaders['proxy-authorization'] = pa
        # Build header data
        outgoing_headers = self._buildheaders(
            method, path, hdrs, self.http_version)

        # If we're reusing the underlying socket, there are some
        # conditions where we'll want to retry, so make a note of the
        # state of self.sock
        fresh_socket = self.sock is None
        self._connect(pheaders)
        response = None
        first = True

        while ((outgoing_headers or body)
               and not (response and response.complete())):
            select_timeout = self.timeout
            out = outgoing_headers or body
            blocking_on_continue = False
            if expect_continue and not outgoing_headers and not (
                response and (response.headers or response.continued)):
                logger.info(
                    'waiting up to %s seconds for'
                    ' continue response from server',
                    self.continue_timeout)
                select_timeout = self.continue_timeout
                blocking_on_continue = True
                out = False
            if out:
                w = [self.sock]
            else:
                w = []
            r, w, x = select.select([self.sock], w, [], select_timeout)
            # if we were expecting a 100 continue and it's been long
            # enough, just go ahead and assume it's ok. This is the
            # recommended behavior from the RFC.
            if r == w == x == []:
                if blocking_on_continue:
                    expect_continue = False
                    logger.info('no response to continue expectation from '
                                'server, optimistically sending request body')
                else:
                    raise HTTPTimeoutException('timeout sending data')
            was_first = first

            # incoming data
            if r:
                try:
                    try:
                        data = r[0].recv(INCOMING_BUFFER_SIZE)
                    except ssl.SSLError as e:
                        if e.args[0] != ssl.SSL_ERROR_WANT_READ:
                            raise
                        logger.debug('SSL_ERROR_WANT_READ while sending '
                                     'data, retrying...')
                        continue
                    if not data:
                        logger.info('socket appears closed in read')
                        self.sock = None
                        self._current_response = None
                        if response is not None:
                            # We're a friend of the response class, so let
                            # us use the private attribute.
                            # pylint: disable=W0212
                            response._close()
                        # This if/elif ladder is a bit subtle,
                        # comments in each branch should help.
                        if response is not None and response.complete():
                            # Server responded completely and then
                            # closed the socket. We should just shut
                            # things down and let the caller get their
                            # response.
                            logger.info('Got an early response, '
                                        'aborting remaining request.')
                            break
                        elif was_first and response is None:
                            # Most likely a keepalive that got killed
                            # on the server's end. Commonly happens
                            # after getting a really large response
                            # from the server.
                            logger.info(
                                'Connection appeared closed in read on first'
                                ' request loop iteration, will retry.')
                            self._reconnect('read', pheaders)
                            continue
                        else:
                            # We didn't just send the first data hunk,
                            # and either have a partial response or no
                            # response at all. There's really nothing
                            # meaningful we can do here.
                            raise HTTPStateError(
                                'Connection appears closed after '
                                'some request data was written, but the '
                                'response was missing or incomplete!')
                    logger.debug('read %d bytes in request()', len(data))
                    if response is None:
                        response = self.response_class(
                            r[0], self.timeout, method)
                    # We're a friend of the response class, so let us
                    # use the private attribute.
                    # pylint: disable=W0212
                    response._load_response(data)
                    # Jump to the next select() call so we load more
                    # data if the server is still sending us content.
                    continue
                except socket.error as e:
                    # Exceptions are not subscriptable on Python 3, so
                    # read the error code from args.
                    if e.args[0] != errno.EPIPE and not was_first:
                        raise

            # outgoing data
            if w and out:
                try:
                    if getattr(out, 'read', False):
                        # pylint guesses the type of out incorrectly here
                        # pylint: disable=E1103
                        data = out.read(OUTGOING_BUFFER_SIZE)
                        if not data:
                            continue
                        if len(data) < OUTGOING_BUFFER_SIZE:
                            if chunked:
                                body = b'0' + EOL + EOL
                            else:
                                body = None
                        if chunked:
                            # This encode is okay because we know
                            # hex() is building us only 0-9 and a-f
                            # digits.
                            asciilen = hex(len(data))[2:].encode('ascii')
                            out = asciilen + EOL + data + EOL
                        else:
                            out = data
                    amt = w[0].send(out)
                except socket.error as e:
                    errcode = e.args[0]
                    if errcode == ssl.SSL_ERROR_WANT_WRITE and self.ssl:
                        # This means that SSL hasn't flushed its buffer into
                        # the socket yet.
                        # TODO: find a way to block on ssl flushing its buffer
                        # similar to selecting on a raw socket.
                        continue
                    if errcode == errno.EWOULDBLOCK or errcode == errno.EAGAIN:
                        continue
                    elif (errcode not in (errno.ECONNRESET, errno.EPIPE)
                          and not first):
                        raise
                    self._reconnect('write', pheaders)
                    amt = self.sock.send(out)
                logger.debug('sent %d', amt)
                first = False
                if out is body:
                    body = out[amt:]
                else:
                    outgoing_headers = out[amt:]
        # End of request-sending loop.

        # close if the server response said to or responded before eating
        # the whole request
        if response is None:
            response = self.response_class(self.sock, self.timeout, method)
            if not fresh_socket:
                if not response._select():
                    # This means the response failed to get any response
                    # data at all, and in all probability the socket was
                    # closed before the server even saw our request. Try
                    # the request again on a fresh socket.
                    logger.debug('response._select() failed during request().'
                                 ' Assuming request needs to be retried.')
                    self.sock = None
                    # Call this method explicitly to re-try the
                    # request. We don't use self.request() because
                    # some tools (notably Mercurial) expect to be able
                    # to subclass and redefine request(), and they
                    # don't have the same argspec as we do.
                    #
                    # TODO restructure sending of requests to avoid
                    # this recursion
                    return HTTPConnection.request(
                        self, method, path, body=body, headers=headers,
                        expect_continue=expect_continue)
        data_left = bool(outgoing_headers or body)
        if data_left:
            logger.info('stopped sending request early, '
                        'will close the socket to be safe.')
            response.will_close = True
        if response.will_close:
            # The socket will be closed by the response, so we disown
            # the socket
            self.sock = None
        self._current_response = response

    def getresponse(self):
        """Returns the response to the most recent request."""
        if self._current_response is None:
            raise httplib.ResponseNotReady()
        r = self._current_response
        while r.headers is None:
            # We're a friend of the response class, so let us use the
            # private attribute.
            # pylint: disable=W0212
            if not r._select() and not r.complete():
                raise _readers.HTTPRemoteClosedError()
        if r.will_close:
            self.sock = None
            self._current_response = None
        elif r.complete():
            self._current_response = None
        else:
            self._current_response_taken = True
        return r
890 892
891 893
class HTTPTimeoutException(httplib.HTTPException):
    """A timeout occurred while waiting on the server.

    Raised when select() on the socket times out while sending a
    request, while reading a response, or while waiting for a proxy's
    CONNECT response.
    """
894 896
895 897
class BadRequestData(httplib.HTTPException):
    """Request body object has neither __len__ nor read.

    Raised by HTTPConnection.request() when no content-length header
    was supplied and the body offers no way to determine its length or
    to be streamed.
    """
898 900
899 901
class HTTPProxyConnectFailedException(httplib.HTTPException):
    """Connecting to the HTTP proxy failed.

    Raised when the proxy answers a CONNECT request with a status
    other than 200; the message carries the status and response body.
    """
902 904
903 905
class HTTPStateError(httplib.HTTPException):
    """Invalid internal state encountered.

    Raised by HTTPConnection.request() when the connection appears
    closed after some request data was written but the response is
    missing or incomplete.
    """
906 908
# Forward this exception type from _readers since it needs to be part
# of the public API. HTTPConnection.getresponse() raises it when the
# socket closes before the response headers have been read.
HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
910 912 # no-check-code
General Comments 0
You need to be logged in to leave comments. Login now