##// END OF EJS Templates
http2: sane readline...
Brendan Cully -
r19038:36733ab7 default
parent child Browse files
Show More
@@ -1,674 +1,666
1 1 # Copyright 2010, Google Inc.
2 2 # All rights reserved.
3 3 #
4 4 # Redistribution and use in source and binary forms, with or without
5 5 # modification, are permitted provided that the following conditions are
6 6 # met:
7 7 #
8 8 # * Redistributions of source code must retain the above copyright
9 9 # notice, this list of conditions and the following disclaimer.
10 10 # * Redistributions in binary form must reproduce the above
11 11 # copyright notice, this list of conditions and the following disclaimer
12 12 # in the documentation and/or other materials provided with the
13 13 # distribution.
14 14 # * Neither the name of Google Inc. nor the names of its
15 15 # contributors may be used to endorse or promote products derived from
16 16 # this software without specific prior written permission.
17 17
18 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29 """Improved HTTP/1.1 client library
30 30
31 31 This library contains an HTTPConnection which is similar to the one in
32 32 httplib, but has several additional features:
33 33
34 34 * supports keepalives natively
35 35 * uses select() to block for incoming data
36 36 * notices when the server responds early to a request
37 37 * implements ssl inline instead of in a different class
38 38 """
39 39
40 40 import cStringIO
41 41 import errno
42 42 import httplib
43 43 import logging
44 44 import rfc822
45 45 import select
46 46 import socket
47 47
48 48 import _readers
49 49 import socketutil
50 50
51 51 logger = logging.getLogger(__name__)
52 52
53 53 __all__ = ['HTTPConnection', 'HTTPResponse']
54 54
55 55 HTTP_VER_1_0 = 'HTTP/1.0'
56 56 HTTP_VER_1_1 = 'HTTP/1.1'
57 57
58 58 OUTGOING_BUFFER_SIZE = 1 << 15
59 59 INCOMING_BUFFER_SIZE = 1 << 20
60 60
61 61 HDR_ACCEPT_ENCODING = 'accept-encoding'
62 62 HDR_CONNECTION_CTRL = 'connection'
63 63 HDR_CONTENT_LENGTH = 'content-length'
64 64 HDR_XFER_ENCODING = 'transfer-encoding'
65 65
66 66 XFER_ENCODING_CHUNKED = 'chunked'
67 67
68 68 CONNECTION_CLOSE = 'close'
69 69
70 70 EOL = '\r\n'
71 71 _END_HEADERS = EOL * 2
72 72
73 73 # Based on some searching around, 1 second seems like a reasonable
74 74 # default here.
75 75 TIMEOUT_ASSUME_CONTINUE = 1
76 76 TIMEOUT_DEFAULT = None
77 77
78 78
79 79 class HTTPResponse(object):
80 80 """Response from an HTTP server.
81 81
82 82 The response will continue to load as available. If you need the
83 83 complete response before continuing, check the .complete() method.
84 84 """
85 85 def __init__(self, sock, timeout, method):
86 86 self.sock = sock
87 87 self.method = method
88 88 self.raw_response = ''
89 89 self._headers_len = 0
90 90 self.headers = None
91 91 self.will_close = False
92 92 self.status_line = ''
93 93 self.status = None
94 94 self.continued = False
95 95 self.http_version = None
96 96 self.reason = None
97 97 self._reader = None
98 98
99 99 self._read_location = 0
100 100 self._eol = EOL
101 101
102 102 self._timeout = timeout
103 103
104 104 @property
105 105 def _end_headers(self):
106 106 return self._eol * 2
107 107
108 108 def complete(self):
109 109 """Returns true if this response is completely loaded.
110 110
111 111 Note that if this is a connection where complete means the
112 112 socket is closed, this will nearly always return False, even
113 113 in cases where all the data has actually been loaded.
114 114 """
115 115 if self._reader:
116 116 return self._reader.done()
117 117
118 118 def _close(self):
119 119 if self._reader is not None:
120 120 self._reader._close()
121 121
122 122 def readline(self):
123 123 """Read a single line from the response body.
124 124
125 125 This may block until either a line ending is found or the
126 126 response is complete.
127 127 """
128 # TODO: move this into the reader interface where it can be
129 # smarter (and probably avoid copies)
130 bytes = []
131 while not bytes:
132 try:
133 bytes = [self._reader.read(1)]
134 except _readers.ReadNotReady:
128 blocks = []
129 while True:
130 self._reader.readto('\n', blocks)
131
132 if blocks and blocks[-1][-1] == '\n' or self.complete():
133 break
134
135 135 self._select()
136 while bytes[-1] != '\n' and not self.complete():
137 self._select()
138 bytes.append(self._reader.read(1))
139 if bytes[-1] != '\n':
140 next = self._reader.read(1)
141 while next and next != '\n':
142 bytes.append(next)
143 next = self._reader.read(1)
144 bytes.append(next)
145 return ''.join(bytes)
136
137 return ''.join(blocks)
146 138
147 139 def read(self, length=None):
148 140 # if length is None, unbounded read
149 141 while (not self.complete() # never select on a finished read
150 142 and (not length # unbounded, so we wait for complete()
151 143 or length > self._reader.available_data)):
152 144 self._select()
153 145 if not length:
154 146 length = self._reader.available_data
155 147 r = self._reader.read(length)
156 148 if self.complete() and self.will_close:
157 149 self.sock.close()
158 150 return r
159 151
160 152 def _select(self):
161 153 r, _, _ = select.select([self.sock], [], [], self._timeout)
162 154 if not r:
163 155 # socket was not readable. If the response is not
164 156 # complete, raise a timeout.
165 157 if not self.complete():
166 158 logger.info('timed out with timeout of %s', self._timeout)
167 159 raise HTTPTimeoutException('timeout reading data')
168 160 try:
169 161 data = self.sock.recv(INCOMING_BUFFER_SIZE)
170 162 except socket.sslerror, e:
171 163 if e.args[0] != socket.SSL_ERROR_WANT_READ:
172 164 raise
173 165 logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
174 166 return True
175 167 logger.debug('response read %d data during _select', len(data))
176 168 # If the socket was readable and no data was read, that means
177 169 # the socket was closed. Inform the reader (if any) so it can
178 170 # raise an exception if this is an invalid situation.
179 171 if not data:
180 172 if self._reader:
181 173 self._reader._close()
182 174 return False
183 175 else:
184 176 self._load_response(data)
185 177 return True
186 178
187 179 def _load_response(self, data):
188 180 # Being here implies we're not at the end of the headers yet,
189 181 # since at the end of this method if headers were completely
190 182 # loaded we replace this method with the load() method of the
191 183 # reader we created.
192 184 self.raw_response += data
193 185 # This is a bogus server with bad line endings
194 186 if self._eol not in self.raw_response:
195 187 for bad_eol in ('\n', '\r'):
196 188 if (bad_eol in self.raw_response
197 189 # verify that bad_eol is not the end of the incoming data
198 190 # as this could be a response line that just got
199 191 # split between \r and \n.
200 192 and (self.raw_response.index(bad_eol) <
201 193 (len(self.raw_response) - 1))):
202 194 logger.info('bogus line endings detected, '
203 195 'using %r for EOL', bad_eol)
204 196 self._eol = bad_eol
205 197 break
206 198 # exit early if not at end of headers
207 199 if self._end_headers not in self.raw_response or self.headers:
208 200 return
209 201
210 202 # handle 100-continue response
211 203 hdrs, body = self.raw_response.split(self._end_headers, 1)
212 204 http_ver, status = hdrs.split(' ', 1)
213 205 if status.startswith('100'):
214 206 self.raw_response = body
215 207 self.continued = True
216 208 logger.debug('continue seen, setting body to %r', body)
217 209 return
218 210
219 211 # arriving here means we should parse response headers
220 212 # as all headers have arrived completely
221 213 hdrs, body = self.raw_response.split(self._end_headers, 1)
222 214 del self.raw_response
223 215 if self._eol in hdrs:
224 216 self.status_line, hdrs = hdrs.split(self._eol, 1)
225 217 else:
226 218 self.status_line = hdrs
227 219 hdrs = ''
228 220 # TODO HTTP < 1.0 support
229 221 (self.http_version, self.status,
230 222 self.reason) = self.status_line.split(' ', 2)
231 223 self.status = int(self.status)
232 224 if self._eol != EOL:
233 225 hdrs = hdrs.replace(self._eol, '\r\n')
234 226 headers = rfc822.Message(cStringIO.StringIO(hdrs))
235 227 content_len = None
236 228 if HDR_CONTENT_LENGTH in headers:
237 229 content_len = int(headers[HDR_CONTENT_LENGTH])
238 230 if self.http_version == HTTP_VER_1_0:
239 231 self.will_close = True
240 232 elif HDR_CONNECTION_CTRL in headers:
241 233 self.will_close = (
242 234 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
243 235 if (HDR_XFER_ENCODING in headers
244 236 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
245 237 self._reader = _readers.ChunkedReader(self._eol)
246 238 logger.debug('using a chunked reader')
247 239 else:
248 240 # HEAD responses are forbidden from returning a body, and
249 241 # it's implausible for a CONNECT response to use
250 242 # close-is-end logic for an OK response.
251 243 if (self.method == 'HEAD' or
252 244 (self.method == 'CONNECT' and content_len is None)):
253 245 content_len = 0
254 246 if content_len is not None:
255 247 logger.debug('using a content-length reader with length %d',
256 248 content_len)
257 249 self._reader = _readers.ContentLengthReader(content_len)
258 250 else:
259 251 # Response body had no length specified and is not
260 252 # chunked, so the end of the body will only be
261 253 # identifiable by the termination of the socket by the
262 254 # server. My interpretation of the spec means that we
263 255 # are correct in hitting this case if
264 256 # transfer-encoding, content-length, and
265 257 # connection-control were left unspecified.
266 258 self._reader = _readers.CloseIsEndReader()
267 259 logger.debug('using a close-is-end reader')
268 260 self.will_close = True
269 261
270 262 if body:
271 263 self._reader._load(body)
272 264 logger.debug('headers complete')
273 265 self.headers = headers
274 266 self._load_response = self._reader._load
275 267
276 268
277 269 class HTTPConnection(object):
278 270 """Connection to a single http server.
279 271
280 272 Supports 100-continue and keepalives natively. Uses select() for
281 273 non-blocking socket operations.
282 274 """
283 275 http_version = HTTP_VER_1_1
284 276 response_class = HTTPResponse
285 277
286 278 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
287 279 timeout=TIMEOUT_DEFAULT,
288 280 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
289 281 proxy_hostport=None, **ssl_opts):
290 282 """Create a new HTTPConnection.
291 283
292 284 Args:
293 285 host: The host to which we'll connect.
294 286 port: Optional. The port over which we'll connect. Default 80 for
295 287 non-ssl, 443 for ssl.
296 288 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
297 289 not 443, true if port is 443.
298 290 ssl_validator: a function(socket) to validate the ssl cert
299 291 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
300 292 continue_timeout: Optional. Timeout for waiting on an expected
301 293 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
302 294 proxy_hostport: Optional. Tuple of (host, port) to use as an http
303 295 proxy for the connection. Default is to not use a proxy.
304 296 """
305 297 if port is None and host.count(':') == 1 or ']:' in host:
306 298 host, port = host.rsplit(':', 1)
307 299 port = int(port)
308 300 if '[' in host:
309 301 host = host[1:-1]
310 302 if use_ssl is None and port is None:
311 303 use_ssl = False
312 304 port = 80
313 305 elif use_ssl is None:
314 306 use_ssl = (port == 443)
315 307 elif port is None:
316 308 port = (use_ssl and 443 or 80)
317 309 self.port = port
318 310 if use_ssl and not socketutil.have_ssl:
319 311 raise Exception('ssl requested but unavailable on this Python')
320 312 self.ssl = use_ssl
321 313 self.ssl_opts = ssl_opts
322 314 self._ssl_validator = ssl_validator
323 315 self.host = host
324 316 self.sock = None
325 317 self._current_response = None
326 318 self._current_response_taken = False
327 319 if proxy_hostport is None:
328 320 self._proxy_host = self._proxy_port = None
329 321 else:
330 322 self._proxy_host, self._proxy_port = proxy_hostport
331 323
332 324 self.timeout = timeout
333 325 self.continue_timeout = continue_timeout
334 326
335 327 def _connect(self):
336 328 """Connect to the host and port specified in __init__."""
337 329 if self.sock:
338 330 return
339 331 if self._proxy_host is not None:
340 332 logger.info('Connecting to http proxy %s:%s',
341 333 self._proxy_host, self._proxy_port)
342 334 sock = socketutil.create_connection((self._proxy_host,
343 335 self._proxy_port))
344 336 if self.ssl:
345 337 # TODO proxy header support
346 338 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
347 339 self.port),
348 340 {}, HTTP_VER_1_0)
349 341 sock.send(data)
350 342 sock.setblocking(0)
351 343 r = self.response_class(sock, self.timeout, 'CONNECT')
352 344 timeout_exc = HTTPTimeoutException(
353 345 'Timed out waiting for CONNECT response from proxy')
354 346 while not r.complete():
355 347 try:
356 348 if not r._select():
357 349 if not r.complete():
358 350 raise timeout_exc
359 351 except HTTPTimeoutException:
360 352 # This raise/except pattern looks goofy, but
361 353 # _select can raise the timeout as well as the
362 354 # loop body. I wish it wasn't this convoluted,
363 355 # but I don't have a better solution
364 356 # immediately handy.
365 357 raise timeout_exc
366 358 if r.status != 200:
367 359 raise HTTPProxyConnectFailedException(
368 360 'Proxy connection failed: %d %s' % (r.status,
369 361 r.read()))
370 362 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
371 363 self.host, self.port)
372 364 else:
373 365 sock = socketutil.create_connection((self.host, self.port))
374 366 if self.ssl:
375 367 # This is the default, but in the case of proxied SSL
376 368 # requests the proxy logic above will have cleared
377 369 # blocking mode, so re-enable it just to be safe.
378 370 sock.setblocking(1)
379 371 logger.debug('wrapping socket for ssl with options %r',
380 372 self.ssl_opts)
381 373 sock = socketutil.wrap_socket(sock, **self.ssl_opts)
382 374 if self._ssl_validator:
383 375 self._ssl_validator(sock)
384 376 sock.setblocking(0)
385 377 self.sock = sock
386 378
387 379 def buildheaders(self, method, path, headers, http_ver):
388 380 if self.ssl and self.port == 443 or self.port == 80:
389 381 # default port for protocol, so leave it out
390 382 hdrhost = self.host
391 383 else:
392 384 # include nonstandard port in header
393 385 if ':' in self.host: # must be IPv6
394 386 hdrhost = '[%s]:%d' % (self.host, self.port)
395 387 else:
396 388 hdrhost = '%s:%d' % (self.host, self.port)
397 389 if self._proxy_host and not self.ssl:
398 390 # When talking to a regular http proxy we must send the
399 391 # full URI, but in all other cases we must not (although
400 392 # technically RFC 2616 says servers must accept our
401 393 # request if we screw up, experimentally few do that
402 394 # correctly.)
403 395 assert path[0] == '/', 'path must start with a /'
404 396 path = 'http://%s%s' % (hdrhost, path)
405 397 outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
406 398 headers['host'] = ('Host', hdrhost)
407 399 headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
408 400 for hdr, val in headers.itervalues():
409 401 outgoing.append('%s: %s%s' % (hdr, val, EOL))
410 402 outgoing.append(EOL)
411 403 return ''.join(outgoing)
412 404
413 405 def close(self):
414 406 """Close the connection to the server.
415 407
416 408 This is a no-op if the connection is already closed. The
417 409 connection may automatically close if requested by the server
418 410 or required by the nature of a response.
419 411 """
420 412 if self.sock is None:
421 413 return
422 414 self.sock.close()
423 415 self.sock = None
424 416 logger.info('closed connection to %s on %s', self.host, self.port)
425 417
426 418 def busy(self):
427 419 """Returns True if this connection object is currently in use.
428 420
429 421 If a response is still pending, this will return True, even if
430 422 the request has finished sending. In the future,
431 423 HTTPConnection may transparently juggle multiple connections
432 424 to the server, in which case this will be useful to detect if
433 425 any of those connections is ready for use.
434 426 """
435 427 cr = self._current_response
436 428 if cr is not None:
437 429 if self._current_response_taken:
438 430 if cr.will_close:
439 431 self.sock = None
440 432 self._current_response = None
441 433 return False
442 434 elif cr.complete():
443 435 self._current_response = None
444 436 return False
445 437 return True
446 438 return False
447 439
448 440 def request(self, method, path, body=None, headers={},
449 441 expect_continue=False):
450 442 """Send a request to the server.
451 443
452 444 For increased flexibility, this does not return the response
453 445 object. Future versions of HTTPConnection that juggle multiple
454 446 sockets will be able to send (for example) 5 requests all at
455 447 once, and then let the requests arrive as data is
456 448 available. Use the `getresponse()` method to retrieve the
457 449 response.
458 450 """
459 451 if self.busy():
460 452 raise httplib.CannotSendRequest(
461 453 'Can not send another request before '
462 454 'current response is read!')
463 455 self._current_response_taken = False
464 456
465 457 logger.info('sending %s request for %s to %s on port %s',
466 458 method, path, self.host, self.port)
467 459 hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
468 460 if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
469 461 expect_continue = True
470 462 elif expect_continue:
471 463 hdrs['expect'] = ('Expect', '100-Continue')
472 464
473 465 chunked = False
474 466 if body and HDR_CONTENT_LENGTH not in hdrs:
475 467 if getattr(body, '__len__', False):
476 468 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
477 469 elif getattr(body, 'read', False):
478 470 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
479 471 XFER_ENCODING_CHUNKED)
480 472 chunked = True
481 473 else:
482 474 raise BadRequestData('body has no __len__() nor read()')
483 475
484 476 self._connect()
485 477 outgoing_headers = self.buildheaders(
486 478 method, path, hdrs, self.http_version)
487 479 response = None
488 480 first = True
489 481
490 482 def reconnect(where):
491 483 logger.info('reconnecting during %s', where)
492 484 self.close()
493 485 self._connect()
494 486
495 487 while ((outgoing_headers or body)
496 488 and not (response and response.complete())):
497 489 select_timeout = self.timeout
498 490 out = outgoing_headers or body
499 491 blocking_on_continue = False
500 492 if expect_continue and not outgoing_headers and not (
501 493 response and (response.headers or response.continued)):
502 494 logger.info(
503 495 'waiting up to %s seconds for'
504 496 ' continue response from server',
505 497 self.continue_timeout)
506 498 select_timeout = self.continue_timeout
507 499 blocking_on_continue = True
508 500 out = False
509 501 if out:
510 502 w = [self.sock]
511 503 else:
512 504 w = []
513 505 r, w, x = select.select([self.sock], w, [], select_timeout)
514 506 # if we were expecting a 100 continue and it's been long
515 507 # enough, just go ahead and assume it's ok. This is the
516 508 # recommended behavior from the RFC.
517 509 if r == w == x == []:
518 510 if blocking_on_continue:
519 511 expect_continue = False
520 512 logger.info('no response to continue expectation from '
521 513 'server, optimistically sending request body')
522 514 else:
523 515 raise HTTPTimeoutException('timeout sending data')
524 516 was_first = first
525 517
526 518 # incoming data
527 519 if r:
528 520 try:
529 521 try:
530 522 data = r[0].recv(INCOMING_BUFFER_SIZE)
531 523 except socket.sslerror, e:
532 524 if e.args[0] != socket.SSL_ERROR_WANT_READ:
533 525 raise
534 526 logger.debug(
535 527 'SSL_ERROR_WANT_READ while sending data, retrying...')
536 528 continue
537 529 if not data:
538 530 logger.info('socket appears closed in read')
539 531 self.sock = None
540 532 self._current_response = None
541 533 if response is not None:
542 534 response._close()
543 535 # This if/elif ladder is a bit subtle,
544 536 # comments in each branch should help.
545 537 if response is not None and response.complete():
546 538 # Server responded completely and then
547 539 # closed the socket. We should just shut
548 540 # things down and let the caller get their
549 541 # response.
550 542 logger.info('Got an early response, '
551 543 'aborting remaining request.')
552 544 break
553 545 elif was_first and response is None:
554 546 # Most likely a keepalive that got killed
555 547 # on the server's end. Commonly happens
556 548 # after getting a really large response
557 549 # from the server.
558 550 logger.info(
559 551 'Connection appeared closed in read on first'
560 552 ' request loop iteration, will retry.')
561 553 reconnect('read')
562 554 continue
563 555 else:
564 556 # We didn't just send the first data hunk,
565 557 # and either have a partial response or no
566 558 # response at all. There's really nothing
567 559 # meaningful we can do here.
568 560 raise HTTPStateError(
569 561 'Connection appears closed after '
570 562 'some request data was written, but the '
571 563 'response was missing or incomplete!')
572 564 logger.debug('read %d bytes in request()', len(data))
573 565 if response is None:
574 566 response = self.response_class(r[0], self.timeout, method)
575 567 response._load_response(data)
576 568 # Jump to the next select() call so we load more
577 569 # data if the server is still sending us content.
578 570 continue
579 571 except socket.error, e:
580 572 if e[0] != errno.EPIPE and not was_first:
581 573 raise
582 574
583 575 # outgoing data
584 576 if w and out:
585 577 try:
586 578 if getattr(out, 'read', False):
587 579 data = out.read(OUTGOING_BUFFER_SIZE)
588 580 if not data:
589 581 continue
590 582 if len(data) < OUTGOING_BUFFER_SIZE:
591 583 if chunked:
592 584 body = '0' + EOL + EOL
593 585 else:
594 586 body = None
595 587 if chunked:
596 588 out = hex(len(data))[2:] + EOL + data + EOL
597 589 else:
598 590 out = data
599 591 amt = w[0].send(out)
600 592 except socket.error, e:
601 593 if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
602 594 # This means that SSL hasn't flushed its buffer into
603 595 # the socket yet.
604 596 # TODO: find a way to block on ssl flushing its buffer
605 597 # similar to selecting on a raw socket.
606 598 continue
607 599 elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
608 600 and not first):
609 601 raise
610 602 reconnect('write')
611 603 amt = self.sock.send(out)
612 604 logger.debug('sent %d', amt)
613 605 first = False
614 606 # stash data we think we sent in case the socket breaks
615 607 # when we read from it
616 608 if was_first:
617 609 sent_data = out[:amt]
618 610 if out is body:
619 611 body = out[amt:]
620 612 else:
621 613 outgoing_headers = out[amt:]
622 614
623 615 # close if the server response said to or responded before eating
624 616 # the whole request
625 617 if response is None:
626 618 response = self.response_class(self.sock, self.timeout, method)
627 619 complete = response.complete()
628 620 data_left = bool(outgoing_headers or body)
629 621 if data_left:
630 622 logger.info('stopped sending request early, '
631 623 'will close the socket to be safe.')
632 624 response.will_close = True
633 625 if response.will_close:
634 626 # The socket will be closed by the response, so we disown
635 627 # the socket
636 628 self.sock = None
637 629 self._current_response = response
638 630
639 631 def getresponse(self):
640 632 if self._current_response is None:
641 633 raise httplib.ResponseNotReady()
642 634 r = self._current_response
643 635 while r.headers is None:
644 636 if not r._select() and not r.complete():
645 637 raise _readers.HTTPRemoteClosedError()
646 638 if r.will_close:
647 639 self.sock = None
648 640 self._current_response = None
649 641 elif r.complete():
650 642 self._current_response = None
651 643 else:
652 644 self._current_response_taken = True
653 645 return r
654 646
655 647
656 648 class HTTPTimeoutException(httplib.HTTPException):
657 649 """A timeout occurred while waiting on the server."""
658 650
659 651
660 652 class BadRequestData(httplib.HTTPException):
661 653 """Request body object has neither __len__ nor read."""
662 654
663 655
664 656 class HTTPProxyConnectFailedException(httplib.HTTPException):
665 657 """Connecting to the HTTP proxy failed."""
666 658
667 659
668 660 class HTTPStateError(httplib.HTTPException):
669 661 """Invalid internal state encountered."""
670 662
671 663 # Forward this exception type from _readers since it needs to be part
672 664 # of the public API.
673 665 HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
674 666 # no-check-code
@@ -1,206 +1,229
1 1 # Copyright 2011, Google Inc.
2 2 # All rights reserved.
3 3 #
4 4 # Redistribution and use in source and binary forms, with or without
5 5 # modification, are permitted provided that the following conditions are
6 6 # met:
7 7 #
8 8 # * Redistributions of source code must retain the above copyright
9 9 # notice, this list of conditions and the following disclaimer.
10 10 # * Redistributions in binary form must reproduce the above
11 11 # copyright notice, this list of conditions and the following disclaimer
12 12 # in the documentation and/or other materials provided with the
13 13 # distribution.
14 14 # * Neither the name of Google Inc. nor the names of its
15 15 # contributors may be used to endorse or promote products derived from
16 16 # this software without specific prior written permission.
17 17
18 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 29 """Reader objects to abstract out different body response types.
30 30
31 31 This module is package-private. It is not expected that these will
32 32 have any clients outside of httpplus.
33 33 """
34 34
35 35 import httplib
36 36 import itertools
37 37 import logging
38 38
39 39 logger = logging.getLogger(__name__)
40 40
41 41
42 42 class ReadNotReady(Exception):
43 43 """Raised when read() is attempted but not enough data is loaded."""
44 44
45 45
46 46 class HTTPRemoteClosedError(httplib.HTTPException):
47 47 """The server closed the remote socket in the middle of a response."""
48 48
49 49
50 50 class AbstractReader(object):
51 51 """Abstract base class for response readers.
52 52
53 53 Subclasses must implement _load, and should implement _close if
54 54 it's not an error for the server to close their socket without
55 55 some termination condition being detected during _load.
56 56 """
57 57 def __init__(self):
58 58 self._finished = False
59 59 self._done_chunks = []
60 60 self.available_data = 0
61 61
62 62 def addchunk(self, data):
63 63 self._done_chunks.append(data)
64 64 self.available_data += len(data)
65 65
66 66 def pushchunk(self, data):
67 67 self._done_chunks.insert(0, data)
68 68 self.available_data += len(data)
69 69
70 70 def popchunk(self):
71 71 b = self._done_chunks.pop(0)
72 72 self.available_data -= len(b)
73 73
74 74 return b
75 75
76 76 def done(self):
77 77 return self._finished
78 78
79 79 def read(self, amt):
80 80 if self.available_data < amt and not self._finished:
81 81 raise ReadNotReady()
82 82 blocks = []
83 83 need = amt
84 84 while self._done_chunks:
85 85 b = self.popchunk()
86 86 if len(b) > need:
87 87 nb = b[:need]
88 88 self.pushchunk(b[need:])
89 89 b = nb
90 90 blocks.append(b)
91 91 need -= len(b)
92 92 if need == 0:
93 93 break
94 94 result = ''.join(blocks)
95 95 assert len(result) == amt or (self._finished and len(result) < amt)
96 96
97 97 return result
98 98
99 def readto(self, delimstr, blocks = None):
100 """return available data chunks up to the first one in which delimstr
101 occurs. No data will be returned after delimstr -- the chunk in which
102 it occurs will be split and the remainder pushed back onto the available
103 data queue. If blocks is supplied chunks will be added to blocks, otherwise
104 a new list will be allocated.
105 """
106 if blocks is None:
107 blocks = []
108
109 while self._done_chunks:
110 b = self.popchunk()
111 i = b.find(delimstr) + len(delimstr)
112 if i:
113 if i < len(b):
114 self.pushchunk(b[i:])
115 blocks.append(b[:i])
116 break
117 else:
118 blocks.append(b)
119
120 return blocks
121
99 122 def _load(self, data): # pragma: no cover
100 123 """Subclasses must implement this.
101 124
102 125 As data is available to be read out of this object, it should
103 126 be placed into the _done_chunks list. Subclasses should not
104 127 rely on data remaining in _done_chunks forever, as it may be
105 128 reaped if the client is parsing data as it comes in.
106 129 """
107 130 raise NotImplementedError
108 131
109 132 def _close(self):
110 133 """Default implementation of close.
111 134
112 135 The default implementation assumes that the reader will mark
113 136 the response as finished on the _finished attribute once the
114 137 entire response body has been read. In the event that this is
115 138 not true, the subclass should override the implementation of
116 139 close (for example, close-is-end responses have to set
117 140 self._finished in the close handler.)
118 141 """
119 142 if not self._finished:
120 143 raise HTTPRemoteClosedError(
121 144 'server appears to have closed the socket mid-response')
122 145
123 146
124 147 class AbstractSimpleReader(AbstractReader):
125 148 """Abstract base class for simple readers that require no response decoding.
126 149
127 150 Examples of such responses are Connection: Close (close-is-end)
128 151 and responses that specify a content length.
129 152 """
130 153 def _load(self, data):
131 154 if data:
132 155 assert not self._finished, (
133 156 'tried to add data (%r) to a closed reader!' % data)
134 157 logger.debug('%s read an additional %d data', self.name, len(data))
135 158 self.addchunk(data)
136 159
137 160
138 161 class CloseIsEndReader(AbstractSimpleReader):
139 162 """Reader for responses that specify Connection: Close for length."""
140 163 name = 'close-is-end'
141 164
142 165 def _close(self):
143 166 logger.info('Marking close-is-end reader as closed.')
144 167 self._finished = True
145 168
146 169
147 170 class ContentLengthReader(AbstractSimpleReader):
148 171 """Reader for responses that specify an exact content length."""
149 172 name = 'content-length'
150 173
151 174 def __init__(self, amount):
152 175 AbstractReader.__init__(self)
153 176 self._amount = amount
154 177 if amount == 0:
155 178 self._finished = True
156 179 self._amount_seen = 0
157 180
158 181 def _load(self, data):
159 182 AbstractSimpleReader._load(self, data)
160 183 self._amount_seen += len(data)
161 184 if self._amount_seen >= self._amount:
162 185 self._finished = True
163 186 logger.debug('content-length read complete')
164 187
165 188
166 189 class ChunkedReader(AbstractReader):
167 190 """Reader for chunked transfer encoding responses."""
168 191 def __init__(self, eol):
169 192 AbstractReader.__init__(self)
170 193 self._eol = eol
171 194 self._leftover_skip_amt = 0
172 195 self._leftover_data = ''
173 196
174 197 def _load(self, data):
175 198 assert not self._finished, 'tried to add data to a closed reader!'
176 199 logger.debug('chunked read an additional %d data', len(data))
177 200 position = 0
178 201 if self._leftover_data:
179 202 logger.debug('chunked reader trying to finish block from leftover data')
180 203 # TODO: avoid this string concatenation if possible
181 204 data = self._leftover_data + data
182 205 position = self._leftover_skip_amt
183 206 self._leftover_data = ''
184 207 self._leftover_skip_amt = 0
185 208 datalen = len(data)
186 209 while position < datalen:
187 210 split = data.find(self._eol, position)
188 211 if split == -1:
189 212 self._leftover_data = data
190 213 self._leftover_skip_amt = position
191 214 return
192 215 amt = int(data[position:split], base=16)
193 216 block_start = split + len(self._eol)
194 217 # If the whole data chunk plus the eol trailer hasn't
195 218 # loaded, we'll wait for the next load.
196 219 if block_start + amt + len(self._eol) > len(data):
197 220 self._leftover_data = data
198 221 self._leftover_skip_amt = position
199 222 return
200 223 if amt == 0:
201 224 self._finished = True
202 225 logger.debug('closing chunked reader due to chunk of length 0')
203 226 return
204 227 self.addchunk(data[block_start:block_start + amt])
205 228 position = block_start + amt + len(self._eol)
206 229 # no-check-code
General Comments 0
You need to be logged in to leave comments. Login now