##// END OF EJS Templates
http2: sane readline...
Brendan Cully -
r19038:36733ab7 default
parent child Browse files
Show More
@@ -1,674 +1,666 b''
1 # Copyright 2010, Google Inc.
1 # Copyright 2010, Google Inc.
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # Redistribution and use in source and binary forms, with or without
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
5 # modification, are permitted provided that the following conditions are
6 # met:
6 # met:
7 #
7 #
8 # * Redistributions of source code must retain the above copyright
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
12 # in the documentation and/or other materials provided with the
13 # distribution.
13 # distribution.
14 # * Neither the name of Google Inc. nor the names of its
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
16 # this software without specific prior written permission.
17
17
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 """Improved HTTP/1.1 client library
29 """Improved HTTP/1.1 client library
30
30
31 This library contains an HTTPConnection which is similar to the one in
31 This library contains an HTTPConnection which is similar to the one in
32 httplib, but has several additional features:
32 httplib, but has several additional features:
33
33
34 * supports keepalives natively
34 * supports keepalives natively
35 * uses select() to block for incoming data
35 * uses select() to block for incoming data
36 * notices when the server responds early to a request
36 * notices when the server responds early to a request
37 * implements ssl inline instead of in a different class
37 * implements ssl inline instead of in a different class
38 """
38 """
39
39
40 import cStringIO
40 import cStringIO
41 import errno
41 import errno
42 import httplib
42 import httplib
43 import logging
43 import logging
44 import rfc822
44 import rfc822
45 import select
45 import select
46 import socket
46 import socket
47
47
48 import _readers
48 import _readers
49 import socketutil
49 import socketutil
50
50
51 logger = logging.getLogger(__name__)
51 logger = logging.getLogger(__name__)
52
52
53 __all__ = ['HTTPConnection', 'HTTPResponse']
53 __all__ = ['HTTPConnection', 'HTTPResponse']
54
54
55 HTTP_VER_1_0 = 'HTTP/1.0'
55 HTTP_VER_1_0 = 'HTTP/1.0'
56 HTTP_VER_1_1 = 'HTTP/1.1'
56 HTTP_VER_1_1 = 'HTTP/1.1'
57
57
58 OUTGOING_BUFFER_SIZE = 1 << 15
58 OUTGOING_BUFFER_SIZE = 1 << 15
59 INCOMING_BUFFER_SIZE = 1 << 20
59 INCOMING_BUFFER_SIZE = 1 << 20
60
60
61 HDR_ACCEPT_ENCODING = 'accept-encoding'
61 HDR_ACCEPT_ENCODING = 'accept-encoding'
62 HDR_CONNECTION_CTRL = 'connection'
62 HDR_CONNECTION_CTRL = 'connection'
63 HDR_CONTENT_LENGTH = 'content-length'
63 HDR_CONTENT_LENGTH = 'content-length'
64 HDR_XFER_ENCODING = 'transfer-encoding'
64 HDR_XFER_ENCODING = 'transfer-encoding'
65
65
66 XFER_ENCODING_CHUNKED = 'chunked'
66 XFER_ENCODING_CHUNKED = 'chunked'
67
67
68 CONNECTION_CLOSE = 'close'
68 CONNECTION_CLOSE = 'close'
69
69
70 EOL = '\r\n'
70 EOL = '\r\n'
71 _END_HEADERS = EOL * 2
71 _END_HEADERS = EOL * 2
72
72
73 # Based on some searching around, 1 second seems like a reasonable
73 # Based on some searching around, 1 second seems like a reasonable
74 # default here.
74 # default here.
75 TIMEOUT_ASSUME_CONTINUE = 1
75 TIMEOUT_ASSUME_CONTINUE = 1
76 TIMEOUT_DEFAULT = None
76 TIMEOUT_DEFAULT = None
77
77
78
78
79 class HTTPResponse(object):
79 class HTTPResponse(object):
80 """Response from an HTTP server.
80 """Response from an HTTP server.
81
81
82 The response will continue to load as available. If you need the
82 The response will continue to load as available. If you need the
83 complete response before continuing, check the .complete() method.
83 complete response before continuing, check the .complete() method.
84 """
84 """
85 def __init__(self, sock, timeout, method):
85 def __init__(self, sock, timeout, method):
86 self.sock = sock
86 self.sock = sock
87 self.method = method
87 self.method = method
88 self.raw_response = ''
88 self.raw_response = ''
89 self._headers_len = 0
89 self._headers_len = 0
90 self.headers = None
90 self.headers = None
91 self.will_close = False
91 self.will_close = False
92 self.status_line = ''
92 self.status_line = ''
93 self.status = None
93 self.status = None
94 self.continued = False
94 self.continued = False
95 self.http_version = None
95 self.http_version = None
96 self.reason = None
96 self.reason = None
97 self._reader = None
97 self._reader = None
98
98
99 self._read_location = 0
99 self._read_location = 0
100 self._eol = EOL
100 self._eol = EOL
101
101
102 self._timeout = timeout
102 self._timeout = timeout
103
103
104 @property
104 @property
105 def _end_headers(self):
105 def _end_headers(self):
106 return self._eol * 2
106 return self._eol * 2
107
107
108 def complete(self):
108 def complete(self):
109 """Returns true if this response is completely loaded.
109 """Returns true if this response is completely loaded.
110
110
111 Note that if this is a connection where complete means the
111 Note that if this is a connection where complete means the
112 socket is closed, this will nearly always return False, even
112 socket is closed, this will nearly always return False, even
113 in cases where all the data has actually been loaded.
113 in cases where all the data has actually been loaded.
114 """
114 """
115 if self._reader:
115 if self._reader:
116 return self._reader.done()
116 return self._reader.done()
117
117
118 def _close(self):
118 def _close(self):
119 if self._reader is not None:
119 if self._reader is not None:
120 self._reader._close()
120 self._reader._close()
121
121
122 def readline(self):
122 def readline(self):
123 """Read a single line from the response body.
123 """Read a single line from the response body.
124
124
125 This may block until either a line ending is found or the
125 This may block until either a line ending is found or the
126 response is complete.
126 response is complete.
127 """
127 """
128 # TODO: move this into the reader interface where it can be
128 blocks = []
129 # smarter (and probably avoid copies)
129 while True:
130 bytes = []
130 self._reader.readto('\n', blocks)
131 while not bytes:
131
132 try:
132 if blocks and blocks[-1][-1] == '\n' or self.complete():
133 bytes = [self._reader.read(1)]
133 break
134 except _readers.ReadNotReady:
134
135 self._select()
136 while bytes[-1] != '\n' and not self.complete():
137 self._select()
135 self._select()
138 bytes.append(self._reader.read(1))
136
139 if bytes[-1] != '\n':
137 return ''.join(blocks)
140 next = self._reader.read(1)
141 while next and next != '\n':
142 bytes.append(next)
143 next = self._reader.read(1)
144 bytes.append(next)
145 return ''.join(bytes)
146
138
147 def read(self, length=None):
139 def read(self, length=None):
148 # if length is None, unbounded read
140 # if length is None, unbounded read
149 while (not self.complete() # never select on a finished read
141 while (not self.complete() # never select on a finished read
150 and (not length # unbounded, so we wait for complete()
142 and (not length # unbounded, so we wait for complete()
151 or length > self._reader.available_data)):
143 or length > self._reader.available_data)):
152 self._select()
144 self._select()
153 if not length:
145 if not length:
154 length = self._reader.available_data
146 length = self._reader.available_data
155 r = self._reader.read(length)
147 r = self._reader.read(length)
156 if self.complete() and self.will_close:
148 if self.complete() and self.will_close:
157 self.sock.close()
149 self.sock.close()
158 return r
150 return r
159
151
160 def _select(self):
152 def _select(self):
161 r, _, _ = select.select([self.sock], [], [], self._timeout)
153 r, _, _ = select.select([self.sock], [], [], self._timeout)
162 if not r:
154 if not r:
163 # socket was not readable. If the response is not
155 # socket was not readable. If the response is not
164 # complete, raise a timeout.
156 # complete, raise a timeout.
165 if not self.complete():
157 if not self.complete():
166 logger.info('timed out with timeout of %s', self._timeout)
158 logger.info('timed out with timeout of %s', self._timeout)
167 raise HTTPTimeoutException('timeout reading data')
159 raise HTTPTimeoutException('timeout reading data')
168 try:
160 try:
169 data = self.sock.recv(INCOMING_BUFFER_SIZE)
161 data = self.sock.recv(INCOMING_BUFFER_SIZE)
170 except socket.sslerror, e:
162 except socket.sslerror, e:
171 if e.args[0] != socket.SSL_ERROR_WANT_READ:
163 if e.args[0] != socket.SSL_ERROR_WANT_READ:
172 raise
164 raise
173 logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
165 logger.debug('SSL_ERROR_WANT_READ in _select, should retry later')
174 return True
166 return True
175 logger.debug('response read %d data during _select', len(data))
167 logger.debug('response read %d data during _select', len(data))
176 # If the socket was readable and no data was read, that means
168 # If the socket was readable and no data was read, that means
177 # the socket was closed. Inform the reader (if any) so it can
169 # the socket was closed. Inform the reader (if any) so it can
178 # raise an exception if this is an invalid situation.
170 # raise an exception if this is an invalid situation.
179 if not data:
171 if not data:
180 if self._reader:
172 if self._reader:
181 self._reader._close()
173 self._reader._close()
182 return False
174 return False
183 else:
175 else:
184 self._load_response(data)
176 self._load_response(data)
185 return True
177 return True
186
178
187 def _load_response(self, data):
179 def _load_response(self, data):
188 # Being here implies we're not at the end of the headers yet,
180 # Being here implies we're not at the end of the headers yet,
189 # since at the end of this method if headers were completely
181 # since at the end of this method if headers were completely
190 # loaded we replace this method with the load() method of the
182 # loaded we replace this method with the load() method of the
191 # reader we created.
183 # reader we created.
192 self.raw_response += data
184 self.raw_response += data
193 # This is a bogus server with bad line endings
185 # This is a bogus server with bad line endings
194 if self._eol not in self.raw_response:
186 if self._eol not in self.raw_response:
195 for bad_eol in ('\n', '\r'):
187 for bad_eol in ('\n', '\r'):
196 if (bad_eol in self.raw_response
188 if (bad_eol in self.raw_response
197 # verify that bad_eol is not the end of the incoming data
189 # verify that bad_eol is not the end of the incoming data
198 # as this could be a response line that just got
190 # as this could be a response line that just got
199 # split between \r and \n.
191 # split between \r and \n.
200 and (self.raw_response.index(bad_eol) <
192 and (self.raw_response.index(bad_eol) <
201 (len(self.raw_response) - 1))):
193 (len(self.raw_response) - 1))):
202 logger.info('bogus line endings detected, '
194 logger.info('bogus line endings detected, '
203 'using %r for EOL', bad_eol)
195 'using %r for EOL', bad_eol)
204 self._eol = bad_eol
196 self._eol = bad_eol
205 break
197 break
206 # exit early if not at end of headers
198 # exit early if not at end of headers
207 if self._end_headers not in self.raw_response or self.headers:
199 if self._end_headers not in self.raw_response or self.headers:
208 return
200 return
209
201
210 # handle 100-continue response
202 # handle 100-continue response
211 hdrs, body = self.raw_response.split(self._end_headers, 1)
203 hdrs, body = self.raw_response.split(self._end_headers, 1)
212 http_ver, status = hdrs.split(' ', 1)
204 http_ver, status = hdrs.split(' ', 1)
213 if status.startswith('100'):
205 if status.startswith('100'):
214 self.raw_response = body
206 self.raw_response = body
215 self.continued = True
207 self.continued = True
216 logger.debug('continue seen, setting body to %r', body)
208 logger.debug('continue seen, setting body to %r', body)
217 return
209 return
218
210
219 # arriving here means we should parse response headers
211 # arriving here means we should parse response headers
220 # as all headers have arrived completely
212 # as all headers have arrived completely
221 hdrs, body = self.raw_response.split(self._end_headers, 1)
213 hdrs, body = self.raw_response.split(self._end_headers, 1)
222 del self.raw_response
214 del self.raw_response
223 if self._eol in hdrs:
215 if self._eol in hdrs:
224 self.status_line, hdrs = hdrs.split(self._eol, 1)
216 self.status_line, hdrs = hdrs.split(self._eol, 1)
225 else:
217 else:
226 self.status_line = hdrs
218 self.status_line = hdrs
227 hdrs = ''
219 hdrs = ''
228 # TODO HTTP < 1.0 support
220 # TODO HTTP < 1.0 support
229 (self.http_version, self.status,
221 (self.http_version, self.status,
230 self.reason) = self.status_line.split(' ', 2)
222 self.reason) = self.status_line.split(' ', 2)
231 self.status = int(self.status)
223 self.status = int(self.status)
232 if self._eol != EOL:
224 if self._eol != EOL:
233 hdrs = hdrs.replace(self._eol, '\r\n')
225 hdrs = hdrs.replace(self._eol, '\r\n')
234 headers = rfc822.Message(cStringIO.StringIO(hdrs))
226 headers = rfc822.Message(cStringIO.StringIO(hdrs))
235 content_len = None
227 content_len = None
236 if HDR_CONTENT_LENGTH in headers:
228 if HDR_CONTENT_LENGTH in headers:
237 content_len = int(headers[HDR_CONTENT_LENGTH])
229 content_len = int(headers[HDR_CONTENT_LENGTH])
238 if self.http_version == HTTP_VER_1_0:
230 if self.http_version == HTTP_VER_1_0:
239 self.will_close = True
231 self.will_close = True
240 elif HDR_CONNECTION_CTRL in headers:
232 elif HDR_CONNECTION_CTRL in headers:
241 self.will_close = (
233 self.will_close = (
242 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
234 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
243 if (HDR_XFER_ENCODING in headers
235 if (HDR_XFER_ENCODING in headers
244 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
236 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
245 self._reader = _readers.ChunkedReader(self._eol)
237 self._reader = _readers.ChunkedReader(self._eol)
246 logger.debug('using a chunked reader')
238 logger.debug('using a chunked reader')
247 else:
239 else:
248 # HEAD responses are forbidden from returning a body, and
240 # HEAD responses are forbidden from returning a body, and
249 # it's implausible for a CONNECT response to use
241 # it's implausible for a CONNECT response to use
250 # close-is-end logic for an OK response.
242 # close-is-end logic for an OK response.
251 if (self.method == 'HEAD' or
243 if (self.method == 'HEAD' or
252 (self.method == 'CONNECT' and content_len is None)):
244 (self.method == 'CONNECT' and content_len is None)):
253 content_len = 0
245 content_len = 0
254 if content_len is not None:
246 if content_len is not None:
255 logger.debug('using a content-length reader with length %d',
247 logger.debug('using a content-length reader with length %d',
256 content_len)
248 content_len)
257 self._reader = _readers.ContentLengthReader(content_len)
249 self._reader = _readers.ContentLengthReader(content_len)
258 else:
250 else:
259 # Response body had no length specified and is not
251 # Response body had no length specified and is not
260 # chunked, so the end of the body will only be
252 # chunked, so the end of the body will only be
261 # identifiable by the termination of the socket by the
253 # identifiable by the termination of the socket by the
262 # server. My interpretation of the spec means that we
254 # server. My interpretation of the spec means that we
263 # are correct in hitting this case if
255 # are correct in hitting this case if
264 # transfer-encoding, content-length, and
256 # transfer-encoding, content-length, and
265 # connection-control were left unspecified.
257 # connection-control were left unspecified.
266 self._reader = _readers.CloseIsEndReader()
258 self._reader = _readers.CloseIsEndReader()
267 logger.debug('using a close-is-end reader')
259 logger.debug('using a close-is-end reader')
268 self.will_close = True
260 self.will_close = True
269
261
270 if body:
262 if body:
271 self._reader._load(body)
263 self._reader._load(body)
272 logger.debug('headers complete')
264 logger.debug('headers complete')
273 self.headers = headers
265 self.headers = headers
274 self._load_response = self._reader._load
266 self._load_response = self._reader._load
275
267
276
268
277 class HTTPConnection(object):
269 class HTTPConnection(object):
278 """Connection to a single http server.
270 """Connection to a single http server.
279
271
280 Supports 100-continue and keepalives natively. Uses select() for
272 Supports 100-continue and keepalives natively. Uses select() for
281 non-blocking socket operations.
273 non-blocking socket operations.
282 """
274 """
283 http_version = HTTP_VER_1_1
275 http_version = HTTP_VER_1_1
284 response_class = HTTPResponse
276 response_class = HTTPResponse
285
277
286 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
278 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
287 timeout=TIMEOUT_DEFAULT,
279 timeout=TIMEOUT_DEFAULT,
288 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
280 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
289 proxy_hostport=None, **ssl_opts):
281 proxy_hostport=None, **ssl_opts):
290 """Create a new HTTPConnection.
282 """Create a new HTTPConnection.
291
283
292 Args:
284 Args:
293 host: The host to which we'll connect.
285 host: The host to which we'll connect.
294 port: Optional. The port over which we'll connect. Default 80 for
286 port: Optional. The port over which we'll connect. Default 80 for
295 non-ssl, 443 for ssl.
287 non-ssl, 443 for ssl.
296 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
288 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
297 not 443, true if port is 443.
289 not 443, true if port is 443.
298 ssl_validator: a function(socket) to validate the ssl cert
290 ssl_validator: a function(socket) to validate the ssl cert
299 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
291 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
300 continue_timeout: Optional. Timeout for waiting on an expected
292 continue_timeout: Optional. Timeout for waiting on an expected
301 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
293 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
302 proxy_hostport: Optional. Tuple of (host, port) to use as an http
294 proxy_hostport: Optional. Tuple of (host, port) to use as an http
303 proxy for the connection. Default is to not use a proxy.
295 proxy for the connection. Default is to not use a proxy.
304 """
296 """
305 if port is None and host.count(':') == 1 or ']:' in host:
297 if port is None and host.count(':') == 1 or ']:' in host:
306 host, port = host.rsplit(':', 1)
298 host, port = host.rsplit(':', 1)
307 port = int(port)
299 port = int(port)
308 if '[' in host:
300 if '[' in host:
309 host = host[1:-1]
301 host = host[1:-1]
310 if use_ssl is None and port is None:
302 if use_ssl is None and port is None:
311 use_ssl = False
303 use_ssl = False
312 port = 80
304 port = 80
313 elif use_ssl is None:
305 elif use_ssl is None:
314 use_ssl = (port == 443)
306 use_ssl = (port == 443)
315 elif port is None:
307 elif port is None:
316 port = (use_ssl and 443 or 80)
308 port = (use_ssl and 443 or 80)
317 self.port = port
309 self.port = port
318 if use_ssl and not socketutil.have_ssl:
310 if use_ssl and not socketutil.have_ssl:
319 raise Exception('ssl requested but unavailable on this Python')
311 raise Exception('ssl requested but unavailable on this Python')
320 self.ssl = use_ssl
312 self.ssl = use_ssl
321 self.ssl_opts = ssl_opts
313 self.ssl_opts = ssl_opts
322 self._ssl_validator = ssl_validator
314 self._ssl_validator = ssl_validator
323 self.host = host
315 self.host = host
324 self.sock = None
316 self.sock = None
325 self._current_response = None
317 self._current_response = None
326 self._current_response_taken = False
318 self._current_response_taken = False
327 if proxy_hostport is None:
319 if proxy_hostport is None:
328 self._proxy_host = self._proxy_port = None
320 self._proxy_host = self._proxy_port = None
329 else:
321 else:
330 self._proxy_host, self._proxy_port = proxy_hostport
322 self._proxy_host, self._proxy_port = proxy_hostport
331
323
332 self.timeout = timeout
324 self.timeout = timeout
333 self.continue_timeout = continue_timeout
325 self.continue_timeout = continue_timeout
334
326
335 def _connect(self):
327 def _connect(self):
336 """Connect to the host and port specified in __init__."""
328 """Connect to the host and port specified in __init__."""
337 if self.sock:
329 if self.sock:
338 return
330 return
339 if self._proxy_host is not None:
331 if self._proxy_host is not None:
340 logger.info('Connecting to http proxy %s:%s',
332 logger.info('Connecting to http proxy %s:%s',
341 self._proxy_host, self._proxy_port)
333 self._proxy_host, self._proxy_port)
342 sock = socketutil.create_connection((self._proxy_host,
334 sock = socketutil.create_connection((self._proxy_host,
343 self._proxy_port))
335 self._proxy_port))
344 if self.ssl:
336 if self.ssl:
345 # TODO proxy header support
337 # TODO proxy header support
346 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
338 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
347 self.port),
339 self.port),
348 {}, HTTP_VER_1_0)
340 {}, HTTP_VER_1_0)
349 sock.send(data)
341 sock.send(data)
350 sock.setblocking(0)
342 sock.setblocking(0)
351 r = self.response_class(sock, self.timeout, 'CONNECT')
343 r = self.response_class(sock, self.timeout, 'CONNECT')
352 timeout_exc = HTTPTimeoutException(
344 timeout_exc = HTTPTimeoutException(
353 'Timed out waiting for CONNECT response from proxy')
345 'Timed out waiting for CONNECT response from proxy')
354 while not r.complete():
346 while not r.complete():
355 try:
347 try:
356 if not r._select():
348 if not r._select():
357 if not r.complete():
349 if not r.complete():
358 raise timeout_exc
350 raise timeout_exc
359 except HTTPTimeoutException:
351 except HTTPTimeoutException:
360 # This raise/except pattern looks goofy, but
352 # This raise/except pattern looks goofy, but
361 # _select can raise the timeout as well as the
353 # _select can raise the timeout as well as the
362 # loop body. I wish it wasn't this convoluted,
354 # loop body. I wish it wasn't this convoluted,
363 # but I don't have a better solution
355 # but I don't have a better solution
364 # immediately handy.
356 # immediately handy.
365 raise timeout_exc
357 raise timeout_exc
366 if r.status != 200:
358 if r.status != 200:
367 raise HTTPProxyConnectFailedException(
359 raise HTTPProxyConnectFailedException(
368 'Proxy connection failed: %d %s' % (r.status,
360 'Proxy connection failed: %d %s' % (r.status,
369 r.read()))
361 r.read()))
370 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
362 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
371 self.host, self.port)
363 self.host, self.port)
372 else:
364 else:
373 sock = socketutil.create_connection((self.host, self.port))
365 sock = socketutil.create_connection((self.host, self.port))
374 if self.ssl:
366 if self.ssl:
375 # This is the default, but in the case of proxied SSL
367 # This is the default, but in the case of proxied SSL
376 # requests the proxy logic above will have cleared
368 # requests the proxy logic above will have cleared
377 # blocking mode, so re-enable it just to be safe.
369 # blocking mode, so re-enable it just to be safe.
378 sock.setblocking(1)
370 sock.setblocking(1)
379 logger.debug('wrapping socket for ssl with options %r',
371 logger.debug('wrapping socket for ssl with options %r',
380 self.ssl_opts)
372 self.ssl_opts)
381 sock = socketutil.wrap_socket(sock, **self.ssl_opts)
373 sock = socketutil.wrap_socket(sock, **self.ssl_opts)
382 if self._ssl_validator:
374 if self._ssl_validator:
383 self._ssl_validator(sock)
375 self._ssl_validator(sock)
384 sock.setblocking(0)
376 sock.setblocking(0)
385 self.sock = sock
377 self.sock = sock
386
378
387 def buildheaders(self, method, path, headers, http_ver):
379 def buildheaders(self, method, path, headers, http_ver):
388 if self.ssl and self.port == 443 or self.port == 80:
380 if self.ssl and self.port == 443 or self.port == 80:
389 # default port for protocol, so leave it out
381 # default port for protocol, so leave it out
390 hdrhost = self.host
382 hdrhost = self.host
391 else:
383 else:
392 # include nonstandard port in header
384 # include nonstandard port in header
393 if ':' in self.host: # must be IPv6
385 if ':' in self.host: # must be IPv6
394 hdrhost = '[%s]:%d' % (self.host, self.port)
386 hdrhost = '[%s]:%d' % (self.host, self.port)
395 else:
387 else:
396 hdrhost = '%s:%d' % (self.host, self.port)
388 hdrhost = '%s:%d' % (self.host, self.port)
397 if self._proxy_host and not self.ssl:
389 if self._proxy_host and not self.ssl:
398 # When talking to a regular http proxy we must send the
390 # When talking to a regular http proxy we must send the
399 # full URI, but in all other cases we must not (although
391 # full URI, but in all other cases we must not (although
400 # technically RFC 2616 says servers must accept our
392 # technically RFC 2616 says servers must accept our
401 # request if we screw up, experimentally few do that
393 # request if we screw up, experimentally few do that
402 # correctly.)
394 # correctly.)
403 assert path[0] == '/', 'path must start with a /'
395 assert path[0] == '/', 'path must start with a /'
404 path = 'http://%s%s' % (hdrhost, path)
396 path = 'http://%s%s' % (hdrhost, path)
405 outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
397 outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
406 headers['host'] = ('Host', hdrhost)
398 headers['host'] = ('Host', hdrhost)
407 headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
399 headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
408 for hdr, val in headers.itervalues():
400 for hdr, val in headers.itervalues():
409 outgoing.append('%s: %s%s' % (hdr, val, EOL))
401 outgoing.append('%s: %s%s' % (hdr, val, EOL))
410 outgoing.append(EOL)
402 outgoing.append(EOL)
411 return ''.join(outgoing)
403 return ''.join(outgoing)
412
404
413 def close(self):
405 def close(self):
414 """Close the connection to the server.
406 """Close the connection to the server.
415
407
416 This is a no-op if the connection is already closed. The
408 This is a no-op if the connection is already closed. The
417 connection may automatically close if requested by the server
409 connection may automatically close if requested by the server
418 or required by the nature of a response.
410 or required by the nature of a response.
419 """
411 """
420 if self.sock is None:
412 if self.sock is None:
421 return
413 return
422 self.sock.close()
414 self.sock.close()
423 self.sock = None
415 self.sock = None
424 logger.info('closed connection to %s on %s', self.host, self.port)
416 logger.info('closed connection to %s on %s', self.host, self.port)
425
417
426 def busy(self):
418 def busy(self):
427 """Returns True if this connection object is currently in use.
419 """Returns True if this connection object is currently in use.
428
420
429 If a response is still pending, this will return True, even if
421 If a response is still pending, this will return True, even if
430 the request has finished sending. In the future,
422 the request has finished sending. In the future,
431 HTTPConnection may transparently juggle multiple connections
423 HTTPConnection may transparently juggle multiple connections
432 to the server, in which case this will be useful to detect if
424 to the server, in which case this will be useful to detect if
433 any of those connections is ready for use.
425 any of those connections is ready for use.
434 """
426 """
435 cr = self._current_response
427 cr = self._current_response
436 if cr is not None:
428 if cr is not None:
437 if self._current_response_taken:
429 if self._current_response_taken:
438 if cr.will_close:
430 if cr.will_close:
439 self.sock = None
431 self.sock = None
440 self._current_response = None
432 self._current_response = None
441 return False
433 return False
442 elif cr.complete():
434 elif cr.complete():
443 self._current_response = None
435 self._current_response = None
444 return False
436 return False
445 return True
437 return True
446 return False
438 return False
447
439
448 def request(self, method, path, body=None, headers={},
440 def request(self, method, path, body=None, headers={},
449 expect_continue=False):
441 expect_continue=False):
450 """Send a request to the server.
442 """Send a request to the server.
451
443
452 For increased flexibility, this does not return the response
444 For increased flexibility, this does not return the response
453 object. Future versions of HTTPConnection that juggle multiple
445 object. Future versions of HTTPConnection that juggle multiple
454 sockets will be able to send (for example) 5 requests all at
446 sockets will be able to send (for example) 5 requests all at
455 once, and then let the requests arrive as data is
447 once, and then let the requests arrive as data is
456 available. Use the `getresponse()` method to retrieve the
448 available. Use the `getresponse()` method to retrieve the
457 response.
449 response.
458 """
450 """
459 if self.busy():
451 if self.busy():
460 raise httplib.CannotSendRequest(
452 raise httplib.CannotSendRequest(
461 'Can not send another request before '
453 'Can not send another request before '
462 'current response is read!')
454 'current response is read!')
463 self._current_response_taken = False
455 self._current_response_taken = False
464
456
465 logger.info('sending %s request for %s to %s on port %s',
457 logger.info('sending %s request for %s to %s on port %s',
466 method, path, self.host, self.port)
458 method, path, self.host, self.port)
467 hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
459 hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
468 if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
460 if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
469 expect_continue = True
461 expect_continue = True
470 elif expect_continue:
462 elif expect_continue:
471 hdrs['expect'] = ('Expect', '100-Continue')
463 hdrs['expect'] = ('Expect', '100-Continue')
472
464
473 chunked = False
465 chunked = False
474 if body and HDR_CONTENT_LENGTH not in hdrs:
466 if body and HDR_CONTENT_LENGTH not in hdrs:
475 if getattr(body, '__len__', False):
467 if getattr(body, '__len__', False):
476 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
468 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
477 elif getattr(body, 'read', False):
469 elif getattr(body, 'read', False):
478 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
470 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
479 XFER_ENCODING_CHUNKED)
471 XFER_ENCODING_CHUNKED)
480 chunked = True
472 chunked = True
481 else:
473 else:
482 raise BadRequestData('body has no __len__() nor read()')
474 raise BadRequestData('body has no __len__() nor read()')
483
475
484 self._connect()
476 self._connect()
485 outgoing_headers = self.buildheaders(
477 outgoing_headers = self.buildheaders(
486 method, path, hdrs, self.http_version)
478 method, path, hdrs, self.http_version)
487 response = None
479 response = None
488 first = True
480 first = True
489
481
490 def reconnect(where):
482 def reconnect(where):
491 logger.info('reconnecting during %s', where)
483 logger.info('reconnecting during %s', where)
492 self.close()
484 self.close()
493 self._connect()
485 self._connect()
494
486
495 while ((outgoing_headers or body)
487 while ((outgoing_headers or body)
496 and not (response and response.complete())):
488 and not (response and response.complete())):
497 select_timeout = self.timeout
489 select_timeout = self.timeout
498 out = outgoing_headers or body
490 out = outgoing_headers or body
499 blocking_on_continue = False
491 blocking_on_continue = False
500 if expect_continue and not outgoing_headers and not (
492 if expect_continue and not outgoing_headers and not (
501 response and (response.headers or response.continued)):
493 response and (response.headers or response.continued)):
502 logger.info(
494 logger.info(
503 'waiting up to %s seconds for'
495 'waiting up to %s seconds for'
504 ' continue response from server',
496 ' continue response from server',
505 self.continue_timeout)
497 self.continue_timeout)
506 select_timeout = self.continue_timeout
498 select_timeout = self.continue_timeout
507 blocking_on_continue = True
499 blocking_on_continue = True
508 out = False
500 out = False
509 if out:
501 if out:
510 w = [self.sock]
502 w = [self.sock]
511 else:
503 else:
512 w = []
504 w = []
513 r, w, x = select.select([self.sock], w, [], select_timeout)
505 r, w, x = select.select([self.sock], w, [], select_timeout)
514 # if we were expecting a 100 continue and it's been long
506 # if we were expecting a 100 continue and it's been long
515 # enough, just go ahead and assume it's ok. This is the
507 # enough, just go ahead and assume it's ok. This is the
516 # recommended behavior from the RFC.
508 # recommended behavior from the RFC.
517 if r == w == x == []:
509 if r == w == x == []:
518 if blocking_on_continue:
510 if blocking_on_continue:
519 expect_continue = False
511 expect_continue = False
520 logger.info('no response to continue expectation from '
512 logger.info('no response to continue expectation from '
521 'server, optimistically sending request body')
513 'server, optimistically sending request body')
522 else:
514 else:
523 raise HTTPTimeoutException('timeout sending data')
515 raise HTTPTimeoutException('timeout sending data')
524 was_first = first
516 was_first = first
525
517
526 # incoming data
518 # incoming data
527 if r:
519 if r:
528 try:
520 try:
529 try:
521 try:
530 data = r[0].recv(INCOMING_BUFFER_SIZE)
522 data = r[0].recv(INCOMING_BUFFER_SIZE)
531 except socket.sslerror, e:
523 except socket.sslerror, e:
532 if e.args[0] != socket.SSL_ERROR_WANT_READ:
524 if e.args[0] != socket.SSL_ERROR_WANT_READ:
533 raise
525 raise
534 logger.debug(
526 logger.debug(
535 'SSL_ERROR_WANT_READ while sending data, retrying...')
527 'SSL_ERROR_WANT_READ while sending data, retrying...')
536 continue
528 continue
537 if not data:
529 if not data:
538 logger.info('socket appears closed in read')
530 logger.info('socket appears closed in read')
539 self.sock = None
531 self.sock = None
540 self._current_response = None
532 self._current_response = None
541 if response is not None:
533 if response is not None:
542 response._close()
534 response._close()
543 # This if/elif ladder is a bit subtle,
535 # This if/elif ladder is a bit subtle,
544 # comments in each branch should help.
536 # comments in each branch should help.
545 if response is not None and response.complete():
537 if response is not None and response.complete():
546 # Server responded completely and then
538 # Server responded completely and then
547 # closed the socket. We should just shut
539 # closed the socket. We should just shut
548 # things down and let the caller get their
540 # things down and let the caller get their
549 # response.
541 # response.
550 logger.info('Got an early response, '
542 logger.info('Got an early response, '
551 'aborting remaining request.')
543 'aborting remaining request.')
552 break
544 break
553 elif was_first and response is None:
545 elif was_first and response is None:
554 # Most likely a keepalive that got killed
546 # Most likely a keepalive that got killed
555 # on the server's end. Commonly happens
547 # on the server's end. Commonly happens
556 # after getting a really large response
548 # after getting a really large response
557 # from the server.
549 # from the server.
558 logger.info(
550 logger.info(
559 'Connection appeared closed in read on first'
551 'Connection appeared closed in read on first'
560 ' request loop iteration, will retry.')
552 ' request loop iteration, will retry.')
561 reconnect('read')
553 reconnect('read')
562 continue
554 continue
563 else:
555 else:
564 # We didn't just send the first data hunk,
556 # We didn't just send the first data hunk,
565 # and either have a partial response or no
557 # and either have a partial response or no
566 # response at all. There's really nothing
558 # response at all. There's really nothing
567 # meaningful we can do here.
559 # meaningful we can do here.
568 raise HTTPStateError(
560 raise HTTPStateError(
569 'Connection appears closed after '
561 'Connection appears closed after '
570 'some request data was written, but the '
562 'some request data was written, but the '
571 'response was missing or incomplete!')
563 'response was missing or incomplete!')
572 logger.debug('read %d bytes in request()', len(data))
564 logger.debug('read %d bytes in request()', len(data))
573 if response is None:
565 if response is None:
574 response = self.response_class(r[0], self.timeout, method)
566 response = self.response_class(r[0], self.timeout, method)
575 response._load_response(data)
567 response._load_response(data)
576 # Jump to the next select() call so we load more
568 # Jump to the next select() call so we load more
577 # data if the server is still sending us content.
569 # data if the server is still sending us content.
578 continue
570 continue
579 except socket.error, e:
571 except socket.error, e:
580 if e[0] != errno.EPIPE and not was_first:
572 if e[0] != errno.EPIPE and not was_first:
581 raise
573 raise
582
574
583 # outgoing data
575 # outgoing data
584 if w and out:
576 if w and out:
585 try:
577 try:
586 if getattr(out, 'read', False):
578 if getattr(out, 'read', False):
587 data = out.read(OUTGOING_BUFFER_SIZE)
579 data = out.read(OUTGOING_BUFFER_SIZE)
588 if not data:
580 if not data:
589 continue
581 continue
590 if len(data) < OUTGOING_BUFFER_SIZE:
582 if len(data) < OUTGOING_BUFFER_SIZE:
591 if chunked:
583 if chunked:
592 body = '0' + EOL + EOL
584 body = '0' + EOL + EOL
593 else:
585 else:
594 body = None
586 body = None
595 if chunked:
587 if chunked:
596 out = hex(len(data))[2:] + EOL + data + EOL
588 out = hex(len(data))[2:] + EOL + data + EOL
597 else:
589 else:
598 out = data
590 out = data
599 amt = w[0].send(out)
591 amt = w[0].send(out)
600 except socket.error, e:
592 except socket.error, e:
601 if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
593 if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
602 # This means that SSL hasn't flushed its buffer into
594 # This means that SSL hasn't flushed its buffer into
603 # the socket yet.
595 # the socket yet.
604 # TODO: find a way to block on ssl flushing its buffer
596 # TODO: find a way to block on ssl flushing its buffer
605 # similar to selecting on a raw socket.
597 # similar to selecting on a raw socket.
606 continue
598 continue
607 elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
599 elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
608 and not first):
600 and not first):
609 raise
601 raise
610 reconnect('write')
602 reconnect('write')
611 amt = self.sock.send(out)
603 amt = self.sock.send(out)
612 logger.debug('sent %d', amt)
604 logger.debug('sent %d', amt)
613 first = False
605 first = False
614 # stash data we think we sent in case the socket breaks
606 # stash data we think we sent in case the socket breaks
615 # when we read from it
607 # when we read from it
616 if was_first:
608 if was_first:
617 sent_data = out[:amt]
609 sent_data = out[:amt]
618 if out is body:
610 if out is body:
619 body = out[amt:]
611 body = out[amt:]
620 else:
612 else:
621 outgoing_headers = out[amt:]
613 outgoing_headers = out[amt:]
622
614
623 # close if the server response said to or responded before eating
615 # close if the server response said to or responded before eating
624 # the whole request
616 # the whole request
625 if response is None:
617 if response is None:
626 response = self.response_class(self.sock, self.timeout, method)
618 response = self.response_class(self.sock, self.timeout, method)
627 complete = response.complete()
619 complete = response.complete()
628 data_left = bool(outgoing_headers or body)
620 data_left = bool(outgoing_headers or body)
629 if data_left:
621 if data_left:
630 logger.info('stopped sending request early, '
622 logger.info('stopped sending request early, '
631 'will close the socket to be safe.')
623 'will close the socket to be safe.')
632 response.will_close = True
624 response.will_close = True
633 if response.will_close:
625 if response.will_close:
634 # The socket will be closed by the response, so we disown
626 # The socket will be closed by the response, so we disown
635 # the socket
627 # the socket
636 self.sock = None
628 self.sock = None
637 self._current_response = response
629 self._current_response = response
638
630
639 def getresponse(self):
631 def getresponse(self):
640 if self._current_response is None:
632 if self._current_response is None:
641 raise httplib.ResponseNotReady()
633 raise httplib.ResponseNotReady()
642 r = self._current_response
634 r = self._current_response
643 while r.headers is None:
635 while r.headers is None:
644 if not r._select() and not r.complete():
636 if not r._select() and not r.complete():
645 raise _readers.HTTPRemoteClosedError()
637 raise _readers.HTTPRemoteClosedError()
646 if r.will_close:
638 if r.will_close:
647 self.sock = None
639 self.sock = None
648 self._current_response = None
640 self._current_response = None
649 elif r.complete():
641 elif r.complete():
650 self._current_response = None
642 self._current_response = None
651 else:
643 else:
652 self._current_response_taken = True
644 self._current_response_taken = True
653 return r
645 return r
654
646
655
647
656 class HTTPTimeoutException(httplib.HTTPException):
648 class HTTPTimeoutException(httplib.HTTPException):
657 """A timeout occurred while waiting on the server."""
649 """A timeout occurred while waiting on the server."""
658
650
659
651
660 class BadRequestData(httplib.HTTPException):
652 class BadRequestData(httplib.HTTPException):
661 """Request body object has neither __len__ nor read."""
653 """Request body object has neither __len__ nor read."""
662
654
663
655
664 class HTTPProxyConnectFailedException(httplib.HTTPException):
656 class HTTPProxyConnectFailedException(httplib.HTTPException):
665 """Connecting to the HTTP proxy failed."""
657 """Connecting to the HTTP proxy failed."""
666
658
667
659
668 class HTTPStateError(httplib.HTTPException):
660 class HTTPStateError(httplib.HTTPException):
669 """Invalid internal state encountered."""
661 """Invalid internal state encountered."""
670
662
671 # Forward this exception type from _readers since it needs to be part
663 # Forward this exception type from _readers since it needs to be part
672 # of the public API.
664 # of the public API.
673 HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
665 HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
674 # no-check-code
666 # no-check-code
@@ -1,206 +1,229 b''
1 # Copyright 2011, Google Inc.
1 # Copyright 2011, Google Inc.
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # Redistribution and use in source and binary forms, with or without
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
5 # modification, are permitted provided that the following conditions are
6 # met:
6 # met:
7 #
7 #
8 # * Redistributions of source code must retain the above copyright
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
12 # in the documentation and/or other materials provided with the
13 # distribution.
13 # distribution.
14 # * Neither the name of Google Inc. nor the names of its
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
16 # this software without specific prior written permission.
17
17
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 """Reader objects to abstract out different body response types.
29 """Reader objects to abstract out different body response types.
30
30
31 This module is package-private. It is not expected that these will
31 This module is package-private. It is not expected that these will
32 have any clients outside of httpplus.
32 have any clients outside of httpplus.
33 """
33 """
34
34
35 import httplib
35 import httplib
36 import itertools
36 import itertools
37 import logging
37 import logging
38
38
39 logger = logging.getLogger(__name__)
39 logger = logging.getLogger(__name__)
40
40
41
41
42 class ReadNotReady(Exception):
42 class ReadNotReady(Exception):
43 """Raised when read() is attempted but not enough data is loaded."""
43 """Raised when read() is attempted but not enough data is loaded."""
44
44
45
45
46 class HTTPRemoteClosedError(httplib.HTTPException):
46 class HTTPRemoteClosedError(httplib.HTTPException):
47 """The server closed the remote socket in the middle of a response."""
47 """The server closed the remote socket in the middle of a response."""
48
48
49
49
50 class AbstractReader(object):
50 class AbstractReader(object):
51 """Abstract base class for response readers.
51 """Abstract base class for response readers.
52
52
53 Subclasses must implement _load, and should implement _close if
53 Subclasses must implement _load, and should implement _close if
54 it's not an error for the server to close their socket without
54 it's not an error for the server to close their socket without
55 some termination condition being detected during _load.
55 some termination condition being detected during _load.
56 """
56 """
57 def __init__(self):
57 def __init__(self):
58 self._finished = False
58 self._finished = False
59 self._done_chunks = []
59 self._done_chunks = []
60 self.available_data = 0
60 self.available_data = 0
61
61
62 def addchunk(self, data):
62 def addchunk(self, data):
63 self._done_chunks.append(data)
63 self._done_chunks.append(data)
64 self.available_data += len(data)
64 self.available_data += len(data)
65
65
66 def pushchunk(self, data):
66 def pushchunk(self, data):
67 self._done_chunks.insert(0, data)
67 self._done_chunks.insert(0, data)
68 self.available_data += len(data)
68 self.available_data += len(data)
69
69
70 def popchunk(self):
70 def popchunk(self):
71 b = self._done_chunks.pop(0)
71 b = self._done_chunks.pop(0)
72 self.available_data -= len(b)
72 self.available_data -= len(b)
73
73
74 return b
74 return b
75
75
76 def done(self):
76 def done(self):
77 return self._finished
77 return self._finished
78
78
79 def read(self, amt):
79 def read(self, amt):
80 if self.available_data < amt and not self._finished:
80 if self.available_data < amt and not self._finished:
81 raise ReadNotReady()
81 raise ReadNotReady()
82 blocks = []
82 blocks = []
83 need = amt
83 need = amt
84 while self._done_chunks:
84 while self._done_chunks:
85 b = self.popchunk()
85 b = self.popchunk()
86 if len(b) > need:
86 if len(b) > need:
87 nb = b[:need]
87 nb = b[:need]
88 self.pushchunk(b[need:])
88 self.pushchunk(b[need:])
89 b = nb
89 b = nb
90 blocks.append(b)
90 blocks.append(b)
91 need -= len(b)
91 need -= len(b)
92 if need == 0:
92 if need == 0:
93 break
93 break
94 result = ''.join(blocks)
94 result = ''.join(blocks)
95 assert len(result) == amt or (self._finished and len(result) < amt)
95 assert len(result) == amt or (self._finished and len(result) < amt)
96
96
97 return result
97 return result
98
98
99 def readto(self, delimstr, blocks = None):
100 """return available data chunks up to the first one in which delimstr
101 occurs. No data will be returned after delimstr -- the chunk in which
102 it occurs will be split and the remainder pushed back onto the available
103 data queue. If blocks is supplied chunks will be added to blocks, otherwise
104 a new list will be allocated.
105 """
106 if blocks is None:
107 blocks = []
108
109 while self._done_chunks:
110 b = self.popchunk()
111 i = b.find(delimstr) + len(delimstr)
112 if i:
113 if i < len(b):
114 self.pushchunk(b[i:])
115 blocks.append(b[:i])
116 break
117 else:
118 blocks.append(b)
119
120 return blocks
121
99 def _load(self, data): # pragma: no cover
122 def _load(self, data): # pragma: no cover
100 """Subclasses must implement this.
123 """Subclasses must implement this.
101
124
102 As data is available to be read out of this object, it should
125 As data is available to be read out of this object, it should
103 be placed into the _done_chunks list. Subclasses should not
126 be placed into the _done_chunks list. Subclasses should not
104 rely on data remaining in _done_chunks forever, as it may be
127 rely on data remaining in _done_chunks forever, as it may be
105 reaped if the client is parsing data as it comes in.
128 reaped if the client is parsing data as it comes in.
106 """
129 """
107 raise NotImplementedError
130 raise NotImplementedError
108
131
109 def _close(self):
132 def _close(self):
110 """Default implementation of close.
133 """Default implementation of close.
111
134
112 The default implementation assumes that the reader will mark
135 The default implementation assumes that the reader will mark
113 the response as finished on the _finished attribute once the
136 the response as finished on the _finished attribute once the
114 entire response body has been read. In the event that this is
137 entire response body has been read. In the event that this is
115 not true, the subclass should override the implementation of
138 not true, the subclass should override the implementation of
116 close (for example, close-is-end responses have to set
139 close (for example, close-is-end responses have to set
117 self._finished in the close handler.)
140 self._finished in the close handler.)
118 """
141 """
119 if not self._finished:
142 if not self._finished:
120 raise HTTPRemoteClosedError(
143 raise HTTPRemoteClosedError(
121 'server appears to have closed the socket mid-response')
144 'server appears to have closed the socket mid-response')
122
145
123
146
124 class AbstractSimpleReader(AbstractReader):
147 class AbstractSimpleReader(AbstractReader):
125 """Abstract base class for simple readers that require no response decoding.
148 """Abstract base class for simple readers that require no response decoding.
126
149
127 Examples of such responses are Connection: Close (close-is-end)
150 Examples of such responses are Connection: Close (close-is-end)
128 and responses that specify a content length.
151 and responses that specify a content length.
129 """
152 """
130 def _load(self, data):
153 def _load(self, data):
131 if data:
154 if data:
132 assert not self._finished, (
155 assert not self._finished, (
133 'tried to add data (%r) to a closed reader!' % data)
156 'tried to add data (%r) to a closed reader!' % data)
134 logger.debug('%s read an additional %d data', self.name, len(data))
157 logger.debug('%s read an additional %d data', self.name, len(data))
135 self.addchunk(data)
158 self.addchunk(data)
136
159
137
160
138 class CloseIsEndReader(AbstractSimpleReader):
161 class CloseIsEndReader(AbstractSimpleReader):
139 """Reader for responses that specify Connection: Close for length."""
162 """Reader for responses that specify Connection: Close for length."""
140 name = 'close-is-end'
163 name = 'close-is-end'
141
164
142 def _close(self):
165 def _close(self):
143 logger.info('Marking close-is-end reader as closed.')
166 logger.info('Marking close-is-end reader as closed.')
144 self._finished = True
167 self._finished = True
145
168
146
169
147 class ContentLengthReader(AbstractSimpleReader):
170 class ContentLengthReader(AbstractSimpleReader):
148 """Reader for responses that specify an exact content length."""
171 """Reader for responses that specify an exact content length."""
149 name = 'content-length'
172 name = 'content-length'
150
173
151 def __init__(self, amount):
174 def __init__(self, amount):
152 AbstractReader.__init__(self)
175 AbstractReader.__init__(self)
153 self._amount = amount
176 self._amount = amount
154 if amount == 0:
177 if amount == 0:
155 self._finished = True
178 self._finished = True
156 self._amount_seen = 0
179 self._amount_seen = 0
157
180
158 def _load(self, data):
181 def _load(self, data):
159 AbstractSimpleReader._load(self, data)
182 AbstractSimpleReader._load(self, data)
160 self._amount_seen += len(data)
183 self._amount_seen += len(data)
161 if self._amount_seen >= self._amount:
184 if self._amount_seen >= self._amount:
162 self._finished = True
185 self._finished = True
163 logger.debug('content-length read complete')
186 logger.debug('content-length read complete')
164
187
165
188
166 class ChunkedReader(AbstractReader):
189 class ChunkedReader(AbstractReader):
167 """Reader for chunked transfer encoding responses."""
190 """Reader for chunked transfer encoding responses."""
168 def __init__(self, eol):
191 def __init__(self, eol):
169 AbstractReader.__init__(self)
192 AbstractReader.__init__(self)
170 self._eol = eol
193 self._eol = eol
171 self._leftover_skip_amt = 0
194 self._leftover_skip_amt = 0
172 self._leftover_data = ''
195 self._leftover_data = ''
173
196
174 def _load(self, data):
197 def _load(self, data):
175 assert not self._finished, 'tried to add data to a closed reader!'
198 assert not self._finished, 'tried to add data to a closed reader!'
176 logger.debug('chunked read an additional %d data', len(data))
199 logger.debug('chunked read an additional %d data', len(data))
177 position = 0
200 position = 0
178 if self._leftover_data:
201 if self._leftover_data:
179 logger.debug('chunked reader trying to finish block from leftover data')
202 logger.debug('chunked reader trying to finish block from leftover data')
180 # TODO: avoid this string concatenation if possible
203 # TODO: avoid this string concatenation if possible
181 data = self._leftover_data + data
204 data = self._leftover_data + data
182 position = self._leftover_skip_amt
205 position = self._leftover_skip_amt
183 self._leftover_data = ''
206 self._leftover_data = ''
184 self._leftover_skip_amt = 0
207 self._leftover_skip_amt = 0
185 datalen = len(data)
208 datalen = len(data)
186 while position < datalen:
209 while position < datalen:
187 split = data.find(self._eol, position)
210 split = data.find(self._eol, position)
188 if split == -1:
211 if split == -1:
189 self._leftover_data = data
212 self._leftover_data = data
190 self._leftover_skip_amt = position
213 self._leftover_skip_amt = position
191 return
214 return
192 amt = int(data[position:split], base=16)
215 amt = int(data[position:split], base=16)
193 block_start = split + len(self._eol)
216 block_start = split + len(self._eol)
194 # If the whole data chunk plus the eol trailer hasn't
217 # If the whole data chunk plus the eol trailer hasn't
195 # loaded, we'll wait for the next load.
218 # loaded, we'll wait for the next load.
196 if block_start + amt + len(self._eol) > len(data):
219 if block_start + amt + len(self._eol) > len(data):
197 self._leftover_data = data
220 self._leftover_data = data
198 self._leftover_skip_amt = position
221 self._leftover_skip_amt = position
199 return
222 return
200 if amt == 0:
223 if amt == 0:
201 self._finished = True
224 self._finished = True
202 logger.debug('closing chunked reader due to chunk of length 0')
225 logger.debug('closing chunked reader due to chunk of length 0')
203 return
226 return
204 self.addchunk(data[block_start:block_start + amt])
227 self.addchunk(data[block_start:block_start + amt])
205 position = block_start + amt + len(self._eol)
228 position = block_start + amt + len(self._eol)
206 # no-check-code
229 # no-check-code
General Comments 0
You need to be logged in to leave comments. Login now