##// END OF EJS Templates
spelling: requested
timeless@mozdev.org -
r17536:dc6364a8 default
parent child Browse files
Show More
@@ -1,674 +1,674 b''
1 # Copyright 2010, Google Inc.
1 # Copyright 2010, Google Inc.
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # Redistribution and use in source and binary forms, with or without
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
5 # modification, are permitted provided that the following conditions are
6 # met:
6 # met:
7 #
7 #
8 # * Redistributions of source code must retain the above copyright
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
12 # in the documentation and/or other materials provided with the
13 # distribution.
13 # distribution.
14 # * Neither the name of Google Inc. nor the names of its
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
16 # this software without specific prior written permission.
17
17
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 """Improved HTTP/1.1 client library
29 """Improved HTTP/1.1 client library
30
30
31 This library contains an HTTPConnection which is similar to the one in
31 This library contains an HTTPConnection which is similar to the one in
32 httplib, but has several additional features:
32 httplib, but has several additional features:
33
33
34 * supports keepalives natively
34 * supports keepalives natively
35 * uses select() to block for incoming data
35 * uses select() to block for incoming data
36 * notices when the server responds early to a request
36 * notices when the server responds early to a request
37 * implements ssl inline instead of in a different class
37 * implements ssl inline instead of in a different class
38 """
38 """
39
39
40 import cStringIO
40 import cStringIO
41 import errno
41 import errno
42 import httplib
42 import httplib
43 import logging
43 import logging
44 import rfc822
44 import rfc822
45 import select
45 import select
46 import socket
46 import socket
47
47
48 import _readers
48 import _readers
49 import socketutil
49 import socketutil
50
50
51 logger = logging.getLogger(__name__)
51 logger = logging.getLogger(__name__)
52
52
53 __all__ = ['HTTPConnection', 'HTTPResponse']
53 __all__ = ['HTTPConnection', 'HTTPResponse']
54
54
55 HTTP_VER_1_0 = 'HTTP/1.0'
55 HTTP_VER_1_0 = 'HTTP/1.0'
56 HTTP_VER_1_1 = 'HTTP/1.1'
56 HTTP_VER_1_1 = 'HTTP/1.1'
57
57
58 OUTGOING_BUFFER_SIZE = 1 << 15
58 OUTGOING_BUFFER_SIZE = 1 << 15
59 INCOMING_BUFFER_SIZE = 1 << 20
59 INCOMING_BUFFER_SIZE = 1 << 20
60
60
61 HDR_ACCEPT_ENCODING = 'accept-encoding'
61 HDR_ACCEPT_ENCODING = 'accept-encoding'
62 HDR_CONNECTION_CTRL = 'connection'
62 HDR_CONNECTION_CTRL = 'connection'
63 HDR_CONTENT_LENGTH = 'content-length'
63 HDR_CONTENT_LENGTH = 'content-length'
64 HDR_XFER_ENCODING = 'transfer-encoding'
64 HDR_XFER_ENCODING = 'transfer-encoding'
65
65
66 XFER_ENCODING_CHUNKED = 'chunked'
66 XFER_ENCODING_CHUNKED = 'chunked'
67
67
68 CONNECTION_CLOSE = 'close'
68 CONNECTION_CLOSE = 'close'
69
69
70 EOL = '\r\n'
70 EOL = '\r\n'
71 _END_HEADERS = EOL * 2
71 _END_HEADERS = EOL * 2
72
72
73 # Based on some searching around, 1 second seems like a reasonable
73 # Based on some searching around, 1 second seems like a reasonable
74 # default here.
74 # default here.
75 TIMEOUT_ASSUME_CONTINUE = 1
75 TIMEOUT_ASSUME_CONTINUE = 1
76 TIMEOUT_DEFAULT = None
76 TIMEOUT_DEFAULT = None
77
77
78
78
79 class HTTPResponse(object):
79 class HTTPResponse(object):
80 """Response from an HTTP server.
80 """Response from an HTTP server.
81
81
82 The response will continue to load as available. If you need the
82 The response will continue to load as available. If you need the
83 complete response before continuing, check the .complete() method.
83 complete response before continuing, check the .complete() method.
84 """
84 """
85 def __init__(self, sock, timeout, method):
85 def __init__(self, sock, timeout, method):
86 self.sock = sock
86 self.sock = sock
87 self.method = method
87 self.method = method
88 self.raw_response = ''
88 self.raw_response = ''
89 self._headers_len = 0
89 self._headers_len = 0
90 self.headers = None
90 self.headers = None
91 self.will_close = False
91 self.will_close = False
92 self.status_line = ''
92 self.status_line = ''
93 self.status = None
93 self.status = None
94 self.continued = False
94 self.continued = False
95 self.http_version = None
95 self.http_version = None
96 self.reason = None
96 self.reason = None
97 self._reader = None
97 self._reader = None
98
98
99 self._read_location = 0
99 self._read_location = 0
100 self._eol = EOL
100 self._eol = EOL
101
101
102 self._timeout = timeout
102 self._timeout = timeout
103
103
104 @property
104 @property
105 def _end_headers(self):
105 def _end_headers(self):
106 return self._eol * 2
106 return self._eol * 2
107
107
108 def complete(self):
108 def complete(self):
109 """Returns true if this response is completely loaded.
109 """Returns true if this response is completely loaded.
110
110
111 Note that if this is a connection where complete means the
111 Note that if this is a connection where complete means the
112 socket is closed, this will nearly always return False, even
112 socket is closed, this will nearly always return False, even
113 in cases where all the data has actually been loaded.
113 in cases where all the data has actually been loaded.
114 """
114 """
115 if self._reader:
115 if self._reader:
116 return self._reader.done()
116 return self._reader.done()
117
117
118 def _close(self):
118 def _close(self):
119 if self._reader is not None:
119 if self._reader is not None:
120 self._reader._close()
120 self._reader._close()
121
121
122 def readline(self):
122 def readline(self):
123 """Read a single line from the response body.
123 """Read a single line from the response body.
124
124
125 This may block until either a line ending is found or the
125 This may block until either a line ending is found or the
126 response is complete.
126 response is complete.
127 """
127 """
128 # TODO: move this into the reader interface where it can be
128 # TODO: move this into the reader interface where it can be
129 # smarter (and probably avoid copies)
129 # smarter (and probably avoid copies)
130 bytes = []
130 bytes = []
131 while not bytes:
131 while not bytes:
132 try:
132 try:
133 bytes = [self._reader.read(1)]
133 bytes = [self._reader.read(1)]
134 except _readers.ReadNotReady:
134 except _readers.ReadNotReady:
135 self._select()
135 self._select()
136 while bytes[-1] != '\n' and not self.complete():
136 while bytes[-1] != '\n' and not self.complete():
137 self._select()
137 self._select()
138 bytes.append(self._reader.read(1))
138 bytes.append(self._reader.read(1))
139 if bytes[-1] != '\n':
139 if bytes[-1] != '\n':
140 next = self._reader.read(1)
140 next = self._reader.read(1)
141 while next and next != '\n':
141 while next and next != '\n':
142 bytes.append(next)
142 bytes.append(next)
143 next = self._reader.read(1)
143 next = self._reader.read(1)
144 bytes.append(next)
144 bytes.append(next)
145 return ''.join(bytes)
145 return ''.join(bytes)
146
146
147 def read(self, length=None):
147 def read(self, length=None):
148 # if length is None, unbounded read
148 # if length is None, unbounded read
149 while (not self.complete() # never select on a finished read
149 while (not self.complete() # never select on a finished read
150 and (not length # unbounded, so we wait for complete()
150 and (not length # unbounded, so we wait for complete()
151 or length > self._reader.available_data)):
151 or length > self._reader.available_data)):
152 self._select()
152 self._select()
153 if not length:
153 if not length:
154 length = self._reader.available_data
154 length = self._reader.available_data
155 r = self._reader.read(length)
155 r = self._reader.read(length)
156 if self.complete() and self.will_close:
156 if self.complete() and self.will_close:
157 self.sock.close()
157 self.sock.close()
158 return r
158 return r
159
159
160 def _select(self):
160 def _select(self):
161 r, _, _ = select.select([self.sock], [], [], self._timeout)
161 r, _, _ = select.select([self.sock], [], [], self._timeout)
162 if not r:
162 if not r:
163 # socket was not readable. If the response is not
163 # socket was not readable. If the response is not
164 # complete, raise a timeout.
164 # complete, raise a timeout.
165 if not self.complete():
165 if not self.complete():
166 logger.info('timed out with timeout of %s', self._timeout)
166 logger.info('timed out with timeout of %s', self._timeout)
167 raise HTTPTimeoutException('timeout reading data')
167 raise HTTPTimeoutException('timeout reading data')
168 try:
168 try:
169 data = self.sock.recv(INCOMING_BUFFER_SIZE)
169 data = self.sock.recv(INCOMING_BUFFER_SIZE)
170 except socket.sslerror, e:
170 except socket.sslerror, e:
171 if e.args[0] != socket.SSL_ERROR_WANT_READ:
171 if e.args[0] != socket.SSL_ERROR_WANT_READ:
172 raise
172 raise
173 logger.debug('SSL_WANT_READ in _select, should retry later')
173 logger.debug('SSL_WANT_READ in _select, should retry later')
174 return True
174 return True
175 logger.debug('response read %d data during _select', len(data))
175 logger.debug('response read %d data during _select', len(data))
176 # If the socket was readable and no data was read, that means
176 # If the socket was readable and no data was read, that means
177 # the socket was closed. Inform the reader (if any) so it can
177 # the socket was closed. Inform the reader (if any) so it can
178 # raise an exception if this is an invalid situation.
178 # raise an exception if this is an invalid situation.
179 if not data:
179 if not data:
180 if self._reader:
180 if self._reader:
181 self._reader._close()
181 self._reader._close()
182 return False
182 return False
183 else:
183 else:
184 self._load_response(data)
184 self._load_response(data)
185 return True
185 return True
186
186
187 def _load_response(self, data):
187 def _load_response(self, data):
188 # Being here implies we're not at the end of the headers yet,
188 # Being here implies we're not at the end of the headers yet,
189 # since at the end of this method if headers were completely
189 # since at the end of this method if headers were completely
190 # loaded we replace this method with the load() method of the
190 # loaded we replace this method with the load() method of the
191 # reader we created.
191 # reader we created.
192 self.raw_response += data
192 self.raw_response += data
193 # This is a bogus server with bad line endings
193 # This is a bogus server with bad line endings
194 if self._eol not in self.raw_response:
194 if self._eol not in self.raw_response:
195 for bad_eol in ('\n', '\r'):
195 for bad_eol in ('\n', '\r'):
196 if (bad_eol in self.raw_response
196 if (bad_eol in self.raw_response
197 # verify that bad_eol is not the end of the incoming data
197 # verify that bad_eol is not the end of the incoming data
198 # as this could be a response line that just got
198 # as this could be a response line that just got
199 # split between \r and \n.
199 # split between \r and \n.
200 and (self.raw_response.index(bad_eol) <
200 and (self.raw_response.index(bad_eol) <
201 (len(self.raw_response) - 1))):
201 (len(self.raw_response) - 1))):
202 logger.info('bogus line endings detected, '
202 logger.info('bogus line endings detected, '
203 'using %r for EOL', bad_eol)
203 'using %r for EOL', bad_eol)
204 self._eol = bad_eol
204 self._eol = bad_eol
205 break
205 break
206 # exit early if not at end of headers
206 # exit early if not at end of headers
207 if self._end_headers not in self.raw_response or self.headers:
207 if self._end_headers not in self.raw_response or self.headers:
208 return
208 return
209
209
210 # handle 100-continue response
210 # handle 100-continue response
211 hdrs, body = self.raw_response.split(self._end_headers, 1)
211 hdrs, body = self.raw_response.split(self._end_headers, 1)
212 http_ver, status = hdrs.split(' ', 1)
212 http_ver, status = hdrs.split(' ', 1)
213 if status.startswith('100'):
213 if status.startswith('100'):
214 self.raw_response = body
214 self.raw_response = body
215 self.continued = True
215 self.continued = True
216 logger.debug('continue seen, setting body to %r', body)
216 logger.debug('continue seen, setting body to %r', body)
217 return
217 return
218
218
219 # arriving here means we should parse response headers
219 # arriving here means we should parse response headers
220 # as all headers have arrived completely
220 # as all headers have arrived completely
221 hdrs, body = self.raw_response.split(self._end_headers, 1)
221 hdrs, body = self.raw_response.split(self._end_headers, 1)
222 del self.raw_response
222 del self.raw_response
223 if self._eol in hdrs:
223 if self._eol in hdrs:
224 self.status_line, hdrs = hdrs.split(self._eol, 1)
224 self.status_line, hdrs = hdrs.split(self._eol, 1)
225 else:
225 else:
226 self.status_line = hdrs
226 self.status_line = hdrs
227 hdrs = ''
227 hdrs = ''
228 # TODO HTTP < 1.0 support
228 # TODO HTTP < 1.0 support
229 (self.http_version, self.status,
229 (self.http_version, self.status,
230 self.reason) = self.status_line.split(' ', 2)
230 self.reason) = self.status_line.split(' ', 2)
231 self.status = int(self.status)
231 self.status = int(self.status)
232 if self._eol != EOL:
232 if self._eol != EOL:
233 hdrs = hdrs.replace(self._eol, '\r\n')
233 hdrs = hdrs.replace(self._eol, '\r\n')
234 headers = rfc822.Message(cStringIO.StringIO(hdrs))
234 headers = rfc822.Message(cStringIO.StringIO(hdrs))
235 content_len = None
235 content_len = None
236 if HDR_CONTENT_LENGTH in headers:
236 if HDR_CONTENT_LENGTH in headers:
237 content_len = int(headers[HDR_CONTENT_LENGTH])
237 content_len = int(headers[HDR_CONTENT_LENGTH])
238 if self.http_version == HTTP_VER_1_0:
238 if self.http_version == HTTP_VER_1_0:
239 self.will_close = True
239 self.will_close = True
240 elif HDR_CONNECTION_CTRL in headers:
240 elif HDR_CONNECTION_CTRL in headers:
241 self.will_close = (
241 self.will_close = (
242 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
242 headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE)
243 if (HDR_XFER_ENCODING in headers
243 if (HDR_XFER_ENCODING in headers
244 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
244 and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED):
245 self._reader = _readers.ChunkedReader(self._eol)
245 self._reader = _readers.ChunkedReader(self._eol)
246 logger.debug('using a chunked reader')
246 logger.debug('using a chunked reader')
247 else:
247 else:
248 # HEAD responses are forbidden from returning a body, and
248 # HEAD responses are forbidden from returning a body, and
249 # it's implausible for a CONNECT response to use
249 # it's implausible for a CONNECT response to use
250 # close-is-end logic for an OK response.
250 # close-is-end logic for an OK response.
251 if (self.method == 'HEAD' or
251 if (self.method == 'HEAD' or
252 (self.method == 'CONNECT' and content_len is None)):
252 (self.method == 'CONNECT' and content_len is None)):
253 content_len = 0
253 content_len = 0
254 if content_len is not None:
254 if content_len is not None:
255 logger.debug('using a content-length reader with length %d',
255 logger.debug('using a content-length reader with length %d',
256 content_len)
256 content_len)
257 self._reader = _readers.ContentLengthReader(content_len)
257 self._reader = _readers.ContentLengthReader(content_len)
258 else:
258 else:
259 # Response body had no length specified and is not
259 # Response body had no length specified and is not
260 # chunked, so the end of the body will only be
260 # chunked, so the end of the body will only be
261 # identifiable by the termination of the socket by the
261 # identifiable by the termination of the socket by the
262 # server. My interpretation of the spec means that we
262 # server. My interpretation of the spec means that we
263 # are correct in hitting this case if
263 # are correct in hitting this case if
264 # transfer-encoding, content-length, and
264 # transfer-encoding, content-length, and
265 # connection-control were left unspecified.
265 # connection-control were left unspecified.
266 self._reader = _readers.CloseIsEndReader()
266 self._reader = _readers.CloseIsEndReader()
267 logger.debug('using a close-is-end reader')
267 logger.debug('using a close-is-end reader')
268 self.will_close = True
268 self.will_close = True
269
269
270 if body:
270 if body:
271 self._reader._load(body)
271 self._reader._load(body)
272 logger.debug('headers complete')
272 logger.debug('headers complete')
273 self.headers = headers
273 self.headers = headers
274 self._load_response = self._reader._load
274 self._load_response = self._reader._load
275
275
276
276
277 class HTTPConnection(object):
277 class HTTPConnection(object):
278 """Connection to a single http server.
278 """Connection to a single http server.
279
279
280 Supports 100-continue and keepalives natively. Uses select() for
280 Supports 100-continue and keepalives natively. Uses select() for
281 non-blocking socket operations.
281 non-blocking socket operations.
282 """
282 """
283 http_version = HTTP_VER_1_1
283 http_version = HTTP_VER_1_1
284 response_class = HTTPResponse
284 response_class = HTTPResponse
285
285
286 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
286 def __init__(self, host, port=None, use_ssl=None, ssl_validator=None,
287 timeout=TIMEOUT_DEFAULT,
287 timeout=TIMEOUT_DEFAULT,
288 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
288 continue_timeout=TIMEOUT_ASSUME_CONTINUE,
289 proxy_hostport=None, **ssl_opts):
289 proxy_hostport=None, **ssl_opts):
290 """Create a new HTTPConnection.
290 """Create a new HTTPConnection.
291
291
292 Args:
292 Args:
293 host: The host to which we'll connect.
293 host: The host to which we'll connect.
294 port: Optional. The port over which we'll connect. Default 80 for
294 port: Optional. The port over which we'll connect. Default 80 for
295 non-ssl, 443 for ssl.
295 non-ssl, 443 for ssl.
296 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
296 use_ssl: Optional. Whether to use ssl. Defaults to False if port is
297 not 443, True if port is 443.
297 not 443, True if port is 443.
298 ssl_validator: a function(socket) to validate the ssl cert
298 ssl_validator: a function(socket) to validate the ssl cert
299 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
299 timeout: Optional. Connection timeout, default is TIMEOUT_DEFAULT.
300 continue_timeout: Optional. Timeout for waiting on an expected
300 continue_timeout: Optional. Timeout for waiting on an expected
301 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
301 "100 Continue" response. Default is TIMEOUT_ASSUME_CONTINUE.
302 proxy_hostport: Optional. Tuple of (host, port) to use as an http
302 proxy_hostport: Optional. Tuple of (host, port) to use as an http
303 proxy for the connection. Default is to not use a proxy.
303 proxy for the connection. Default is to not use a proxy.
304 """
304 """
305 if port is None and host.count(':') == 1 or ']:' in host:
305 if port is None and host.count(':') == 1 or ']:' in host:
306 host, port = host.rsplit(':', 1)
306 host, port = host.rsplit(':', 1)
307 port = int(port)
307 port = int(port)
308 if '[' in host:
308 if '[' in host:
309 host = host[1:-1]
309 host = host[1:-1]
310 if use_ssl is None and port is None:
310 if use_ssl is None and port is None:
311 use_ssl = False
311 use_ssl = False
312 port = 80
312 port = 80
313 elif use_ssl is None:
313 elif use_ssl is None:
314 use_ssl = (port == 443)
314 use_ssl = (port == 443)
315 elif port is None:
315 elif port is None:
316 port = (use_ssl and 443 or 80)
316 port = (use_ssl and 443 or 80)
317 self.port = port
317 self.port = port
318 if use_ssl and not socketutil.have_ssl:
318 if use_ssl and not socketutil.have_ssl:
319 raise Exception('ssl requested but unavailable on this Python')
319 raise Exception('ssl requested but unavailable on this Python')
320 self.ssl = use_ssl
320 self.ssl = use_ssl
321 self.ssl_opts = ssl_opts
321 self.ssl_opts = ssl_opts
322 self._ssl_validator = ssl_validator
322 self._ssl_validator = ssl_validator
323 self.host = host
323 self.host = host
324 self.sock = None
324 self.sock = None
325 self._current_response = None
325 self._current_response = None
326 self._current_response_taken = False
326 self._current_response_taken = False
327 if proxy_hostport is None:
327 if proxy_hostport is None:
328 self._proxy_host = self._proxy_port = None
328 self._proxy_host = self._proxy_port = None
329 else:
329 else:
330 self._proxy_host, self._proxy_port = proxy_hostport
330 self._proxy_host, self._proxy_port = proxy_hostport
331
331
332 self.timeout = timeout
332 self.timeout = timeout
333 self.continue_timeout = continue_timeout
333 self.continue_timeout = continue_timeout
334
334
335 def _connect(self):
335 def _connect(self):
336 """Connect to the host and port specified in __init__."""
336 """Connect to the host and port specified in __init__."""
337 if self.sock:
337 if self.sock:
338 return
338 return
339 if self._proxy_host is not None:
339 if self._proxy_host is not None:
340 logger.info('Connecting to http proxy %s:%s',
340 logger.info('Connecting to http proxy %s:%s',
341 self._proxy_host, self._proxy_port)
341 self._proxy_host, self._proxy_port)
342 sock = socketutil.create_connection((self._proxy_host,
342 sock = socketutil.create_connection((self._proxy_host,
343 self._proxy_port))
343 self._proxy_port))
344 if self.ssl:
344 if self.ssl:
345 # TODO proxy header support
345 # TODO proxy header support
346 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
346 data = self.buildheaders('CONNECT', '%s:%d' % (self.host,
347 self.port),
347 self.port),
348 {}, HTTP_VER_1_0)
348 {}, HTTP_VER_1_0)
349 sock.send(data)
349 sock.send(data)
350 sock.setblocking(0)
350 sock.setblocking(0)
351 r = self.response_class(sock, self.timeout, 'CONNECT')
351 r = self.response_class(sock, self.timeout, 'CONNECT')
352 timeout_exc = HTTPTimeoutException(
352 timeout_exc = HTTPTimeoutException(
353 'Timed out waiting for CONNECT response from proxy')
353 'Timed out waiting for CONNECT response from proxy')
354 while not r.complete():
354 while not r.complete():
355 try:
355 try:
356 if not r._select():
356 if not r._select():
357 if not r.complete():
357 if not r.complete():
358 raise timeout_exc
358 raise timeout_exc
359 except HTTPTimeoutException:
359 except HTTPTimeoutException:
360 # This raise/except pattern looks goofy, but
360 # This raise/except pattern looks goofy, but
361 # _select can raise the timeout as well as the
361 # _select can raise the timeout as well as the
362 # loop body. I wish it wasn't this convoluted,
362 # loop body. I wish it wasn't this convoluted,
363 # but I don't have a better solution
363 # but I don't have a better solution
364 # immediately handy.
364 # immediately handy.
365 raise timeout_exc
365 raise timeout_exc
366 if r.status != 200:
366 if r.status != 200:
367 raise HTTPProxyConnectFailedException(
367 raise HTTPProxyConnectFailedException(
368 'Proxy connection failed: %d %s' % (r.status,
368 'Proxy connection failed: %d %s' % (r.status,
369 r.read()))
369 r.read()))
370 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
370 logger.info('CONNECT (for SSL) to %s:%s via proxy succeeded.',
371 self.host, self.port)
371 self.host, self.port)
372 else:
372 else:
373 sock = socketutil.create_connection((self.host, self.port))
373 sock = socketutil.create_connection((self.host, self.port))
374 if self.ssl:
374 if self.ssl:
375 # This is the default, but in the case of proxied SSL
375 # This is the default, but in the case of proxied SSL
376 # requests the proxy logic above will have cleared
376 # requests the proxy logic above will have cleared
377 # blocking mode, so reenable it just to be safe.
377 # blocking mode, so reenable it just to be safe.
378 sock.setblocking(1)
378 sock.setblocking(1)
379 logger.debug('wrapping socket for ssl with options %r',
379 logger.debug('wrapping socket for ssl with options %r',
380 self.ssl_opts)
380 self.ssl_opts)
381 sock = socketutil.wrap_socket(sock, **self.ssl_opts)
381 sock = socketutil.wrap_socket(sock, **self.ssl_opts)
382 if self._ssl_validator:
382 if self._ssl_validator:
383 self._ssl_validator(sock)
383 self._ssl_validator(sock)
384 sock.setblocking(0)
384 sock.setblocking(0)
385 self.sock = sock
385 self.sock = sock
386
386
387 def buildheaders(self, method, path, headers, http_ver):
387 def buildheaders(self, method, path, headers, http_ver):
388 if self.ssl and self.port == 443 or self.port == 80:
388 if self.ssl and self.port == 443 or self.port == 80:
389 # default port for protocol, so leave it out
389 # default port for protocol, so leave it out
390 hdrhost = self.host
390 hdrhost = self.host
391 else:
391 else:
392 # include nonstandard port in header
392 # include nonstandard port in header
393 if ':' in self.host: # must be IPv6
393 if ':' in self.host: # must be IPv6
394 hdrhost = '[%s]:%d' % (self.host, self.port)
394 hdrhost = '[%s]:%d' % (self.host, self.port)
395 else:
395 else:
396 hdrhost = '%s:%d' % (self.host, self.port)
396 hdrhost = '%s:%d' % (self.host, self.port)
397 if self._proxy_host and not self.ssl:
397 if self._proxy_host and not self.ssl:
398 # When talking to a regular http proxy we must send the
398 # When talking to a regular http proxy we must send the
399 # full URI, but in all other cases we must not (although
399 # full URI, but in all other cases we must not (although
400 # technically RFC 2616 says servers must accept our
400 # technically RFC 2616 says servers must accept our
401 # request if we screw up, experimentally few do that
401 # request if we screw up, experimentally few do that
402 # correctly.)
402 # correctly.)
403 assert path[0] == '/', 'path must start with a /'
403 assert path[0] == '/', 'path must start with a /'
404 path = 'http://%s%s' % (hdrhost, path)
404 path = 'http://%s%s' % (hdrhost, path)
405 outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
405 outgoing = ['%s %s %s%s' % (method, path, http_ver, EOL)]
406 headers['host'] = ('Host', hdrhost)
406 headers['host'] = ('Host', hdrhost)
407 headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
407 headers[HDR_ACCEPT_ENCODING] = (HDR_ACCEPT_ENCODING, 'identity')
408 for hdr, val in headers.itervalues():
408 for hdr, val in headers.itervalues():
409 outgoing.append('%s: %s%s' % (hdr, val, EOL))
409 outgoing.append('%s: %s%s' % (hdr, val, EOL))
410 outgoing.append(EOL)
410 outgoing.append(EOL)
411 return ''.join(outgoing)
411 return ''.join(outgoing)
412
412
413 def close(self):
413 def close(self):
414 """Close the connection to the server.
414 """Close the connection to the server.
415
415
416 This is a no-op if the connection is already closed. The
416 This is a no-op if the connection is already closed. The
417 connection may automatically close if requessted by the server
417 connection may automatically close if requested by the server
418 or required by the nature of a response.
418 or required by the nature of a response.
419 """
419 """
420 if self.sock is None:
420 if self.sock is None:
421 return
421 return
422 self.sock.close()
422 self.sock.close()
423 self.sock = None
423 self.sock = None
424 logger.info('closed connection to %s on %s', self.host, self.port)
424 logger.info('closed connection to %s on %s', self.host, self.port)
425
425
426 def busy(self):
426 def busy(self):
427 """Returns True if this connection object is currently in use.
427 """Returns True if this connection object is currently in use.
428
428
429 If a response is still pending, this will return True, even if
429 If a response is still pending, this will return True, even if
430 the request has finished sending. In the future,
430 the request has finished sending. In the future,
431 HTTPConnection may transparently juggle multiple connections
431 HTTPConnection may transparently juggle multiple connections
432 to the server, in which case this will be useful to detect if
432 to the server, in which case this will be useful to detect if
433 any of those connections is ready for use.
433 any of those connections is ready for use.
434 """
434 """
435 cr = self._current_response
435 cr = self._current_response
436 if cr is not None:
436 if cr is not None:
437 if self._current_response_taken:
437 if self._current_response_taken:
438 if cr.will_close:
438 if cr.will_close:
439 self.sock = None
439 self.sock = None
440 self._current_response = None
440 self._current_response = None
441 return False
441 return False
442 elif cr.complete():
442 elif cr.complete():
443 self._current_response = None
443 self._current_response = None
444 return False
444 return False
445 return True
445 return True
446 return False
446 return False
447
447
448 def request(self, method, path, body=None, headers={},
448 def request(self, method, path, body=None, headers={},
449 expect_continue=False):
449 expect_continue=False):
450 """Send a request to the server.
450 """Send a request to the server.
451
451
452 For increased flexibility, this does not return the response
452 For increased flexibility, this does not return the response
453 object. Future versions of HTTPConnection that juggle multiple
453 object. Future versions of HTTPConnection that juggle multiple
454 sockets will be able to send (for example) 5 requests all at
454 sockets will be able to send (for example) 5 requests all at
455 once, and then let the requests arrive as data is
455 once, and then let the requests arrive as data is
456 available. Use the `getresponse()` method to retrieve the
456 available. Use the `getresponse()` method to retrieve the
457 response.
457 response.
458 """
458 """
459 if self.busy():
459 if self.busy():
460 raise httplib.CannotSendRequest(
460 raise httplib.CannotSendRequest(
461 'Can not send another request before '
461 'Can not send another request before '
462 'current response is read!')
462 'current response is read!')
463 self._current_response_taken = False
463 self._current_response_taken = False
464
464
465 logger.info('sending %s request for %s to %s on port %s',
465 logger.info('sending %s request for %s to %s on port %s',
466 method, path, self.host, self.port)
466 method, path, self.host, self.port)
467 hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
467 hdrs = dict((k.lower(), (k, v)) for k, v in headers.iteritems())
468 if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
468 if hdrs.get('expect', ('', ''))[1].lower() == '100-continue':
469 expect_continue = True
469 expect_continue = True
470 elif expect_continue:
470 elif expect_continue:
471 hdrs['expect'] = ('Expect', '100-Continue')
471 hdrs['expect'] = ('Expect', '100-Continue')
472
472
473 chunked = False
473 chunked = False
474 if body and HDR_CONTENT_LENGTH not in hdrs:
474 if body and HDR_CONTENT_LENGTH not in hdrs:
475 if getattr(body, '__len__', False):
475 if getattr(body, '__len__', False):
476 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
476 hdrs[HDR_CONTENT_LENGTH] = (HDR_CONTENT_LENGTH, len(body))
477 elif getattr(body, 'read', False):
477 elif getattr(body, 'read', False):
478 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
478 hdrs[HDR_XFER_ENCODING] = (HDR_XFER_ENCODING,
479 XFER_ENCODING_CHUNKED)
479 XFER_ENCODING_CHUNKED)
480 chunked = True
480 chunked = True
481 else:
481 else:
482 raise BadRequestData('body has no __len__() nor read()')
482 raise BadRequestData('body has no __len__() nor read()')
483
483
484 self._connect()
484 self._connect()
485 outgoing_headers = self.buildheaders(
485 outgoing_headers = self.buildheaders(
486 method, path, hdrs, self.http_version)
486 method, path, hdrs, self.http_version)
487 response = None
487 response = None
488 first = True
488 first = True
489
489
490 def reconnect(where):
490 def reconnect(where):
491 logger.info('reconnecting during %s', where)
491 logger.info('reconnecting during %s', where)
492 self.close()
492 self.close()
493 self._connect()
493 self._connect()
494
494
495 while ((outgoing_headers or body)
495 while ((outgoing_headers or body)
496 and not (response and response.complete())):
496 and not (response and response.complete())):
497 select_timeout = self.timeout
497 select_timeout = self.timeout
498 out = outgoing_headers or body
498 out = outgoing_headers or body
499 blocking_on_continue = False
499 blocking_on_continue = False
500 if expect_continue and not outgoing_headers and not (
500 if expect_continue and not outgoing_headers and not (
501 response and (response.headers or response.continued)):
501 response and (response.headers or response.continued)):
502 logger.info(
502 logger.info(
503 'waiting up to %s seconds for'
503 'waiting up to %s seconds for'
504 ' continue response from server',
504 ' continue response from server',
505 self.continue_timeout)
505 self.continue_timeout)
506 select_timeout = self.continue_timeout
506 select_timeout = self.continue_timeout
507 blocking_on_continue = True
507 blocking_on_continue = True
508 out = False
508 out = False
509 if out:
509 if out:
510 w = [self.sock]
510 w = [self.sock]
511 else:
511 else:
512 w = []
512 w = []
513 r, w, x = select.select([self.sock], w, [], select_timeout)
513 r, w, x = select.select([self.sock], w, [], select_timeout)
514 # if we were expecting a 100 continue and it's been long
514 # if we were expecting a 100 continue and it's been long
515 # enough, just go ahead and assume it's ok. This is the
515 # enough, just go ahead and assume it's ok. This is the
516 # recommended behavior from the RFC.
516 # recommended behavior from the RFC.
517 if r == w == x == []:
517 if r == w == x == []:
518 if blocking_on_continue:
518 if blocking_on_continue:
519 expect_continue = False
519 expect_continue = False
520 logger.info('no response to continue expectation from '
520 logger.info('no response to continue expectation from '
521 'server, optimistically sending request body')
521 'server, optimistically sending request body')
522 else:
522 else:
523 raise HTTPTimeoutException('timeout sending data')
523 raise HTTPTimeoutException('timeout sending data')
524 was_first = first
524 was_first = first
525
525
526 # incoming data
526 # incoming data
527 if r:
527 if r:
528 try:
528 try:
529 try:
529 try:
530 data = r[0].recv(INCOMING_BUFFER_SIZE)
530 data = r[0].recv(INCOMING_BUFFER_SIZE)
531 except socket.sslerror, e:
531 except socket.sslerror, e:
532 if e.args[0] != socket.SSL_ERROR_WANT_READ:
532 if e.args[0] != socket.SSL_ERROR_WANT_READ:
533 raise
533 raise
534 logger.debug(
534 logger.debug(
535 'SSL_WANT_READ while sending data, retrying...')
535 'SSL_WANT_READ while sending data, retrying...')
536 continue
536 continue
537 if not data:
537 if not data:
538 logger.info('socket appears closed in read')
538 logger.info('socket appears closed in read')
539 self.sock = None
539 self.sock = None
540 self._current_response = None
540 self._current_response = None
541 if response is not None:
541 if response is not None:
542 response._close()
542 response._close()
543 # This if/elif ladder is a bit subtle,
543 # This if/elif ladder is a bit subtle,
544 # comments in each branch should help.
544 # comments in each branch should help.
545 if response is not None and response.complete():
545 if response is not None and response.complete():
546 # Server responded completely and then
546 # Server responded completely and then
547 # closed the socket. We should just shut
547 # closed the socket. We should just shut
548 # things down and let the caller get their
548 # things down and let the caller get their
549 # response.
549 # response.
550 logger.info('Got an early response, '
550 logger.info('Got an early response, '
551 'aborting remaining request.')
551 'aborting remaining request.')
552 break
552 break
553 elif was_first and response is None:
553 elif was_first and response is None:
554 # Most likely a keepalive that got killed
554 # Most likely a keepalive that got killed
555 # on the server's end. Commonly happens
555 # on the server's end. Commonly happens
556 # after getting a really large response
556 # after getting a really large response
557 # from the server.
557 # from the server.
558 logger.info(
558 logger.info(
559 'Connection appeared closed in read on first'
559 'Connection appeared closed in read on first'
560 ' request loop iteration, will retry.')
560 ' request loop iteration, will retry.')
561 reconnect('read')
561 reconnect('read')
562 continue
562 continue
563 else:
563 else:
564 # We didn't just send the first data hunk,
564 # We didn't just send the first data hunk,
565 # and either have a partial response or no
565 # and either have a partial response or no
566 # response at all. There's really nothing
566 # response at all. There's really nothing
567 # meaningful we can do here.
567 # meaningful we can do here.
568 raise HTTPStateError(
568 raise HTTPStateError(
569 'Connection appears closed after '
569 'Connection appears closed after '
570 'some request data was written, but the '
570 'some request data was written, but the '
571 'response was missing or incomplete!')
571 'response was missing or incomplete!')
572 logger.debug('read %d bytes in request()', len(data))
572 logger.debug('read %d bytes in request()', len(data))
573 if response is None:
573 if response is None:
574 response = self.response_class(r[0], self.timeout, method)
574 response = self.response_class(r[0], self.timeout, method)
575 response._load_response(data)
575 response._load_response(data)
576 # Jump to the next select() call so we load more
576 # Jump to the next select() call so we load more
577 # data if the server is still sending us content.
577 # data if the server is still sending us content.
578 continue
578 continue
579 except socket.error, e:
579 except socket.error, e:
580 if e[0] != errno.EPIPE and not was_first:
580 if e[0] != errno.EPIPE and not was_first:
581 raise
581 raise
582
582
583 # outgoing data
583 # outgoing data
584 if w and out:
584 if w and out:
585 try:
585 try:
586 if getattr(out, 'read', False):
586 if getattr(out, 'read', False):
587 data = out.read(OUTGOING_BUFFER_SIZE)
587 data = out.read(OUTGOING_BUFFER_SIZE)
588 if not data:
588 if not data:
589 continue
589 continue
590 if len(data) < OUTGOING_BUFFER_SIZE:
590 if len(data) < OUTGOING_BUFFER_SIZE:
591 if chunked:
591 if chunked:
592 body = '0' + EOL + EOL
592 body = '0' + EOL + EOL
593 else:
593 else:
594 body = None
594 body = None
595 if chunked:
595 if chunked:
596 out = hex(len(data))[2:] + EOL + data + EOL
596 out = hex(len(data))[2:] + EOL + data + EOL
597 else:
597 else:
598 out = data
598 out = data
599 amt = w[0].send(out)
599 amt = w[0].send(out)
600 except socket.error, e:
600 except socket.error, e:
601 if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
601 if e[0] == socket.SSL_ERROR_WANT_WRITE and self.ssl:
602 # This means that SSL hasn't flushed its buffer into
602 # This means that SSL hasn't flushed its buffer into
603 # the socket yet.
603 # the socket yet.
604 # TODO: find a way to block on ssl flushing its buffer
604 # TODO: find a way to block on ssl flushing its buffer
605 # similar to selecting on a raw socket.
605 # similar to selecting on a raw socket.
606 continue
606 continue
607 elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
607 elif (e[0] not in (errno.ECONNRESET, errno.EPIPE)
608 and not first):
608 and not first):
609 raise
609 raise
610 reconnect('write')
610 reconnect('write')
611 amt = self.sock.send(out)
611 amt = self.sock.send(out)
612 logger.debug('sent %d', amt)
612 logger.debug('sent %d', amt)
613 first = False
613 first = False
614 # stash data we think we sent in case the socket breaks
614 # stash data we think we sent in case the socket breaks
615 # when we read from it
615 # when we read from it
616 if was_first:
616 if was_first:
617 sent_data = out[:amt]
617 sent_data = out[:amt]
618 if out is body:
618 if out is body:
619 body = out[amt:]
619 body = out[amt:]
620 else:
620 else:
621 outgoing_headers = out[amt:]
621 outgoing_headers = out[amt:]
622
622
623 # close if the server response said to or responded before eating
623 # close if the server response said to or responded before eating
624 # the whole request
624 # the whole request
625 if response is None:
625 if response is None:
626 response = self.response_class(self.sock, self.timeout, method)
626 response = self.response_class(self.sock, self.timeout, method)
627 complete = response.complete()
627 complete = response.complete()
628 data_left = bool(outgoing_headers or body)
628 data_left = bool(outgoing_headers or body)
629 if data_left:
629 if data_left:
630 logger.info('stopped sending request early, '
630 logger.info('stopped sending request early, '
631 'will close the socket to be safe.')
631 'will close the socket to be safe.')
632 response.will_close = True
632 response.will_close = True
633 if response.will_close:
633 if response.will_close:
634 # The socket will be closed by the response, so we disown
634 # The socket will be closed by the response, so we disown
635 # the socket
635 # the socket
636 self.sock = None
636 self.sock = None
637 self._current_response = response
637 self._current_response = response
638
638
def getresponse(self):
    """Return the response object for the most recent request.

    Waits, by calling the response's _select() repeatedly, until the
    response headers are available. Afterwards the connection's
    bookkeeping is updated: a response that will close the socket
    makes this connection disown it, a complete response is forgotten,
    and anything else is marked as taken by the caller.

    Raises:
      httplib.ResponseNotReady: no response is pending.
      HTTPRemoteClosedError: _select() returned a false value before
                             the response was complete.
    """
    response = self._current_response
    if response is None:
        raise httplib.ResponseNotReady()

    while response.headers is None:
        if not (response._select() or response.complete()):
            raise _readers.HTTPRemoteClosedError()

    if response.will_close:
        # The response owns the socket from here on.
        self.sock = None
        self._current_response = None
    elif response.complete():
        self._current_response = None
    else:
        # The caller still has to read the body; busy() consults this.
        self._current_response_taken = True
    return response
654
654
655
655
class HTTPTimeoutException(httplib.HTTPException):
    """A timeout occurred while waiting on the server.

    request() raises this when select() reports no readable or
    writable socket within the timeout and it was not merely waiting
    for a 100-continue response.
    """
658
658
659
659
class BadRequestData(httplib.HTTPException):
    """Request body object has neither __len__ nor read.

    request() raises this when no Content-Length header was supplied
    and the body offers neither way to determine how to send it.
    """
662
662
663
663
class HTTPProxyConnectFailedException(httplib.HTTPException):
    """Connecting to the HTTP proxy failed."""
    # NOTE(review): the raise site is not in this part of the file;
    # presumably the connection setup code raises it -- confirm.
666
666
667
667
class HTTPStateError(httplib.HTTPException):
    """Invalid internal state encountered.

    request() raises this when the socket appears closed after some
    request data was written but the response is missing or
    incomplete.
    """
670
670
# Forward this exception type from _readers since it needs to be part
# of the public API: getresponse() raises it, so callers must be able
# to catch it by importing it from this module.
HTTPRemoteClosedError = _readers.HTTPRemoteClosedError
674 # no-check-code
674 # no-check-code
General Comments 0
You need to be logged in to leave comments. Login now