##// END OF EJS Templates
keepalive: remove useless parentheses around exception type
Gregory Szorc -
r28278:b1b22185 default
parent child Browse files
Show More
@@ -1,753 +1,753 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urllib2.build_opener(keepalive_handler)
31 >>> opener = urllib2.build_opener(keepalive_handler)
32 >>> urllib2.install_opener(opener)
32 >>> urllib2.install_opener(opener)
33 >>>
33 >>>
34 >>> fo = urllib2.urlopen('http://www.python.org')
34 >>> fo = urllib2.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81
81
82 For python versions earlier than 2.4, you can avoid this fancy error
82 For python versions earlier than 2.4, you can avoid this fancy error
83 handling by setting the module-level global HANDLE_ERRORS to zero.
83 handling by setting the module-level global HANDLE_ERRORS to zero.
84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
86 means "pass everything up". In python 2.4, however, this job no
86 means "pass everything up". In python 2.4, however, this job no
87 longer belongs to the HTTP Handler and is now done by a NEW handler,
87 longer belongs to the HTTP Handler and is now done by a NEW handler,
88 HTTPErrorProcessor. Here's the bottom line:
88 HTTPErrorProcessor. Here's the bottom line:
89
89
90 python version < 2.4
90 python version < 2.4
91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
92 errors
92 errors
93 HANDLE_ERRORS == 0 pass everything up, error processing is
93 HANDLE_ERRORS == 0 pass everything up, error processing is
94 left to the calling code
94 left to the calling code
95 python version >= 2.4
95 python version >= 2.4
96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
97 HANDLE_ERRORS == 0 (default) pass everything up, let the
97 HANDLE_ERRORS == 0 (default) pass everything up, let the
98 other handlers (specifically,
98 other handlers (specifically,
99 HTTPErrorProcessor) decide what to do
99 HTTPErrorProcessor) decide what to do
100
100
101 In practice, setting the variable either way makes little difference
101 In practice, setting the variable either way makes little difference
102 in python 2.4, so for the most consistent behavior across versions,
102 in python 2.4, so for the most consistent behavior across versions,
103 you probably just want to use the defaults, which will give you
103 you probably just want to use the defaults, which will give you
104 exceptions on errors.
104 exceptions on errors.
105
105
106 """
106 """
107
107
108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
109
109
110 from __future__ import absolute_import, print_function
110 from __future__ import absolute_import, print_function
111
111
112 import errno
112 import errno
113 import httplib
113 import httplib
114 import socket
114 import socket
115 import sys
115 import sys
116 import thread
116 import thread
117 import urllib2
117 import urllib2
118
118
119 DEBUG = None
119 DEBUG = None
120
120
121 if sys.version_info < (2, 4):
121 if sys.version_info < (2, 4):
122 HANDLE_ERRORS = 1
122 HANDLE_ERRORS = 1
123 else: HANDLE_ERRORS = 0
123 else: HANDLE_ERRORS = 0
124
124
125 class ConnectionManager(object):
125 class ConnectionManager(object):
126 """
126 """
127 The connection manager must be able to:
127 The connection manager must be able to:
128 * keep track of all existing
128 * keep track of all existing
129 """
129 """
130 def __init__(self):
130 def __init__(self):
131 self._lock = thread.allocate_lock()
131 self._lock = thread.allocate_lock()
132 self._hostmap = {} # map hosts to a list of connections
132 self._hostmap = {} # map hosts to a list of connections
133 self._connmap = {} # map connections to host
133 self._connmap = {} # map connections to host
134 self._readymap = {} # map connection to ready state
134 self._readymap = {} # map connection to ready state
135
135
136 def add(self, host, connection, ready):
136 def add(self, host, connection, ready):
137 self._lock.acquire()
137 self._lock.acquire()
138 try:
138 try:
139 if host not in self._hostmap:
139 if host not in self._hostmap:
140 self._hostmap[host] = []
140 self._hostmap[host] = []
141 self._hostmap[host].append(connection)
141 self._hostmap[host].append(connection)
142 self._connmap[connection] = host
142 self._connmap[connection] = host
143 self._readymap[connection] = ready
143 self._readymap[connection] = ready
144 finally:
144 finally:
145 self._lock.release()
145 self._lock.release()
146
146
147 def remove(self, connection):
147 def remove(self, connection):
148 self._lock.acquire()
148 self._lock.acquire()
149 try:
149 try:
150 try:
150 try:
151 host = self._connmap[connection]
151 host = self._connmap[connection]
152 except KeyError:
152 except KeyError:
153 pass
153 pass
154 else:
154 else:
155 del self._connmap[connection]
155 del self._connmap[connection]
156 del self._readymap[connection]
156 del self._readymap[connection]
157 self._hostmap[host].remove(connection)
157 self._hostmap[host].remove(connection)
158 if not self._hostmap[host]: del self._hostmap[host]
158 if not self._hostmap[host]: del self._hostmap[host]
159 finally:
159 finally:
160 self._lock.release()
160 self._lock.release()
161
161
162 def set_ready(self, connection, ready):
162 def set_ready(self, connection, ready):
163 try:
163 try:
164 self._readymap[connection] = ready
164 self._readymap[connection] = ready
165 except KeyError:
165 except KeyError:
166 pass
166 pass
167
167
168 def get_ready_conn(self, host):
168 def get_ready_conn(self, host):
169 conn = None
169 conn = None
170 self._lock.acquire()
170 self._lock.acquire()
171 try:
171 try:
172 if host in self._hostmap:
172 if host in self._hostmap:
173 for c in self._hostmap[host]:
173 for c in self._hostmap[host]:
174 if self._readymap[c]:
174 if self._readymap[c]:
175 self._readymap[c] = 0
175 self._readymap[c] = 0
176 conn = c
176 conn = c
177 break
177 break
178 finally:
178 finally:
179 self._lock.release()
179 self._lock.release()
180 return conn
180 return conn
181
181
182 def get_all(self, host=None):
182 def get_all(self, host=None):
183 if host:
183 if host:
184 return list(self._hostmap.get(host, []))
184 return list(self._hostmap.get(host, []))
185 else:
185 else:
186 return dict(self._hostmap)
186 return dict(self._hostmap)
187
187
188 class KeepAliveHandler(object):
188 class KeepAliveHandler(object):
189 def __init__(self):
189 def __init__(self):
190 self._cm = ConnectionManager()
190 self._cm = ConnectionManager()
191
191
192 #### Connection Management
192 #### Connection Management
193 def open_connections(self):
193 def open_connections(self):
194 """return a list of connected hosts and the number of connections
194 """return a list of connected hosts and the number of connections
195 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
195 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
196 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
196 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
197
197
198 def close_connection(self, host):
198 def close_connection(self, host):
199 """close connection(s) to <host>
199 """close connection(s) to <host>
200 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
200 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
201 no error occurs if there is no connection to that host."""
201 no error occurs if there is no connection to that host."""
202 for h in self._cm.get_all(host):
202 for h in self._cm.get_all(host):
203 self._cm.remove(h)
203 self._cm.remove(h)
204 h.close()
204 h.close()
205
205
206 def close_all(self):
206 def close_all(self):
207 """close all open connections"""
207 """close all open connections"""
208 for host, conns in self._cm.get_all().iteritems():
208 for host, conns in self._cm.get_all().iteritems():
209 for h in conns:
209 for h in conns:
210 self._cm.remove(h)
210 self._cm.remove(h)
211 h.close()
211 h.close()
212
212
213 def _request_closed(self, request, host, connection):
213 def _request_closed(self, request, host, connection):
214 """tells us that this request is now closed and that the
214 """tells us that this request is now closed and that the
215 connection is ready for another request"""
215 connection is ready for another request"""
216 self._cm.set_ready(connection, 1)
216 self._cm.set_ready(connection, 1)
217
217
218 def _remove_connection(self, host, connection, close=0):
218 def _remove_connection(self, host, connection, close=0):
219 if close:
219 if close:
220 connection.close()
220 connection.close()
221 self._cm.remove(connection)
221 self._cm.remove(connection)
222
222
223 #### Transaction Execution
223 #### Transaction Execution
224 def http_open(self, req):
224 def http_open(self, req):
225 return self.do_open(HTTPConnection, req)
225 return self.do_open(HTTPConnection, req)
226
226
227 def do_open(self, http_class, req):
227 def do_open(self, http_class, req):
228 host = req.get_host()
228 host = req.get_host()
229 if not host:
229 if not host:
230 raise urllib2.URLError('no host given')
230 raise urllib2.URLError('no host given')
231
231
232 try:
232 try:
233 h = self._cm.get_ready_conn(host)
233 h = self._cm.get_ready_conn(host)
234 while h:
234 while h:
235 r = self._reuse_connection(h, req, host)
235 r = self._reuse_connection(h, req, host)
236
236
237 # if this response is non-None, then it worked and we're
237 # if this response is non-None, then it worked and we're
238 # done. Break out, skipping the else block.
238 # done. Break out, skipping the else block.
239 if r:
239 if r:
240 break
240 break
241
241
242 # connection is bad - possibly closed by server
242 # connection is bad - possibly closed by server
243 # discard it and ask for the next free connection
243 # discard it and ask for the next free connection
244 h.close()
244 h.close()
245 self._cm.remove(h)
245 self._cm.remove(h)
246 h = self._cm.get_ready_conn(host)
246 h = self._cm.get_ready_conn(host)
247 else:
247 else:
248 # no (working) free connections were found. Create a new one.
248 # no (working) free connections were found. Create a new one.
249 h = http_class(host)
249 h = http_class(host)
250 if DEBUG:
250 if DEBUG:
251 DEBUG.info("creating new connection to %s (%d)",
251 DEBUG.info("creating new connection to %s (%d)",
252 host, id(h))
252 host, id(h))
253 self._cm.add(host, h, 0)
253 self._cm.add(host, h, 0)
254 self._start_transaction(h, req)
254 self._start_transaction(h, req)
255 r = h.getresponse()
255 r = h.getresponse()
256 except (socket.error, httplib.HTTPException) as err:
256 except (socket.error, httplib.HTTPException) as err:
257 raise urllib2.URLError(err)
257 raise urllib2.URLError(err)
258
258
259 # if not a persistent connection, don't try to reuse it
259 # if not a persistent connection, don't try to reuse it
260 if r.will_close:
260 if r.will_close:
261 self._cm.remove(h)
261 self._cm.remove(h)
262
262
263 if DEBUG:
263 if DEBUG:
264 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
264 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
265 r._handler = self
265 r._handler = self
266 r._host = host
266 r._host = host
267 r._url = req.get_full_url()
267 r._url = req.get_full_url()
268 r._connection = h
268 r._connection = h
269 r.code = r.status
269 r.code = r.status
270 r.headers = r.msg
270 r.headers = r.msg
271 r.msg = r.reason
271 r.msg = r.reason
272
272
273 if r.status == 200 or not HANDLE_ERRORS:
273 if r.status == 200 or not HANDLE_ERRORS:
274 return r
274 return r
275 else:
275 else:
276 return self.parent.error('http', req, r,
276 return self.parent.error('http', req, r,
277 r.status, r.msg, r.headers)
277 r.status, r.msg, r.headers)
278
278
279 def _reuse_connection(self, h, req, host):
279 def _reuse_connection(self, h, req, host):
280 """start the transaction with a re-used connection
280 """start the transaction with a re-used connection
281 return a response object (r) upon success or None on failure.
281 return a response object (r) upon success or None on failure.
282 This DOES not close or remove bad connections in cases where
282 This DOES not close or remove bad connections in cases where
283 it returns. However, if an unexpected exception occurs, it
283 it returns. However, if an unexpected exception occurs, it
284 will close and remove the connection before re-raising.
284 will close and remove the connection before re-raising.
285 """
285 """
286 try:
286 try:
287 self._start_transaction(h, req)
287 self._start_transaction(h, req)
288 r = h.getresponse()
288 r = h.getresponse()
289 # note: just because we got something back doesn't mean it
289 # note: just because we got something back doesn't mean it
290 # worked. We'll check the version below, too.
290 # worked. We'll check the version below, too.
291 except (socket.error, httplib.HTTPException):
291 except (socket.error, httplib.HTTPException):
292 r = None
292 r = None
293 except: # re-raises
293 except: # re-raises
294 # adding this block just in case we've missed
294 # adding this block just in case we've missed
295 # something we will still raise the exception, but
295 # something we will still raise the exception, but
296 # lets try and close the connection and remove it
296 # lets try and close the connection and remove it
297 # first. We previously got into a nasty loop
297 # first. We previously got into a nasty loop
298 # where an exception was uncaught, and so the
298 # where an exception was uncaught, and so the
299 # connection stayed open. On the next try, the
299 # connection stayed open. On the next try, the
300 # same exception was raised, etc. The trade-off is
300 # same exception was raised, etc. The trade-off is
301 # that it's now possible this call will raise
301 # that it's now possible this call will raise
302 # a DIFFERENT exception
302 # a DIFFERENT exception
303 if DEBUG:
303 if DEBUG:
304 DEBUG.error("unexpected exception - closing "
304 DEBUG.error("unexpected exception - closing "
305 "connection to %s (%d)", host, id(h))
305 "connection to %s (%d)", host, id(h))
306 self._cm.remove(h)
306 self._cm.remove(h)
307 h.close()
307 h.close()
308 raise
308 raise
309
309
310 if r is None or r.version == 9:
310 if r is None or r.version == 9:
311 # httplib falls back to assuming HTTP 0.9 if it gets a
311 # httplib falls back to assuming HTTP 0.9 if it gets a
312 # bad header back. This is most likely to happen if
312 # bad header back. This is most likely to happen if
313 # the socket has been closed by the server since we
313 # the socket has been closed by the server since we
314 # last used the connection.
314 # last used the connection.
315 if DEBUG:
315 if DEBUG:
316 DEBUG.info("failed to re-use connection to %s (%d)",
316 DEBUG.info("failed to re-use connection to %s (%d)",
317 host, id(h))
317 host, id(h))
318 r = None
318 r = None
319 else:
319 else:
320 if DEBUG:
320 if DEBUG:
321 DEBUG.info("re-using connection to %s (%d)", host, id(h))
321 DEBUG.info("re-using connection to %s (%d)", host, id(h))
322
322
323 return r
323 return r
324
324
325 def _start_transaction(self, h, req):
325 def _start_transaction(self, h, req):
326 # What follows mostly reimplements HTTPConnection.request()
326 # What follows mostly reimplements HTTPConnection.request()
327 # except it adds self.parent.addheaders in the mix.
327 # except it adds self.parent.addheaders in the mix.
328 headers = req.headers.copy()
328 headers = req.headers.copy()
329 if sys.version_info >= (2, 4):
329 if sys.version_info >= (2, 4):
330 headers.update(req.unredirected_hdrs)
330 headers.update(req.unredirected_hdrs)
331 headers.update(self.parent.addheaders)
331 headers.update(self.parent.addheaders)
332 headers = dict((n.lower(), v) for n, v in headers.items())
332 headers = dict((n.lower(), v) for n, v in headers.items())
333 skipheaders = {}
333 skipheaders = {}
334 for n in ('host', 'accept-encoding'):
334 for n in ('host', 'accept-encoding'):
335 if n in headers:
335 if n in headers:
336 skipheaders['skip_' + n.replace('-', '_')] = 1
336 skipheaders['skip_' + n.replace('-', '_')] = 1
337 try:
337 try:
338 if req.has_data():
338 if req.has_data():
339 data = req.get_data()
339 data = req.get_data()
340 h.putrequest('POST', req.get_selector(), **skipheaders)
340 h.putrequest('POST', req.get_selector(), **skipheaders)
341 if 'content-type' not in headers:
341 if 'content-type' not in headers:
342 h.putheader('Content-type',
342 h.putheader('Content-type',
343 'application/x-www-form-urlencoded')
343 'application/x-www-form-urlencoded')
344 if 'content-length' not in headers:
344 if 'content-length' not in headers:
345 h.putheader('Content-length', '%d' % len(data))
345 h.putheader('Content-length', '%d' % len(data))
346 else:
346 else:
347 h.putrequest('GET', req.get_selector(), **skipheaders)
347 h.putrequest('GET', req.get_selector(), **skipheaders)
348 except (socket.error) as err:
348 except socket.error as err:
349 raise urllib2.URLError(err)
349 raise urllib2.URLError(err)
350 for k, v in headers.items():
350 for k, v in headers.items():
351 h.putheader(k, v)
351 h.putheader(k, v)
352 h.endheaders()
352 h.endheaders()
353 if req.has_data():
353 if req.has_data():
354 h.send(data)
354 h.send(data)
355
355
356 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
356 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
357 pass
357 pass
358
358
359 class HTTPResponse(httplib.HTTPResponse):
359 class HTTPResponse(httplib.HTTPResponse):
360 # we need to subclass HTTPResponse in order to
360 # we need to subclass HTTPResponse in order to
361 # 1) add readline() and readlines() methods
361 # 1) add readline() and readlines() methods
362 # 2) add close_connection() methods
362 # 2) add close_connection() methods
363 # 3) add info() and geturl() methods
363 # 3) add info() and geturl() methods
364
364
365 # in order to add readline(), read must be modified to deal with a
365 # in order to add readline(), read must be modified to deal with a
366 # buffer. example: readline must read a buffer and then spit back
366 # buffer. example: readline must read a buffer and then spit back
367 # one line at a time. The only real alternative is to read one
367 # one line at a time. The only real alternative is to read one
368 # BYTE at a time (ick). Once something has been read, it can't be
368 # BYTE at a time (ick). Once something has been read, it can't be
369 # put back (ok, maybe it can, but that's even uglier than this),
369 # put back (ok, maybe it can, but that's even uglier than this),
370 # so if you THEN do a normal read, you must first take stuff from
370 # so if you THEN do a normal read, you must first take stuff from
371 # the buffer.
371 # the buffer.
372
372
373 # the read method wraps the original to accommodate buffering,
373 # the read method wraps the original to accommodate buffering,
374 # although read() never adds to the buffer.
374 # although read() never adds to the buffer.
375 # Both readline and readlines have been stolen with almost no
375 # Both readline and readlines have been stolen with almost no
376 # modification from socket.py
376 # modification from socket.py
377
377
378
378
379 def __init__(self, sock, debuglevel=0, strict=0, method=None):
379 def __init__(self, sock, debuglevel=0, strict=0, method=None):
380 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
380 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
381 self.fileno = sock.fileno
381 self.fileno = sock.fileno
382 self.code = None
382 self.code = None
383 self._rbuf = ''
383 self._rbuf = ''
384 self._rbufsize = 8096
384 self._rbufsize = 8096
385 self._handler = None # inserted by the handler later
385 self._handler = None # inserted by the handler later
386 self._host = None # (same)
386 self._host = None # (same)
387 self._url = None # (same)
387 self._url = None # (same)
388 self._connection = None # (same)
388 self._connection = None # (same)
389
389
390 _raw_read = httplib.HTTPResponse.read
390 _raw_read = httplib.HTTPResponse.read
391
391
392 def close(self):
392 def close(self):
393 if self.fp:
393 if self.fp:
394 self.fp.close()
394 self.fp.close()
395 self.fp = None
395 self.fp = None
396 if self._handler:
396 if self._handler:
397 self._handler._request_closed(self, self._host,
397 self._handler._request_closed(self, self._host,
398 self._connection)
398 self._connection)
399
399
400 def close_connection(self):
400 def close_connection(self):
401 self._handler._remove_connection(self._host, self._connection, close=1)
401 self._handler._remove_connection(self._host, self._connection, close=1)
402 self.close()
402 self.close()
403
403
404 def info(self):
404 def info(self):
405 return self.headers
405 return self.headers
406
406
407 def geturl(self):
407 def geturl(self):
408 return self._url
408 return self._url
409
409
410 def read(self, amt=None):
410 def read(self, amt=None):
411 # the _rbuf test is only in this first if for speed. It's not
411 # the _rbuf test is only in this first if for speed. It's not
412 # logically necessary
412 # logically necessary
413 if self._rbuf and not amt is None:
413 if self._rbuf and not amt is None:
414 L = len(self._rbuf)
414 L = len(self._rbuf)
415 if amt > L:
415 if amt > L:
416 amt -= L
416 amt -= L
417 else:
417 else:
418 s = self._rbuf[:amt]
418 s = self._rbuf[:amt]
419 self._rbuf = self._rbuf[amt:]
419 self._rbuf = self._rbuf[amt:]
420 return s
420 return s
421
421
422 s = self._rbuf + self._raw_read(amt)
422 s = self._rbuf + self._raw_read(amt)
423 self._rbuf = ''
423 self._rbuf = ''
424 return s
424 return s
425
425
426 # stolen from Python SVN #68532 to fix issue1088
426 # stolen from Python SVN #68532 to fix issue1088
427 def _read_chunked(self, amt):
427 def _read_chunked(self, amt):
428 chunk_left = self.chunk_left
428 chunk_left = self.chunk_left
429 value = ''
429 value = ''
430
430
431 # XXX This accumulates chunks by repeated string concatenation,
431 # XXX This accumulates chunks by repeated string concatenation,
432 # which is not efficient as the number or size of chunks gets big.
432 # which is not efficient as the number or size of chunks gets big.
433 while True:
433 while True:
434 if chunk_left is None:
434 if chunk_left is None:
435 line = self.fp.readline()
435 line = self.fp.readline()
436 i = line.find(';')
436 i = line.find(';')
437 if i >= 0:
437 if i >= 0:
438 line = line[:i] # strip chunk-extensions
438 line = line[:i] # strip chunk-extensions
439 try:
439 try:
440 chunk_left = int(line, 16)
440 chunk_left = int(line, 16)
441 except ValueError:
441 except ValueError:
442 # close the connection as protocol synchronization is
442 # close the connection as protocol synchronization is
443 # probably lost
443 # probably lost
444 self.close()
444 self.close()
445 raise httplib.IncompleteRead(value)
445 raise httplib.IncompleteRead(value)
446 if chunk_left == 0:
446 if chunk_left == 0:
447 break
447 break
448 if amt is None:
448 if amt is None:
449 value += self._safe_read(chunk_left)
449 value += self._safe_read(chunk_left)
450 elif amt < chunk_left:
450 elif amt < chunk_left:
451 value += self._safe_read(amt)
451 value += self._safe_read(amt)
452 self.chunk_left = chunk_left - amt
452 self.chunk_left = chunk_left - amt
453 return value
453 return value
454 elif amt == chunk_left:
454 elif amt == chunk_left:
455 value += self._safe_read(amt)
455 value += self._safe_read(amt)
456 self._safe_read(2) # toss the CRLF at the end of the chunk
456 self._safe_read(2) # toss the CRLF at the end of the chunk
457 self.chunk_left = None
457 self.chunk_left = None
458 return value
458 return value
459 else:
459 else:
460 value += self._safe_read(chunk_left)
460 value += self._safe_read(chunk_left)
461 amt -= chunk_left
461 amt -= chunk_left
462
462
463 # we read the whole chunk, get another
463 # we read the whole chunk, get another
464 self._safe_read(2) # toss the CRLF at the end of the chunk
464 self._safe_read(2) # toss the CRLF at the end of the chunk
465 chunk_left = None
465 chunk_left = None
466
466
467 # read and discard trailer up to the CRLF terminator
467 # read and discard trailer up to the CRLF terminator
468 ### note: we shouldn't have any trailers!
468 ### note: we shouldn't have any trailers!
469 while True:
469 while True:
470 line = self.fp.readline()
470 line = self.fp.readline()
471 if not line:
471 if not line:
472 # a vanishingly small number of sites EOF without
472 # a vanishingly small number of sites EOF without
473 # sending the trailer
473 # sending the trailer
474 break
474 break
475 if line == '\r\n':
475 if line == '\r\n':
476 break
476 break
477
477
478 # we read everything; close the "file"
478 # we read everything; close the "file"
479 self.close()
479 self.close()
480
480
481 return value
481 return value
482
482
def readline(self, limit=-1):
    """Return the next line from the response, newline included.

    Reads at most ``limit`` bytes when ``limit`` is positive; at EOF
    (or when the limit truncates mid-line) the returned string has no
    trailing newline.
    """
    pos = self._rbuf.find('\n')
    # Keep pulling data into the buffer until a newline shows up, EOF
    # is hit, or the buffer already holds enough bytes to honor limit.
    while pos < 0 and not (0 < limit <= len(self._rbuf)):
        chunk = self._raw_read(self._rbufsize)
        if not chunk:
            break
        pos = chunk.find('\n')
        if pos >= 0:
            # newline lives in the fresh chunk; index it relative to
            # the start of the combined buffer
            pos += len(self._rbuf)
        self._rbuf += chunk
    if pos < 0:
        # no newline anywhere: hand back everything buffered
        end = len(self._rbuf)
    else:
        # include the newline character itself
        end = pos + 1
    if 0 <= limit < len(self._rbuf):
        end = limit
    line, self._rbuf = self._rbuf[:end], self._rbuf[end:]
    return line
501
501
def readlines(self, sizehint=0):
    """Return a list of lines read via readline().

    Stops at EOF, or once at least ``sizehint`` bytes have been
    collected when ``sizehint`` is non-zero (the line that crosses the
    threshold is still included).
    """
    lines = []
    consumed = 0
    line = self.readline()
    while line:
        lines.append(line)
        consumed += len(line)
        if sizehint and consumed >= sizehint:
            break
        line = self.readline()
    return lines
514
514
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    Unlike httplib's send(), a broken pipe raised while the request is
    being sent may be swallowed: if the server already wrote a response
    before hanging up, that response is harvested into
    self._broken_pipe_resp (returned later by the wrapped getresponse())
    instead of propagating the error.

    `str' may be a plain string or a read()able object (it is streamed
    in 8192-byte blocks in that case).
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    # lazily (re)connect, mirroring httplib.HTTPConnection.send()
    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        # duck-type: anything with a read() method is streamed in blocks
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        # v[0] is the errno (Python 2 socket.error supports indexing)
        if v[0] == errno.EPIPE: # Broken pipe
            # _HTTPConnection__state: name-mangled access to
            # httplib.HTTPConnection's private __state attribute; only
            # try to harvest a response if the request was fully sent
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # pre-clear so getresponse() takes the normal path here
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
565
565
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # safesend() may have stashed a response that the server wrote
        # just before the socket broke; if so, hand that back instead
        # of trying to read a new one from a closed connection.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
580
580
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection with keepalive-friendly send/response handling."""
    # use the modified response class
    response_class = HTTPResponse
    # send() that survives broken pipes by stashing any response the
    # server wrote before hanging up (see safesend above)
    send = safesend
    # getresponse() that returns the stashed broken-pipe response, if any
    getresponse = wrapgetresponse(httplib.HTTPConnection)
586
586
587
587
588 #########################################################################
588 #########################################################################
589 ##### TEST FUNCTIONS
589 ##### TEST FUNCTIONS
590 #########################################################################
590 #########################################################################
591
591
def error_handler(url):
    """Fetch *url* with fancy error handling toggled off then on.

    Temporarily flips the module-global HANDLE_ERRORS, printing the
    status/reason (or exception) observed for each setting, then
    restores the original value and closes all keepalive connections.
    """
    global HANDLE_ERRORS
    saved = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    urllib2.install_opener(urllib2.build_opener(keepalive_handler))
    labels = {0: 'off', 1: 'on'}
    for flag in (0, 1):
        print(" fancy error handling %s (HANDLE_ERRORS = %i)"
              % (labels[flag], flag))
        HANDLE_ERRORS = flag
        try:
            fo = urllib2.urlopen(url)
            fo.read()
            fo.close()
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                # the wrapped file object may not carry these attrs
                status, reason = None, None
        except IOError as e:
            print(" EXCEPTION: %s" % e)
            raise
        else:
            print(" status = %s, reason = %s" % (status, reason))
    HANDLE_ERRORS = saved
    hosts = keepalive_handler.open_connections()
    print("open connections:", hosts)
    keepalive_handler.close_all()
619
619
def continuity(url):
    """Download *url* three ways and print an md5 digest of each.

    Fetches via the stock urllib2 handlers, then via the keepalive
    handler with read(), then via the keepalive handler one readline()
    at a time — matching digests mean nothing got corrupted.
    """
    from . import util
    md5 = util.md5
    format = '%25s: %s'

    # baseline: fetch with the normal http handler
    urllib2.install_opener(urllib2.build_opener())
    fo = urllib2.urlopen(url)
    body = fo.read()
    fo.close()
    print(format % ('normal urllib', md5(body).hexdigest()))

    # now install the keepalive handler and try again
    urllib2.install_opener(urllib2.build_opener(HTTPHandler()))

    fo = urllib2.urlopen(url)
    body = fo.read()
    fo.close()
    print(format % ('keepalive read', md5(body).hexdigest()))

    # same fetch, but drained one line at a time
    fo = urllib2.urlopen(url)
    body = ''
    line = fo.readline()
    while line:
        body = body + line
        line = fo.readline()
    fo.close()
    print(format % ('keepalive readline', md5(body).hexdigest()))
654
654
def comp(N, url):
    """Time N fetches of *url* with normal handlers vs the keepalive one.

    Prints both wall-clock times and the speedup factor.
    """
    print(' making %i connections to:\n %s' % (N, url))

    sys.stdout.write(' first using the normal urllib handlers')
    # baseline: default opener, new connection per request
    urllib2.install_opener(urllib2.build_opener())
    t1 = fetch(N, url)
    print(' TIME: %.3f s' % t1)

    sys.stdout.write(' now using the keepalive handler ')
    # now install the keepalive handler and measure again
    urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    t2 = fetch(N, url)
    print(' TIME: %.3f s' % t2)
    print(' improvement factor: %.2f' % (t1 / t2))
672
672
def fetch(N, url, delay=0):
    """Fetch *url* N times, sleeping *delay* seconds between requests.

    Returns the elapsed wall-clock time. Warns on stdout if any
    response length differs from the first one.
    """
    import time
    lengths = []
    start = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urllib2.urlopen(url)
        body = fo.read()
        fo.close()
        lengths.append(len(body))
    elapsed = time.time() - start

    # all responses should be byte-for-byte the same size
    for idx, length in enumerate(lengths[1:], 1):
        if length != lengths[0]:
            print("WARNING: inconsistent length on read %i: %i" % (idx, length))

    return elapsed
693
693
def test_timeout(url):
    """Check behavior when the server closes an idle keepalive connection.

    Fetches *url*, waits 20 seconds (long enough for many servers to
    drop the idle connection), fetches again, and reports whether the
    two payloads are identical. DEBUG is temporarily replaced with a
    logger that prints to stdout so the reconnect is visible.
    """
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        # print-based stand-in for the module's DEBUG logger
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = urllib2.urlopen(url)
    data1 = fo.read()
    fo.close()

    wait = 20
    print(" waiting %i seconds for the server to close the connection" % wait)
    for remaining in range(wait, 0, -1):
        sys.stdout.write('\r %2i' % remaining)
        sys.stdout.flush()
        time.sleep(1)
    sys.stderr.write('\r')

    print(" fetching the file a second time")
    fo = urllib2.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print(' data are identical')
    else:
        print(' ERROR: DATA DIFFER')

    DEBUG = dbbackup
727
727
728
728
def test(url, N=10):
    """Run the full test battery against *url* (N requests for timing)."""
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        # nothing else will work if the basic fetch already fails
        print("exiting - exception will prevent further tests")
        sys.exit()
    print('')
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print('')
    print("performing speed comparison")
    comp(N, url)
    print('')
    print("performing dropped-connection check")
    test_timeout(url)
744
744
if __name__ == '__main__':
    import time
    # usage: <prog> <request count> <url>
    try:
        count, target = int(sys.argv[1]), sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(target, count)
General Comments 0
You need to be logged in to leave comments. Login now