##// END OF EJS Templates
Update keepalive.py to current CVS version of urlgrabber....
Alexis S. L. Carvalho -
r2444:5eb02f9e default
parent child Browse files
Show More
@@ -1,587 +1,589 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
15 # Boston, MA 02111-1307 USA
16
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
19
20 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
20 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
21
21
22 >>> import urllib2
22 >>> import urllib2
23 >>> from keepalive import HTTPHandler
23 >>> from keepalive import HTTPHandler
24 >>> keepalive_handler = HTTPHandler()
24 >>> keepalive_handler = HTTPHandler()
25 >>> opener = urllib2.build_opener(keepalive_handler)
25 >>> opener = urllib2.build_opener(keepalive_handler)
26 >>> urllib2.install_opener(opener)
26 >>> urllib2.install_opener(opener)
27 >>>
27 >>>
28 >>> fo = urllib2.urlopen('http://www.python.org')
28 >>> fo = urllib2.urlopen('http://www.python.org')
29
29
30 If a connection to a given host is requested, and all of the existing
30 If a connection to a given host is requested, and all of the existing
31 connections are still in use, another connection will be opened. If
31 connections are still in use, another connection will be opened. If
32 the handler tries to use an existing connection but it fails in some
32 the handler tries to use an existing connection but it fails in some
33 way, it will be closed and removed from the pool.
33 way, it will be closed and removed from the pool.
34
34
35 To remove the handler, simply re-run build_opener with no arguments, and
35 To remove the handler, simply re-run build_opener with no arguments, and
36 install that opener.
36 install that opener.
37
37
38 You can explicitly close connections by using the close_connection()
38 You can explicitly close connections by using the close_connection()
39 method of the returned file-like object (described below) or you can
39 method of the returned file-like object (described below) or you can
40 use the handler methods:
40 use the handler methods:
41
41
42 close_connection(host)
42 close_connection(host)
43 close_all()
43 close_all()
44 open_connections()
44 open_connections()
45
45
46 NOTE: using the close_connection and close_all methods of the handler
46 NOTE: using the close_connection and close_all methods of the handler
47 should be done with care when using multiple threads.
47 should be done with care when using multiple threads.
48 * there is nothing that prevents another thread from creating new
48 * there is nothing that prevents another thread from creating new
49 connections immediately after connections are closed
49 connections immediately after connections are closed
50 * no checks are done to prevent in-use connections from being closed
50 * no checks are done to prevent in-use connections from being closed
51
51
52 >>> keepalive_handler.close_all()
52 >>> keepalive_handler.close_all()
53
53
54 EXTRA ATTRIBUTES AND METHODS
54 EXTRA ATTRIBUTES AND METHODS
55
55
56 Upon a status of 200, the object returned has a few additional
56 Upon a status of 200, the object returned has a few additional
57 attributes and methods, which should not be used if you want to
57 attributes and methods, which should not be used if you want to
58 remain consistent with the normal urllib2-returned objects:
58 remain consistent with the normal urllib2-returned objects:
59
59
60 close_connection() - close the connection to the host
60 close_connection() - close the connection to the host
61 readlines() - you know, readlines()
61 readlines() - you know, readlines()
62 status - the return status (ie 404)
62 status - the return status (ie 404)
63 reason - english translation of status (ie 'File not found')
63 reason - english translation of status (ie 'File not found')
64
64
65 If you want the best of both worlds, use this inside an
65 If you want the best of both worlds, use this inside an
66 AttributeError-catching try:
66 AttributeError-catching try:
67
67
68 >>> try: status = fo.status
68 >>> try: status = fo.status
69 >>> except AttributeError: status = None
69 >>> except AttributeError: status = None
70
70
71 Unfortunately, these are ONLY there if status == 200, so it's not
71 Unfortunately, these are ONLY there if status == 200, so it's not
72 easy to distinguish between non-200 responses. The reason is that
72 easy to distinguish between non-200 responses. The reason is that
73 urllib2 tries to do clever things with error codes 301, 302, 401,
73 urllib2 tries to do clever things with error codes 301, 302, 401,
74 and 407, and it wraps the object upon return.
74 and 407, and it wraps the object upon return.
75
75
76 For python versions earlier than 2.4, you can avoid this fancy error
76 For python versions earlier than 2.4, you can avoid this fancy error
77 handling by setting the module-level global HANDLE_ERRORS to zero.
77 handling by setting the module-level global HANDLE_ERRORS to zero.
78 You see, prior to 2.4, it's the HTTP Handler's job to determine what
78 You see, prior to 2.4, it's the HTTP Handler's job to determine what
79 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
79 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
80 means "pass everything up". In python 2.4, however, this job no
80 means "pass everything up". In python 2.4, however, this job no
81 longer belongs to the HTTP Handler and is now done by a NEW handler,
81 longer belongs to the HTTP Handler and is now done by a NEW handler,
82 HTTPErrorProcessor. Here's the bottom line:
82 HTTPErrorProcessor. Here's the bottom line:
83
83
84 python version < 2.4
84 python version < 2.4
85 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
85 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
86 errors
86 errors
87 HANDLE_ERRORS == 0 pass everything up, error processing is
87 HANDLE_ERRORS == 0 pass everything up, error processing is
88 left to the calling code
88 left to the calling code
89 python version >= 2.4
89 python version >= 2.4
90 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
90 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
91 HANDLE_ERRORS == 0 (default) pass everything up, let the
91 HANDLE_ERRORS == 0 (default) pass everything up, let the
92 other handlers (specifically,
92 other handlers (specifically,
93 HTTPErrorProcessor) decide what to do
93 HTTPErrorProcessor) decide what to do
94
94
95 In practice, setting the variable either way makes little difference
95 In practice, setting the variable either way makes little difference
96 in python 2.4, so for the most consistent behavior across versions,
96 in python 2.4, so for the most consistent behavior across versions,
97 you probably just want to use the defaults, which will give you
97 you probably just want to use the defaults, which will give you
98 exceptions on errors.
98 exceptions on errors.
99
99
100 """
100 """
101
101
102 # $Id: keepalive.py,v 1.13 2005/10/22 21:57:28 mstenner Exp $
102 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
103
103
104 import urllib2
104 import urllib2
105 import httplib
105 import httplib
106 import socket
106 import socket
107 import thread
107 import thread
108
108
109 DEBUG = None
109 DEBUG = None
110
110
111 import sys
111 import sys
112 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
112 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
113 else: HANDLE_ERRORS = 0
113 else: HANDLE_ERRORS = 0
114
114
115 class ConnectionManager:
115 class ConnectionManager:
116 """
116 """
117 The connection manager must be able to:
117 The connection manager must be able to:
118 * keep track of all existing
118 * keep track of all existing
119 """
119 """
120 def __init__(self):
120 def __init__(self):
121 self._lock = thread.allocate_lock()
121 self._lock = thread.allocate_lock()
122 self._hostmap = {} # map hosts to a list of connections
122 self._hostmap = {} # map hosts to a list of connections
123 self._connmap = {} # map connections to host
123 self._connmap = {} # map connections to host
124 self._readymap = {} # map connection to ready state
124 self._readymap = {} # map connection to ready state
125
125
126 def add(self, host, connection, ready):
126 def add(self, host, connection, ready):
127 self._lock.acquire()
127 self._lock.acquire()
128 try:
128 try:
129 if not self._hostmap.has_key(host): self._hostmap[host] = []
129 if not self._hostmap.has_key(host): self._hostmap[host] = []
130 self._hostmap[host].append(connection)
130 self._hostmap[host].append(connection)
131 self._connmap[connection] = host
131 self._connmap[connection] = host
132 self._readymap[connection] = ready
132 self._readymap[connection] = ready
133 finally:
133 finally:
134 self._lock.release()
134 self._lock.release()
135
135
136 def remove(self, connection):
136 def remove(self, connection):
137 self._lock.acquire()
137 self._lock.acquire()
138 try:
138 try:
139 try:
139 try:
140 host = self._connmap[connection]
140 host = self._connmap[connection]
141 except KeyError:
141 except KeyError:
142 pass
142 pass
143 else:
143 else:
144 del self._connmap[connection]
144 del self._connmap[connection]
145 del self._readymap[connection]
145 del self._readymap[connection]
146 self._hostmap[host].remove(connection)
146 self._hostmap[host].remove(connection)
147 if not self._hostmap[host]: del self._hostmap[host]
147 if not self._hostmap[host]: del self._hostmap[host]
148 finally:
148 finally:
149 self._lock.release()
149 self._lock.release()
150
150
151 def set_ready(self, connection, ready):
151 def set_ready(self, connection, ready):
152 try: self._readymap[connection] = ready
152 try: self._readymap[connection] = ready
153 except KeyError: pass
153 except KeyError: pass
154
154
155 def get_ready_conn(self, host):
155 def get_ready_conn(self, host):
156 conn = None
156 conn = None
157 self._lock.acquire()
157 self._lock.acquire()
158 try:
158 try:
159 if self._hostmap.has_key(host):
159 if self._hostmap.has_key(host):
160 for c in self._hostmap[host]:
160 for c in self._hostmap[host]:
161 if self._readymap[c]:
161 if self._readymap[c]:
162 self._readymap[c] = 0
162 self._readymap[c] = 0
163 conn = c
163 conn = c
164 break
164 break
165 finally:
165 finally:
166 self._lock.release()
166 self._lock.release()
167 return conn
167 return conn
168
168
169 def get_all(self, host=None):
169 def get_all(self, host=None):
170 if host:
170 if host:
171 return list(self._hostmap.get(host, []))
171 return list(self._hostmap.get(host, []))
172 else:
172 else:
173 return dict(self._hostmap)
173 return dict(self._hostmap)
174
174
175 class HTTPHandler(urllib2.HTTPHandler):
175 class HTTPHandler(urllib2.HTTPHandler):
176 def __init__(self):
176 def __init__(self):
177 self._cm = ConnectionManager()
177 self._cm = ConnectionManager()
178
178
179 #### Connection Management
179 #### Connection Management
180 def open_connections(self):
180 def open_connections(self):
181 """return a list of connected hosts and the number of connections
181 """return a list of connected hosts and the number of connections
182 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
182 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
183 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
183 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
184
184
185 def close_connection(self, host):
185 def close_connection(self, host):
186 """close connection(s) to <host>
186 """close connection(s) to <host>
187 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
187 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
188 no error occurs if there is no connection to that host."""
188 no error occurs if there is no connection to that host."""
189 for h in self._cm.get_all(host):
189 for h in self._cm.get_all(host):
190 self._cm.remove(h)
190 self._cm.remove(h)
191 h.close()
191 h.close()
192
192
193 def close_all(self):
193 def close_all(self):
194 """close all open connections"""
194 """close all open connections"""
195 for host, conns in self._cm.get_all().items():
195 for host, conns in self._cm.get_all().items():
196 for h in conns:
196 for h in conns:
197 self._cm.remove(h)
197 self._cm.remove(h)
198 h.close()
198 h.close()
199
199
200 def _request_closed(self, request, host, connection):
200 def _request_closed(self, request, host, connection):
201 """tells us that this request is now closed and the the
201 """tells us that this request is now closed and the the
202 connection is ready for another request"""
202 connection is ready for another request"""
203 self._cm.set_ready(connection, 1)
203 self._cm.set_ready(connection, 1)
204
204
205 def _remove_connection(self, host, connection, close=0):
205 def _remove_connection(self, host, connection, close=0):
206 if close: connection.close()
206 if close: connection.close()
207 self._cm.remove(connection)
207 self._cm.remove(connection)
208
208
209 #### Transaction Execution
209 #### Transaction Execution
210 def http_open(self, req):
210 def http_open(self, req):
211 return self.do_open(HTTPConnection, req)
211 return self.do_open(HTTPConnection, req)
212
212
213 def do_open(self, http_class, req):
213 def do_open(self, http_class, req):
214 host = req.get_host()
214 host = req.get_host()
215 if not host:
215 if not host:
216 raise urllib2.URLError('no host given')
216 raise urllib2.URLError('no host given')
217
217
218 try:
218 try:
219 h = self._cm.get_ready_conn(host)
219 h = self._cm.get_ready_conn(host)
220 while h:
220 while h:
221 r = self._reuse_connection(h, req, host)
221 r = self._reuse_connection(h, req, host)
222
222
223 # if this response is non-None, then it worked and we're
223 # if this response is non-None, then it worked and we're
224 # done. Break out, skipping the else block.
224 # done. Break out, skipping the else block.
225 if r: break
225 if r: break
226
226
227 # connection is bad - possibly closed by server
227 # connection is bad - possibly closed by server
228 # discard it and ask for the next free connection
228 # discard it and ask for the next free connection
229 h.close()
229 h.close()
230 self._cm.remove(h)
230 self._cm.remove(h)
231 h = self._cm.get_ready_conn(host)
231 h = self._cm.get_ready_conn(host)
232 else:
232 else:
233 # no (working) free connections were found. Create a new one.
233 # no (working) free connections were found. Create a new one.
234 h = http_class(host)
234 h = http_class(host)
235 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
235 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
236 host, id(h))
236 host, id(h))
237 self._cm.add(host, h, 0)
237 self._cm.add(host, h, 0)
238 self._start_transaction(h, req)
238 self._start_transaction(h, req)
239 r = h.getresponse()
239 r = h.getresponse()
240 except (socket.error, httplib.HTTPException), err:
240 except (socket.error, httplib.HTTPException), err:
241 raise urllib2.URLError(err)
241 raise urllib2.URLError(err)
242
242
243 # if not a persistent connection, don't try to reuse it
243 # if not a persistent connection, don't try to reuse it
244 if r.will_close: self._cm.remove(h)
244 if r.will_close: self._cm.remove(h)
245
245
246 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
246 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
247 r._handler = self
247 r._handler = self
248 r._host = host
248 r._host = host
249 r._url = req.get_full_url()
249 r._url = req.get_full_url()
250 r._connection = h
250 r._connection = h
251 r.code = r.status
251 r.code = r.status
252 r.headers = r.msg
253 r.msg = r.reason
252
254
253 if r.status == 200 or not HANDLE_ERRORS:
255 if r.status == 200 or not HANDLE_ERRORS:
254 return r
256 return r
255 else:
257 else:
256 return self.parent.error('http', req, r, r.status, r.reason, r.msg)
258 return self.parent.error('http', req, r,
257
259 r.status, r.msg, r.headers)
258
260
259 def _reuse_connection(self, h, req, host):
261 def _reuse_connection(self, h, req, host):
260 """start the transaction with a re-used connection
262 """start the transaction with a re-used connection
261 return a response object (r) upon success or None on failure.
263 return a response object (r) upon success or None on failure.
262 This DOES not close or remove bad connections in cases where
264 This DOES not close or remove bad connections in cases where
263 it returns. However, if an unexpected exception occurs, it
265 it returns. However, if an unexpected exception occurs, it
264 will close and remove the connection before re-raising.
266 will close and remove the connection before re-raising.
265 """
267 """
266 try:
268 try:
267 self._start_transaction(h, req)
269 self._start_transaction(h, req)
268 r = h.getresponse()
270 r = h.getresponse()
269 # note: just because we got something back doesn't mean it
271 # note: just because we got something back doesn't mean it
270 # worked. We'll check the version below, too.
272 # worked. We'll check the version below, too.
271 except (socket.error, httplib.HTTPException):
273 except (socket.error, httplib.HTTPException):
272 r = None
274 r = None
273 except:
275 except:
274 # adding this block just in case we've missed
276 # adding this block just in case we've missed
275 # something we will still raise the exception, but
277 # something we will still raise the exception, but
276 # lets try and close the connection and remove it
278 # lets try and close the connection and remove it
277 # first. We previously got into a nasty loop
279 # first. We previously got into a nasty loop
278 # where an exception was uncaught, and so the
280 # where an exception was uncaught, and so the
279 # connection stayed open. On the next try, the
281 # connection stayed open. On the next try, the
280 # same exception was raised, etc. The tradeoff is
282 # same exception was raised, etc. The tradeoff is
281 # that it's now possible this call will raise
283 # that it's now possible this call will raise
282 # a DIFFERENT exception
284 # a DIFFERENT exception
283 if DEBUG: DEBUG.error("unexpected exception - closing " + \
285 if DEBUG: DEBUG.error("unexpected exception - closing " + \
284 "connection to %s (%d)", host, id(h))
286 "connection to %s (%d)", host, id(h))
285 self._cm.remove(h)
287 self._cm.remove(h)
286 h.close()
288 h.close()
287 raise
289 raise
288
290
289 if r is None or r.version == 9:
291 if r is None or r.version == 9:
290 # httplib falls back to assuming HTTP 0.9 if it gets a
292 # httplib falls back to assuming HTTP 0.9 if it gets a
291 # bad header back. This is most likely to happen if
293 # bad header back. This is most likely to happen if
292 # the socket has been closed by the server since we
294 # the socket has been closed by the server since we
293 # last used the connection.
295 # last used the connection.
294 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
296 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
295 host, id(h))
297 host, id(h))
296 r = None
298 r = None
297 else:
299 else:
298 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
300 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
299
301
300 return r
302 return r
301
303
302 def _start_transaction(self, h, req):
304 def _start_transaction(self, h, req):
303 try:
305 try:
304 if req.has_data():
306 if req.has_data():
305 data = req.get_data()
307 data = req.get_data()
306 h.putrequest('POST', req.get_selector())
308 h.putrequest('POST', req.get_selector())
307 if not req.headers.has_key('Content-type'):
309 if not req.headers.has_key('Content-type'):
308 h.putheader('Content-type',
310 h.putheader('Content-type',
309 'application/x-www-form-urlencoded')
311 'application/x-www-form-urlencoded')
310 if not req.headers.has_key('Content-length'):
312 if not req.headers.has_key('Content-length'):
311 h.putheader('Content-length', '%d' % len(data))
313 h.putheader('Content-length', '%d' % len(data))
312 else:
314 else:
313 h.putrequest('GET', req.get_selector())
315 h.putrequest('GET', req.get_selector())
314 except (socket.error, httplib.HTTPException), err:
316 except (socket.error, httplib.HTTPException), err:
315 raise urllib2.URLError(err)
317 raise urllib2.URLError(err)
316
318
317 for args in self.parent.addheaders:
319 for args in self.parent.addheaders:
318 h.putheader(*args)
320 h.putheader(*args)
319 for k, v in req.headers.items():
321 for k, v in req.headers.items():
320 h.putheader(k, v)
322 h.putheader(k, v)
321 h.endheaders()
323 h.endheaders()
322 if req.has_data():
324 if req.has_data():
323 h.send(data)
325 h.send(data)
324
326
325 class HTTPResponse(httplib.HTTPResponse):
327 class HTTPResponse(httplib.HTTPResponse):
326 # we need to subclass HTTPResponse in order to
328 # we need to subclass HTTPResponse in order to
327 # 1) add readline() and readlines() methods
329 # 1) add readline() and readlines() methods
328 # 2) add close_connection() methods
330 # 2) add close_connection() methods
329 # 3) add info() and geturl() methods
331 # 3) add info() and geturl() methods
330
332
331 # in order to add readline(), read must be modified to deal with a
333 # in order to add readline(), read must be modified to deal with a
332 # buffer. example: readline must read a buffer and then spit back
334 # buffer. example: readline must read a buffer and then spit back
333 # one line at a time. The only real alternative is to read one
335 # one line at a time. The only real alternative is to read one
334 # BYTE at a time (ick). Once something has been read, it can't be
336 # BYTE at a time (ick). Once something has been read, it can't be
335 # put back (ok, maybe it can, but that's even uglier than this),
337 # put back (ok, maybe it can, but that's even uglier than this),
336 # so if you THEN do a normal read, you must first take stuff from
338 # so if you THEN do a normal read, you must first take stuff from
337 # the buffer.
339 # the buffer.
338
340
339 # the read method wraps the original to accommodate buffering,
341 # the read method wraps the original to accommodate buffering,
340 # although read() never adds to the buffer.
342 # although read() never adds to the buffer.
341 # Both readline and readlines have been stolen with almost no
343 # Both readline and readlines have been stolen with almost no
342 # modification from socket.py
344 # modification from socket.py
343
345
344
346
345 def __init__(self, sock, debuglevel=0, strict=0, method=None):
347 def __init__(self, sock, debuglevel=0, strict=0, method=None):
346 if method: # the httplib in python 2.3 uses the method arg
348 if method: # the httplib in python 2.3 uses the method arg
347 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
349 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
348 else: # 2.2 doesn't
350 else: # 2.2 doesn't
349 httplib.HTTPResponse.__init__(self, sock, debuglevel)
351 httplib.HTTPResponse.__init__(self, sock, debuglevel)
350 self.fileno = sock.fileno
352 self.fileno = sock.fileno
351 self.code = None
353 self.code = None
352 self._rbuf = ''
354 self._rbuf = ''
353 self._rbufsize = 8096
355 self._rbufsize = 8096
354 self._handler = None # inserted by the handler later
356 self._handler = None # inserted by the handler later
355 self._host = None # (same)
357 self._host = None # (same)
356 self._url = None # (same)
358 self._url = None # (same)
357 self._connection = None # (same)
359 self._connection = None # (same)
358
360
359 _raw_read = httplib.HTTPResponse.read
361 _raw_read = httplib.HTTPResponse.read
360
362
361 def close(self):
363 def close(self):
362 if self.fp:
364 if self.fp:
363 self.fp.close()
365 self.fp.close()
364 self.fp = None
366 self.fp = None
365 if self._handler:
367 if self._handler:
366 self._handler._request_closed(self, self._host,
368 self._handler._request_closed(self, self._host,
367 self._connection)
369 self._connection)
368
370
369 def close_connection(self):
371 def close_connection(self):
370 self._handler._remove_connection(self._host, self._connection, close=1)
372 self._handler._remove_connection(self._host, self._connection, close=1)
371 self.close()
373 self.close()
372
374
373 def info(self):
375 def info(self):
374 return self.msg
376 return self.headers
375
377
376 def geturl(self):
378 def geturl(self):
377 return self._url
379 return self._url
378
380
379 def read(self, amt=None):
381 def read(self, amt=None):
380 # the _rbuf test is only in this first if for speed. It's not
382 # the _rbuf test is only in this first if for speed. It's not
381 # logically necessary
383 # logically necessary
382 if self._rbuf and not amt is None:
384 if self._rbuf and not amt is None:
383 L = len(self._rbuf)
385 L = len(self._rbuf)
384 if amt > L:
386 if amt > L:
385 amt -= L
387 amt -= L
386 else:
388 else:
387 s = self._rbuf[:amt]
389 s = self._rbuf[:amt]
388 self._rbuf = self._rbuf[amt:]
390 self._rbuf = self._rbuf[amt:]
389 return s
391 return s
390
392
391 s = self._rbuf + self._raw_read(amt)
393 s = self._rbuf + self._raw_read(amt)
392 self._rbuf = ''
394 self._rbuf = ''
393 return s
395 return s
394
396
395 def readline(self, limit=-1):
397 def readline(self, limit=-1):
396 data = ""
398 data = ""
397 i = self._rbuf.find('\n')
399 i = self._rbuf.find('\n')
398 while i < 0 and not (0 < limit <= len(self._rbuf)):
400 while i < 0 and not (0 < limit <= len(self._rbuf)):
399 new = self._raw_read(self._rbufsize)
401 new = self._raw_read(self._rbufsize)
400 if not new: break
402 if not new: break
401 i = new.find('\n')
403 i = new.find('\n')
402 if i >= 0: i = i + len(self._rbuf)
404 if i >= 0: i = i + len(self._rbuf)
403 self._rbuf = self._rbuf + new
405 self._rbuf = self._rbuf + new
404 if i < 0: i = len(self._rbuf)
406 if i < 0: i = len(self._rbuf)
405 else: i = i+1
407 else: i = i+1
406 if 0 <= limit < len(self._rbuf): i = limit
408 if 0 <= limit < len(self._rbuf): i = limit
407 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
409 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
408 return data
410 return data
409
411
410 def readlines(self, sizehint = 0):
412 def readlines(self, sizehint = 0):
411 total = 0
413 total = 0
412 list = []
414 list = []
413 while 1:
415 while 1:
414 line = self.readline()
416 line = self.readline()
415 if not line: break
417 if not line: break
416 list.append(line)
418 list.append(line)
417 total += len(line)
419 total += len(line)
418 if sizehint and total >= sizehint:
420 if sizehint and total >= sizehint:
419 break
421 break
420 return list
422 return list
421
423
422
424
423 class HTTPConnection(httplib.HTTPConnection):
425 class HTTPConnection(httplib.HTTPConnection):
424 # use the modified response class
426 # use the modified response class
425 response_class = HTTPResponse
427 response_class = HTTPResponse
426
428
427 #########################################################################
429 #########################################################################
428 ##### TEST FUNCTIONS
430 ##### TEST FUNCTIONS
429 #########################################################################
431 #########################################################################
430
432
431 def error_handler(url):
433 def error_handler(url):
432 global HANDLE_ERRORS
434 global HANDLE_ERRORS
433 orig = HANDLE_ERRORS
435 orig = HANDLE_ERRORS
434 keepalive_handler = HTTPHandler()
436 keepalive_handler = HTTPHandler()
435 opener = urllib2.build_opener(keepalive_handler)
437 opener = urllib2.build_opener(keepalive_handler)
436 urllib2.install_opener(opener)
438 urllib2.install_opener(opener)
437 pos = {0: 'off', 1: 'on'}
439 pos = {0: 'off', 1: 'on'}
438 for i in (0, 1):
440 for i in (0, 1):
439 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
441 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
440 HANDLE_ERRORS = i
442 HANDLE_ERRORS = i
441 try:
443 try:
442 fo = urllib2.urlopen(url)
444 fo = urllib2.urlopen(url)
443 foo = fo.read()
445 foo = fo.read()
444 fo.close()
446 fo.close()
445 try: status, reason = fo.status, fo.reason
447 try: status, reason = fo.status, fo.reason
446 except AttributeError: status, reason = None, None
448 except AttributeError: status, reason = None, None
447 except IOError, e:
449 except IOError, e:
448 print " EXCEPTION: %s" % e
450 print " EXCEPTION: %s" % e
449 raise
451 raise
450 else:
452 else:
451 print " status = %s, reason = %s" % (status, reason)
453 print " status = %s, reason = %s" % (status, reason)
452 HANDLE_ERRORS = orig
454 HANDLE_ERRORS = orig
453 hosts = keepalive_handler.open_connections()
455 hosts = keepalive_handler.open_connections()
454 print "open connections:", hosts
456 print "open connections:", hosts
455 keepalive_handler.close_all()
457 keepalive_handler.close_all()
456
458
457 def continuity(url):
459 def continuity(url):
458 import md5
460 import md5
459 format = '%25s: %s'
461 format = '%25s: %s'
460
462
461 # first fetch the file with the normal http handler
463 # first fetch the file with the normal http handler
462 opener = urllib2.build_opener()
464 opener = urllib2.build_opener()
463 urllib2.install_opener(opener)
465 urllib2.install_opener(opener)
464 fo = urllib2.urlopen(url)
466 fo = urllib2.urlopen(url)
465 foo = fo.read()
467 foo = fo.read()
466 fo.close()
468 fo.close()
467 m = md5.new(foo)
469 m = md5.new(foo)
468 print format % ('normal urllib', m.hexdigest())
470 print format % ('normal urllib', m.hexdigest())
469
471
470 # now install the keepalive handler and try again
472 # now install the keepalive handler and try again
471 opener = urllib2.build_opener(HTTPHandler())
473 opener = urllib2.build_opener(HTTPHandler())
472 urllib2.install_opener(opener)
474 urllib2.install_opener(opener)
473
475
474 fo = urllib2.urlopen(url)
476 fo = urllib2.urlopen(url)
475 foo = fo.read()
477 foo = fo.read()
476 fo.close()
478 fo.close()
477 m = md5.new(foo)
479 m = md5.new(foo)
478 print format % ('keepalive read', m.hexdigest())
480 print format % ('keepalive read', m.hexdigest())
479
481
480 fo = urllib2.urlopen(url)
482 fo = urllib2.urlopen(url)
481 foo = ''
483 foo = ''
482 while 1:
484 while 1:
483 f = fo.readline()
485 f = fo.readline()
484 if f: foo = foo + f
486 if f: foo = foo + f
485 else: break
487 else: break
486 fo.close()
488 fo.close()
487 m = md5.new(foo)
489 m = md5.new(foo)
488 print format % ('keepalive readline', m.hexdigest())
490 print format % ('keepalive readline', m.hexdigest())
489
491
490 def comp(N, url):
492 def comp(N, url):
491 print ' making %i connections to:\n %s' % (N, url)
493 print ' making %i connections to:\n %s' % (N, url)
492
494
493 sys.stdout.write(' first using the normal urllib handlers')
495 sys.stdout.write(' first using the normal urllib handlers')
494 # first use normal opener
496 # first use normal opener
495 opener = urllib2.build_opener()
497 opener = urllib2.build_opener()
496 urllib2.install_opener(opener)
498 urllib2.install_opener(opener)
497 t1 = fetch(N, url)
499 t1 = fetch(N, url)
498 print ' TIME: %.3f s' % t1
500 print ' TIME: %.3f s' % t1
499
501
500 sys.stdout.write(' now using the keepalive handler ')
502 sys.stdout.write(' now using the keepalive handler ')
501 # now install the keepalive handler and try again
503 # now install the keepalive handler and try again
502 opener = urllib2.build_opener(HTTPHandler())
504 opener = urllib2.build_opener(HTTPHandler())
503 urllib2.install_opener(opener)
505 urllib2.install_opener(opener)
504 t2 = fetch(N, url)
506 t2 = fetch(N, url)
505 print ' TIME: %.3f s' % t2
507 print ' TIME: %.3f s' % t2
506 print ' improvement factor: %.2f' % (t1/t2, )
508 print ' improvement factor: %.2f' % (t1/t2, )
507
509
508 def fetch(N, url, delay=0):
510 def fetch(N, url, delay=0):
509 import time
511 import time
510 lens = []
512 lens = []
511 starttime = time.time()
513 starttime = time.time()
512 for i in range(N):
514 for i in range(N):
513 if delay and i > 0: time.sleep(delay)
515 if delay and i > 0: time.sleep(delay)
514 fo = urllib2.urlopen(url)
516 fo = urllib2.urlopen(url)
515 foo = fo.read()
517 foo = fo.read()
516 fo.close()
518 fo.close()
517 lens.append(len(foo))
519 lens.append(len(foo))
518 diff = time.time() - starttime
520 diff = time.time() - starttime
519
521
520 j = 0
522 j = 0
521 for i in lens[1:]:
523 for i in lens[1:]:
522 j = j + 1
524 j = j + 1
523 if not i == lens[0]:
525 if not i == lens[0]:
524 print "WARNING: inconsistent length on read %i: %i" % (j, i)
526 print "WARNING: inconsistent length on read %i: %i" % (j, i)
525
527
526 return diff
528 return diff
527
529
528 def test_timeout(url):
530 def test_timeout(url):
529 global DEBUG
531 global DEBUG
530 dbbackup = DEBUG
532 dbbackup = DEBUG
531 class FakeLogger:
533 class FakeLogger:
532 def debug(self, msg, *args): print msg % args
534 def debug(self, msg, *args): print msg % args
533 info = warning = error = debug
535 info = warning = error = debug
534 DEBUG = FakeLogger()
536 DEBUG = FakeLogger()
535 print " fetching the file to establish a connection"
537 print " fetching the file to establish a connection"
536 fo = urllib2.urlopen(url)
538 fo = urllib2.urlopen(url)
537 data1 = fo.read()
539 data1 = fo.read()
538 fo.close()
540 fo.close()
539
541
540 i = 20
542 i = 20
541 print " waiting %i seconds for the server to close the connection" % i
543 print " waiting %i seconds for the server to close the connection" % i
542 while i > 0:
544 while i > 0:
543 sys.stdout.write('\r %2i' % i)
545 sys.stdout.write('\r %2i' % i)
544 sys.stdout.flush()
546 sys.stdout.flush()
545 time.sleep(1)
547 time.sleep(1)
546 i -= 1
548 i -= 1
547 sys.stderr.write('\r')
549 sys.stderr.write('\r')
548
550
549 print " fetching the file a second time"
551 print " fetching the file a second time"
550 fo = urllib2.urlopen(url)
552 fo = urllib2.urlopen(url)
551 data2 = fo.read()
553 data2 = fo.read()
552 fo.close()
554 fo.close()
553
555
554 if data1 == data2:
556 if data1 == data2:
555 print ' data are identical'
557 print ' data are identical'
556 else:
558 else:
557 print ' ERROR: DATA DIFFER'
559 print ' ERROR: DATA DIFFER'
558
560
559 DEBUG = dbbackup
561 DEBUG = dbbackup
560
562
561
563
562 def test(url, N=10):
564 def test(url, N=10):
563 print "checking error hander (do this on a non-200)"
565 print "checking error hander (do this on a non-200)"
564 try: error_handler(url)
566 try: error_handler(url)
565 except IOError, e:
567 except IOError, e:
566 print "exiting - exception will prevent further tests"
568 print "exiting - exception will prevent further tests"
567 sys.exit()
569 sys.exit()
568 print
570 print
569 print "performing continuity test (making sure stuff isn't corrupted)"
571 print "performing continuity test (making sure stuff isn't corrupted)"
570 continuity(url)
572 continuity(url)
571 print
573 print
572 print "performing speed comparison"
574 print "performing speed comparison"
573 comp(N, url)
575 comp(N, url)
574 print
576 print
575 print "performing dropped-connection check"
577 print "performing dropped-connection check"
576 test_timeout(url)
578 test_timeout(url)
577
579
578 if __name__ == '__main__':
580 if __name__ == '__main__':
579 import time
581 import time
580 import sys
582 import sys
581 try:
583 try:
582 N = int(sys.argv[1])
584 N = int(sys.argv[1])
583 url = sys.argv[2]
585 url = sys.argv[2]
584 except:
586 except:
585 print "%s <integer> <url>" % sys.argv[0]
587 print "%s <integer> <url>" % sys.argv[0]
586 else:
588 else:
587 test(url, N)
589 test(url, N)
@@ -1,9 +1,5 b''
1 abort: error: Connection refused
1 abort: error: Connection refused
2 255
2 255
3 copy: No such file or directory
3 copy: No such file or directory
4 abort: HTTP Error 404
4 abort: HTTP Error 404
5 Date:
6 Content-Type: text/html
7 Connection: close
8
9 0
5 0
General Comments 0
You need to be logged in to leave comments. Login now