##// END OF EJS Templates
keepalive: fix 4f13ed6ee544, reintroduce unredirected_hdrs...
Patrick Mezard -
r8233:655c435e default
parent child Browse files
Show More
@@ -1,653 +1,661 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the
12 # License along with this library; if not, write to the
13 # Free Software Foundation, Inc.,
13 # Free Software Foundation, Inc.,
14 # 59 Temple Place, Suite 330,
14 # 59 Temple Place, Suite 330,
15 # Boston, MA 02111-1307 USA
15 # Boston, MA 02111-1307 USA
16
16
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
19
20 # Modified by Benoit Boissinot:
20 # Modified by Benoit Boissinot:
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22 # Modified by Dirkjan Ochtman:
22 # Modified by Dirkjan Ochtman:
23 # - import md5 function from a local util module
23 # - import md5 function from a local util module
24
24
25 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
25 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26
26
27 >>> import urllib2
27 >>> import urllib2
28 >>> from keepalive import HTTPHandler
28 >>> from keepalive import HTTPHandler
29 >>> keepalive_handler = HTTPHandler()
29 >>> keepalive_handler = HTTPHandler()
30 >>> opener = urllib2.build_opener(keepalive_handler)
30 >>> opener = urllib2.build_opener(keepalive_handler)
31 >>> urllib2.install_opener(opener)
31 >>> urllib2.install_opener(opener)
32 >>>
32 >>>
33 >>> fo = urllib2.urlopen('http://www.python.org')
33 >>> fo = urllib2.urlopen('http://www.python.org')
34
34
35 If a connection to a given host is requested, and all of the existing
35 If a connection to a given host is requested, and all of the existing
36 connections are still in use, another connection will be opened. If
36 connections are still in use, another connection will be opened. If
37 the handler tries to use an existing connection but it fails in some
37 the handler tries to use an existing connection but it fails in some
38 way, it will be closed and removed from the pool.
38 way, it will be closed and removed from the pool.
39
39
40 To remove the handler, simply re-run build_opener with no arguments, and
40 To remove the handler, simply re-run build_opener with no arguments, and
41 install that opener.
41 install that opener.
42
42
43 You can explicitly close connections by using the close_connection()
43 You can explicitly close connections by using the close_connection()
44 method of the returned file-like object (described below) or you can
44 method of the returned file-like object (described below) or you can
45 use the handler methods:
45 use the handler methods:
46
46
47 close_connection(host)
47 close_connection(host)
48 close_all()
48 close_all()
49 open_connections()
49 open_connections()
50
50
51 NOTE: using the close_connection and close_all methods of the handler
51 NOTE: using the close_connection and close_all methods of the handler
52 should be done with care when using multiple threads.
52 should be done with care when using multiple threads.
53 * there is nothing that prevents another thread from creating new
53 * there is nothing that prevents another thread from creating new
54 connections immediately after connections are closed
54 connections immediately after connections are closed
55 * no checks are done to prevent in-use connections from being closed
55 * no checks are done to prevent in-use connections from being closed
56
56
57 >>> keepalive_handler.close_all()
57 >>> keepalive_handler.close_all()
58
58
59 EXTRA ATTRIBUTES AND METHODS
59 EXTRA ATTRIBUTES AND METHODS
60
60
61 Upon a status of 200, the object returned has a few additional
61 Upon a status of 200, the object returned has a few additional
62 attributes and methods, which should not be used if you want to
62 attributes and methods, which should not be used if you want to
63 remain consistent with the normal urllib2-returned objects:
63 remain consistent with the normal urllib2-returned objects:
64
64
65 close_connection() - close the connection to the host
65 close_connection() - close the connection to the host
66 readlines() - you know, readlines()
66 readlines() - you know, readlines()
67 status - the return status (ie 404)
67 status - the return status (ie 404)
68 reason - english translation of status (ie 'File not found')
68 reason - english translation of status (ie 'File not found')
69
69
70 If you want the best of both worlds, use this inside an
70 If you want the best of both worlds, use this inside an
71 AttributeError-catching try:
71 AttributeError-catching try:
72
72
73 >>> try: status = fo.status
73 >>> try: status = fo.status
74 >>> except AttributeError: status = None
74 >>> except AttributeError: status = None
75
75
76 Unfortunately, these are ONLY there if status == 200, so it's not
76 Unfortunately, these are ONLY there if status == 200, so it's not
77 easy to distinguish between non-200 responses. The reason is that
77 easy to distinguish between non-200 responses. The reason is that
78 urllib2 tries to do clever things with error codes 301, 302, 401,
78 urllib2 tries to do clever things with error codes 301, 302, 401,
79 and 407, and it wraps the object upon return.
79 and 407, and it wraps the object upon return.
80
80
81 For python versions earlier than 2.4, you can avoid this fancy error
81 For python versions earlier than 2.4, you can avoid this fancy error
82 handling by setting the module-level global HANDLE_ERRORS to zero.
82 handling by setting the module-level global HANDLE_ERRORS to zero.
83 You see, prior to 2.4, it's the HTTP Handler's job to determine what
83 You see, prior to 2.4, it's the HTTP Handler's job to determine what
84 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
84 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
85 means "pass everything up". In python 2.4, however, this job no
85 means "pass everything up". In python 2.4, however, this job no
86 longer belongs to the HTTP Handler and is now done by a NEW handler,
86 longer belongs to the HTTP Handler and is now done by a NEW handler,
87 HTTPErrorProcessor. Here's the bottom line:
87 HTTPErrorProcessor. Here's the bottom line:
88
88
89 python version < 2.4
89 python version < 2.4
90 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
90 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
91 errors
91 errors
92 HANDLE_ERRORS == 0 pass everything up, error processing is
92 HANDLE_ERRORS == 0 pass everything up, error processing is
93 left to the calling code
93 left to the calling code
94 python version >= 2.4
94 python version >= 2.4
95 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
95 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
96 HANDLE_ERRORS == 0 (default) pass everything up, let the
96 HANDLE_ERRORS == 0 (default) pass everything up, let the
97 other handlers (specifically,
97 other handlers (specifically,
98 HTTPErrorProcessor) decide what to do
98 HTTPErrorProcessor) decide what to do
99
99
100 In practice, setting the variable either way makes little difference
100 In practice, setting the variable either way makes little difference
101 in python 2.4, so for the most consistent behavior across versions,
101 in python 2.4, so for the most consistent behavior across versions,
102 you probably just want to use the defaults, which will give you
102 you probably just want to use the defaults, which will give you
103 exceptions on errors.
103 exceptions on errors.
104
104
105 """
105 """
106
106
107 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
107 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
108
108
109 import urllib2
109 import urllib2
110 import httplib
110 import httplib
111 import socket
111 import socket
112 import thread
112 import thread
113
113
114 DEBUG = None
114 DEBUG = None
115
115
116 import sys
116 import sys
117 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
117 if sys.version_info < (2, 4): HANDLE_ERRORS = 1
118 else: HANDLE_ERRORS = 0
118 else: HANDLE_ERRORS = 0
119
119
120 class ConnectionManager:
120 class ConnectionManager:
121 """
121 """
122 The connection manager must be able to:
122 The connection manager must be able to:
123 * keep track of all existing
123 * keep track of all existing
124 """
124 """
125 def __init__(self):
125 def __init__(self):
126 self._lock = thread.allocate_lock()
126 self._lock = thread.allocate_lock()
127 self._hostmap = {} # map hosts to a list of connections
127 self._hostmap = {} # map hosts to a list of connections
128 self._connmap = {} # map connections to host
128 self._connmap = {} # map connections to host
129 self._readymap = {} # map connection to ready state
129 self._readymap = {} # map connection to ready state
130
130
131 def add(self, host, connection, ready):
131 def add(self, host, connection, ready):
132 self._lock.acquire()
132 self._lock.acquire()
133 try:
133 try:
134 if not host in self._hostmap: self._hostmap[host] = []
134 if not host in self._hostmap: self._hostmap[host] = []
135 self._hostmap[host].append(connection)
135 self._hostmap[host].append(connection)
136 self._connmap[connection] = host
136 self._connmap[connection] = host
137 self._readymap[connection] = ready
137 self._readymap[connection] = ready
138 finally:
138 finally:
139 self._lock.release()
139 self._lock.release()
140
140
141 def remove(self, connection):
141 def remove(self, connection):
142 self._lock.acquire()
142 self._lock.acquire()
143 try:
143 try:
144 try:
144 try:
145 host = self._connmap[connection]
145 host = self._connmap[connection]
146 except KeyError:
146 except KeyError:
147 pass
147 pass
148 else:
148 else:
149 del self._connmap[connection]
149 del self._connmap[connection]
150 del self._readymap[connection]
150 del self._readymap[connection]
151 self._hostmap[host].remove(connection)
151 self._hostmap[host].remove(connection)
152 if not self._hostmap[host]: del self._hostmap[host]
152 if not self._hostmap[host]: del self._hostmap[host]
153 finally:
153 finally:
154 self._lock.release()
154 self._lock.release()
155
155
156 def set_ready(self, connection, ready):
156 def set_ready(self, connection, ready):
157 try: self._readymap[connection] = ready
157 try: self._readymap[connection] = ready
158 except KeyError: pass
158 except KeyError: pass
159
159
160 def get_ready_conn(self, host):
160 def get_ready_conn(self, host):
161 conn = None
161 conn = None
162 self._lock.acquire()
162 self._lock.acquire()
163 try:
163 try:
164 if host in self._hostmap:
164 if host in self._hostmap:
165 for c in self._hostmap[host]:
165 for c in self._hostmap[host]:
166 if self._readymap[c]:
166 if self._readymap[c]:
167 self._readymap[c] = 0
167 self._readymap[c] = 0
168 conn = c
168 conn = c
169 break
169 break
170 finally:
170 finally:
171 self._lock.release()
171 self._lock.release()
172 return conn
172 return conn
173
173
174 def get_all(self, host=None):
174 def get_all(self, host=None):
175 if host:
175 if host:
176 return list(self._hostmap.get(host, []))
176 return list(self._hostmap.get(host, []))
177 else:
177 else:
178 return dict(self._hostmap)
178 return dict(self._hostmap)
179
179
180 class KeepAliveHandler:
180 class KeepAliveHandler:
181 def __init__(self):
181 def __init__(self):
182 self._cm = ConnectionManager()
182 self._cm = ConnectionManager()
183
183
184 #### Connection Management
184 #### Connection Management
185 def open_connections(self):
185 def open_connections(self):
186 """return a list of connected hosts and the number of connections
186 """return a list of connected hosts and the number of connections
187 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
187 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
188 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
188 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
189
189
190 def close_connection(self, host):
190 def close_connection(self, host):
191 """close connection(s) to <host>
191 """close connection(s) to <host>
192 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
192 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
193 no error occurs if there is no connection to that host."""
193 no error occurs if there is no connection to that host."""
194 for h in self._cm.get_all(host):
194 for h in self._cm.get_all(host):
195 self._cm.remove(h)
195 self._cm.remove(h)
196 h.close()
196 h.close()
197
197
198 def close_all(self):
198 def close_all(self):
199 """close all open connections"""
199 """close all open connections"""
200 for host, conns in self._cm.get_all().iteritems():
200 for host, conns in self._cm.get_all().iteritems():
201 for h in conns:
201 for h in conns:
202 self._cm.remove(h)
202 self._cm.remove(h)
203 h.close()
203 h.close()
204
204
205 def _request_closed(self, request, host, connection):
205 def _request_closed(self, request, host, connection):
206 """tells us that this request is now closed and the the
206 """tells us that this request is now closed and the the
207 connection is ready for another request"""
207 connection is ready for another request"""
208 self._cm.set_ready(connection, 1)
208 self._cm.set_ready(connection, 1)
209
209
210 def _remove_connection(self, host, connection, close=0):
210 def _remove_connection(self, host, connection, close=0):
211 if close: connection.close()
211 if close: connection.close()
212 self._cm.remove(connection)
212 self._cm.remove(connection)
213
213
214 #### Transaction Execution
214 #### Transaction Execution
215 def http_open(self, req):
215 def http_open(self, req):
216 return self.do_open(HTTPConnection, req)
216 return self.do_open(HTTPConnection, req)
217
217
218 def do_open(self, http_class, req):
218 def do_open(self, http_class, req):
219 host = req.get_host()
219 host = req.get_host()
220 if not host:
220 if not host:
221 raise urllib2.URLError('no host given')
221 raise urllib2.URLError('no host given')
222
222
223 try:
223 try:
224 h = self._cm.get_ready_conn(host)
224 h = self._cm.get_ready_conn(host)
225 while h:
225 while h:
226 r = self._reuse_connection(h, req, host)
226 r = self._reuse_connection(h, req, host)
227
227
228 # if this response is non-None, then it worked and we're
228 # if this response is non-None, then it worked and we're
229 # done. Break out, skipping the else block.
229 # done. Break out, skipping the else block.
230 if r: break
230 if r: break
231
231
232 # connection is bad - possibly closed by server
232 # connection is bad - possibly closed by server
233 # discard it and ask for the next free connection
233 # discard it and ask for the next free connection
234 h.close()
234 h.close()
235 self._cm.remove(h)
235 self._cm.remove(h)
236 h = self._cm.get_ready_conn(host)
236 h = self._cm.get_ready_conn(host)
237 else:
237 else:
238 # no (working) free connections were found. Create a new one.
238 # no (working) free connections were found. Create a new one.
239 h = http_class(host)
239 h = http_class(host)
240 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
240 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
241 host, id(h))
241 host, id(h))
242 self._cm.add(host, h, 0)
242 self._cm.add(host, h, 0)
243 self._start_transaction(h, req)
243 self._start_transaction(h, req)
244 r = h.getresponse()
244 r = h.getresponse()
245 except (socket.error, httplib.HTTPException), err:
245 except (socket.error, httplib.HTTPException), err:
246 raise urllib2.URLError(err)
246 raise urllib2.URLError(err)
247
247
248 # if not a persistent connection, don't try to reuse it
248 # if not a persistent connection, don't try to reuse it
249 if r.will_close: self._cm.remove(h)
249 if r.will_close: self._cm.remove(h)
250
250
251 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
251 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
252 r._handler = self
252 r._handler = self
253 r._host = host
253 r._host = host
254 r._url = req.get_full_url()
254 r._url = req.get_full_url()
255 r._connection = h
255 r._connection = h
256 r.code = r.status
256 r.code = r.status
257 r.headers = r.msg
257 r.headers = r.msg
258 r.msg = r.reason
258 r.msg = r.reason
259
259
260 if r.status == 200 or not HANDLE_ERRORS:
260 if r.status == 200 or not HANDLE_ERRORS:
261 return r
261 return r
262 else:
262 else:
263 return self.parent.error('http', req, r,
263 return self.parent.error('http', req, r,
264 r.status, r.msg, r.headers)
264 r.status, r.msg, r.headers)
265
265
266 def _reuse_connection(self, h, req, host):
266 def _reuse_connection(self, h, req, host):
267 """start the transaction with a re-used connection
267 """start the transaction with a re-used connection
268 return a response object (r) upon success or None on failure.
268 return a response object (r) upon success or None on failure.
269 This DOES not close or remove bad connections in cases where
269 This DOES not close or remove bad connections in cases where
270 it returns. However, if an unexpected exception occurs, it
270 it returns. However, if an unexpected exception occurs, it
271 will close and remove the connection before re-raising.
271 will close and remove the connection before re-raising.
272 """
272 """
273 try:
273 try:
274 self._start_transaction(h, req)
274 self._start_transaction(h, req)
275 r = h.getresponse()
275 r = h.getresponse()
276 # note: just because we got something back doesn't mean it
276 # note: just because we got something back doesn't mean it
277 # worked. We'll check the version below, too.
277 # worked. We'll check the version below, too.
278 except (socket.error, httplib.HTTPException):
278 except (socket.error, httplib.HTTPException):
279 r = None
279 r = None
280 except:
280 except:
281 # adding this block just in case we've missed
281 # adding this block just in case we've missed
282 # something we will still raise the exception, but
282 # something we will still raise the exception, but
283 # lets try and close the connection and remove it
283 # lets try and close the connection and remove it
284 # first. We previously got into a nasty loop
284 # first. We previously got into a nasty loop
285 # where an exception was uncaught, and so the
285 # where an exception was uncaught, and so the
286 # connection stayed open. On the next try, the
286 # connection stayed open. On the next try, the
287 # same exception was raised, etc. The tradeoff is
287 # same exception was raised, etc. The tradeoff is
288 # that it's now possible this call will raise
288 # that it's now possible this call will raise
289 # a DIFFERENT exception
289 # a DIFFERENT exception
290 if DEBUG: DEBUG.error("unexpected exception - closing " + \
290 if DEBUG: DEBUG.error("unexpected exception - closing " + \
291 "connection to %s (%d)", host, id(h))
291 "connection to %s (%d)", host, id(h))
292 self._cm.remove(h)
292 self._cm.remove(h)
293 h.close()
293 h.close()
294 raise
294 raise
295
295
296 if r is None or r.version == 9:
296 if r is None or r.version == 9:
297 # httplib falls back to assuming HTTP 0.9 if it gets a
297 # httplib falls back to assuming HTTP 0.9 if it gets a
298 # bad header back. This is most likely to happen if
298 # bad header back. This is most likely to happen if
299 # the socket has been closed by the server since we
299 # the socket has been closed by the server since we
300 # last used the connection.
300 # last used the connection.
301 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
301 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
302 host, id(h))
302 host, id(h))
303 r = None
303 r = None
304 else:
304 else:
305 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
305 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
306
306
307 return r
307 return r
308
308
309 def _start_transaction(self, h, req):
309 def _start_transaction(self, h, req):
310 # What follows mostly reimplements HTTPConnection.request()
311 # except it adds self.parent.addheaders in the mix.
312 headers = req.headers.copy()
313 if sys.version_info >= (2, 4):
314 headers.update(req.unredirected_hdrs)
315 headers.update(self.parent.addheaders)
316 headers = dict((n.lower(), v) for n,v in headers.items())
317 skipheaders = {}
318 for n in ('host', 'accept-encoding'):
319 if n in headers:
320 skipheaders['skip_' + n.replace('-', '_')] = 1
310 try:
321 try:
311 if req.has_data():
322 if req.has_data():
312 data = req.get_data()
323 data = req.get_data()
313 h.putrequest('POST', req.get_selector())
324 h.putrequest('POST', req.get_selector(), **skipheaders)
314 if 'Content-type' not in req.headers:
325 if 'content-type' not in headers:
315 h.putheader('Content-type',
326 h.putheader('Content-type',
316 'application/x-www-form-urlencoded')
327 'application/x-www-form-urlencoded')
317 if 'Content-length' not in req.headers:
328 if 'content-length' not in headers:
318 h.putheader('Content-length', '%d' % len(data))
329 h.putheader('Content-length', '%d' % len(data))
319 else:
330 else:
320 h.putrequest('GET', req.get_selector())
331 h.putrequest('GET', req.get_selector(), **skipheaders)
321 except (socket.error), err:
332 except (socket.error), err:
322 raise urllib2.URLError(err)
333 raise urllib2.URLError(err)
323
334 for k, v in headers.items():
324 for args in self.parent.addheaders:
325 h.putheader(*args)
326 for k, v in req.headers.items():
327 h.putheader(k, v)
335 h.putheader(k, v)
328 h.endheaders()
336 h.endheaders()
329 if req.has_data():
337 if req.has_data():
330 h.send(data)
338 h.send(data)
331
339
332 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
340 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
333 pass
341 pass
334
342
335 class HTTPResponse(httplib.HTTPResponse):
343 class HTTPResponse(httplib.HTTPResponse):
336 # we need to subclass HTTPResponse in order to
344 # we need to subclass HTTPResponse in order to
337 # 1) add readline() and readlines() methods
345 # 1) add readline() and readlines() methods
338 # 2) add close_connection() methods
346 # 2) add close_connection() methods
339 # 3) add info() and geturl() methods
347 # 3) add info() and geturl() methods
340
348
341 # in order to add readline(), read must be modified to deal with a
349 # in order to add readline(), read must be modified to deal with a
342 # buffer. example: readline must read a buffer and then spit back
350 # buffer. example: readline must read a buffer and then spit back
343 # one line at a time. The only real alternative is to read one
351 # one line at a time. The only real alternative is to read one
344 # BYTE at a time (ick). Once something has been read, it can't be
352 # BYTE at a time (ick). Once something has been read, it can't be
345 # put back (ok, maybe it can, but that's even uglier than this),
353 # put back (ok, maybe it can, but that's even uglier than this),
346 # so if you THEN do a normal read, you must first take stuff from
354 # so if you THEN do a normal read, you must first take stuff from
347 # the buffer.
355 # the buffer.
348
356
349 # the read method wraps the original to accomodate buffering,
357 # the read method wraps the original to accomodate buffering,
350 # although read() never adds to the buffer.
358 # although read() never adds to the buffer.
351 # Both readline and readlines have been stolen with almost no
359 # Both readline and readlines have been stolen with almost no
352 # modification from socket.py
360 # modification from socket.py
353
361
354
362
355 def __init__(self, sock, debuglevel=0, strict=0, method=None):
363 def __init__(self, sock, debuglevel=0, strict=0, method=None):
356 if method: # the httplib in python 2.3 uses the method arg
364 if method: # the httplib in python 2.3 uses the method arg
357 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
365 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
358 else: # 2.2 doesn't
366 else: # 2.2 doesn't
359 httplib.HTTPResponse.__init__(self, sock, debuglevel)
367 httplib.HTTPResponse.__init__(self, sock, debuglevel)
360 self.fileno = sock.fileno
368 self.fileno = sock.fileno
361 self.code = None
369 self.code = None
362 self._rbuf = ''
370 self._rbuf = ''
363 self._rbufsize = 8096
371 self._rbufsize = 8096
364 self._handler = None # inserted by the handler later
372 self._handler = None # inserted by the handler later
365 self._host = None # (same)
373 self._host = None # (same)
366 self._url = None # (same)
374 self._url = None # (same)
367 self._connection = None # (same)
375 self._connection = None # (same)
368
376
369 _raw_read = httplib.HTTPResponse.read
377 _raw_read = httplib.HTTPResponse.read
370
378
371 def close(self):
379 def close(self):
372 if self.fp:
380 if self.fp:
373 self.fp.close()
381 self.fp.close()
374 self.fp = None
382 self.fp = None
375 if self._handler:
383 if self._handler:
376 self._handler._request_closed(self, self._host,
384 self._handler._request_closed(self, self._host,
377 self._connection)
385 self._connection)
378
386
379 def close_connection(self):
387 def close_connection(self):
380 self._handler._remove_connection(self._host, self._connection, close=1)
388 self._handler._remove_connection(self._host, self._connection, close=1)
381 self.close()
389 self.close()
382
390
383 def info(self):
391 def info(self):
384 return self.headers
392 return self.headers
385
393
386 def geturl(self):
394 def geturl(self):
387 return self._url
395 return self._url
388
396
389 def read(self, amt=None):
397 def read(self, amt=None):
390 # the _rbuf test is only in this first if for speed. It's not
398 # the _rbuf test is only in this first if for speed. It's not
391 # logically necessary
399 # logically necessary
392 if self._rbuf and not amt is None:
400 if self._rbuf and not amt is None:
393 L = len(self._rbuf)
401 L = len(self._rbuf)
394 if amt > L:
402 if amt > L:
395 amt -= L
403 amt -= L
396 else:
404 else:
397 s = self._rbuf[:amt]
405 s = self._rbuf[:amt]
398 self._rbuf = self._rbuf[amt:]
406 self._rbuf = self._rbuf[amt:]
399 return s
407 return s
400
408
401 s = self._rbuf + self._raw_read(amt)
409 s = self._rbuf + self._raw_read(amt)
402 self._rbuf = ''
410 self._rbuf = ''
403 return s
411 return s
404
412
405 # stolen from Python SVN #68532 to fix issue1088
413 # stolen from Python SVN #68532 to fix issue1088
406 def _read_chunked(self, amt):
414 def _read_chunked(self, amt):
407 chunk_left = self.chunk_left
415 chunk_left = self.chunk_left
408 value = ''
416 value = ''
409
417
410 # XXX This accumulates chunks by repeated string concatenation,
418 # XXX This accumulates chunks by repeated string concatenation,
411 # which is not efficient as the number or size of chunks gets big.
419 # which is not efficient as the number or size of chunks gets big.
412 while True:
420 while True:
413 if chunk_left is None:
421 if chunk_left is None:
414 line = self.fp.readline()
422 line = self.fp.readline()
415 i = line.find(';')
423 i = line.find(';')
416 if i >= 0:
424 if i >= 0:
417 line = line[:i] # strip chunk-extensions
425 line = line[:i] # strip chunk-extensions
418 try:
426 try:
419 chunk_left = int(line, 16)
427 chunk_left = int(line, 16)
420 except ValueError:
428 except ValueError:
421 # close the connection as protocol synchronisation is
429 # close the connection as protocol synchronisation is
422 # probably lost
430 # probably lost
423 self.close()
431 self.close()
424 raise httplib.IncompleteRead(value)
432 raise httplib.IncompleteRead(value)
425 if chunk_left == 0:
433 if chunk_left == 0:
426 break
434 break
427 if amt is None:
435 if amt is None:
428 value += self._safe_read(chunk_left)
436 value += self._safe_read(chunk_left)
429 elif amt < chunk_left:
437 elif amt < chunk_left:
430 value += self._safe_read(amt)
438 value += self._safe_read(amt)
431 self.chunk_left = chunk_left - amt
439 self.chunk_left = chunk_left - amt
432 return value
440 return value
433 elif amt == chunk_left:
441 elif amt == chunk_left:
434 value += self._safe_read(amt)
442 value += self._safe_read(amt)
435 self._safe_read(2) # toss the CRLF at the end of the chunk
443 self._safe_read(2) # toss the CRLF at the end of the chunk
436 self.chunk_left = None
444 self.chunk_left = None
437 return value
445 return value
438 else:
446 else:
439 value += self._safe_read(chunk_left)
447 value += self._safe_read(chunk_left)
440 amt -= chunk_left
448 amt -= chunk_left
441
449
442 # we read the whole chunk, get another
450 # we read the whole chunk, get another
443 self._safe_read(2) # toss the CRLF at the end of the chunk
451 self._safe_read(2) # toss the CRLF at the end of the chunk
444 chunk_left = None
452 chunk_left = None
445
453
446 # read and discard trailer up to the CRLF terminator
454 # read and discard trailer up to the CRLF terminator
447 ### note: we shouldn't have any trailers!
455 ### note: we shouldn't have any trailers!
448 while True:
456 while True:
449 line = self.fp.readline()
457 line = self.fp.readline()
450 if not line:
458 if not line:
451 # a vanishingly small number of sites EOF without
459 # a vanishingly small number of sites EOF without
452 # sending the trailer
460 # sending the trailer
453 break
461 break
454 if line == '\r\n':
462 if line == '\r\n':
455 break
463 break
456
464
457 # we read everything; close the "file"
465 # we read everything; close the "file"
458 self.close()
466 self.close()
459
467
460 return value
468 return value
461
469
462 def readline(self, limit=-1):
470 def readline(self, limit=-1):
463 i = self._rbuf.find('\n')
471 i = self._rbuf.find('\n')
464 while i < 0 and not (0 < limit <= len(self._rbuf)):
472 while i < 0 and not (0 < limit <= len(self._rbuf)):
465 new = self._raw_read(self._rbufsize)
473 new = self._raw_read(self._rbufsize)
466 if not new: break
474 if not new: break
467 i = new.find('\n')
475 i = new.find('\n')
468 if i >= 0: i = i + len(self._rbuf)
476 if i >= 0: i = i + len(self._rbuf)
469 self._rbuf = self._rbuf + new
477 self._rbuf = self._rbuf + new
470 if i < 0: i = len(self._rbuf)
478 if i < 0: i = len(self._rbuf)
471 else: i = i+1
479 else: i = i+1
472 if 0 <= limit < len(self._rbuf): i = limit
480 if 0 <= limit < len(self._rbuf): i = limit
473 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
481 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
474 return data
482 return data
475
483
476 def readlines(self, sizehint = 0):
484 def readlines(self, sizehint = 0):
477 total = 0
485 total = 0
478 list = []
486 list = []
479 while 1:
487 while 1:
480 line = self.readline()
488 line = self.readline()
481 if not line: break
489 if not line: break
482 list.append(line)
490 list.append(line)
483 total += len(line)
491 total += len(line)
484 if sizehint and total >= sizehint:
492 if sizehint and total >= sizehint:
485 break
493 break
486 return list
494 return list
487
495
488
496
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection that produces keepalive-aware responses.

    Overrides response_class so reads go through the buffered HTTPResponse
    defined above, which does not close the underlying socket on EOF.
    """
    # use the modified response class
    response_class = HTTPResponse
492
500
493 #########################################################################
501 #########################################################################
494 ##### TEST FUNCTIONS
502 ##### TEST FUNCTIONS
495 #########################################################################
503 #########################################################################
496
504
497 def error_handler(url):
505 def error_handler(url):
498 global HANDLE_ERRORS
506 global HANDLE_ERRORS
499 orig = HANDLE_ERRORS
507 orig = HANDLE_ERRORS
500 keepalive_handler = HTTPHandler()
508 keepalive_handler = HTTPHandler()
501 opener = urllib2.build_opener(keepalive_handler)
509 opener = urllib2.build_opener(keepalive_handler)
502 urllib2.install_opener(opener)
510 urllib2.install_opener(opener)
503 pos = {0: 'off', 1: 'on'}
511 pos = {0: 'off', 1: 'on'}
504 for i in (0, 1):
512 for i in (0, 1):
505 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
513 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
506 HANDLE_ERRORS = i
514 HANDLE_ERRORS = i
507 try:
515 try:
508 fo = urllib2.urlopen(url)
516 fo = urllib2.urlopen(url)
509 fo.read()
517 fo.read()
510 fo.close()
518 fo.close()
511 try: status, reason = fo.status, fo.reason
519 try: status, reason = fo.status, fo.reason
512 except AttributeError: status, reason = None, None
520 except AttributeError: status, reason = None, None
513 except IOError, e:
521 except IOError, e:
514 print " EXCEPTION: %s" % e
522 print " EXCEPTION: %s" % e
515 raise
523 raise
516 else:
524 else:
517 print " status = %s, reason = %s" % (status, reason)
525 print " status = %s, reason = %s" % (status, reason)
518 HANDLE_ERRORS = orig
526 HANDLE_ERRORS = orig
519 hosts = keepalive_handler.open_connections()
527 hosts = keepalive_handler.open_connections()
520 print "open connections:", hosts
528 print "open connections:", hosts
521 keepalive_handler.close_all()
529 keepalive_handler.close_all()
522
530
523 def continuity(url):
531 def continuity(url):
524 from util import md5
532 from util import md5
525 format = '%25s: %s'
533 format = '%25s: %s'
526
534
527 # first fetch the file with the normal http handler
535 # first fetch the file with the normal http handler
528 opener = urllib2.build_opener()
536 opener = urllib2.build_opener()
529 urllib2.install_opener(opener)
537 urllib2.install_opener(opener)
530 fo = urllib2.urlopen(url)
538 fo = urllib2.urlopen(url)
531 foo = fo.read()
539 foo = fo.read()
532 fo.close()
540 fo.close()
533 m = md5.new(foo)
541 m = md5.new(foo)
534 print format % ('normal urllib', m.hexdigest())
542 print format % ('normal urllib', m.hexdigest())
535
543
536 # now install the keepalive handler and try again
544 # now install the keepalive handler and try again
537 opener = urllib2.build_opener(HTTPHandler())
545 opener = urllib2.build_opener(HTTPHandler())
538 urllib2.install_opener(opener)
546 urllib2.install_opener(opener)
539
547
540 fo = urllib2.urlopen(url)
548 fo = urllib2.urlopen(url)
541 foo = fo.read()
549 foo = fo.read()
542 fo.close()
550 fo.close()
543 m = md5.new(foo)
551 m = md5.new(foo)
544 print format % ('keepalive read', m.hexdigest())
552 print format % ('keepalive read', m.hexdigest())
545
553
546 fo = urllib2.urlopen(url)
554 fo = urllib2.urlopen(url)
547 foo = ''
555 foo = ''
548 while 1:
556 while 1:
549 f = fo.readline()
557 f = fo.readline()
550 if f: foo = foo + f
558 if f: foo = foo + f
551 else: break
559 else: break
552 fo.close()
560 fo.close()
553 m = md5.new(foo)
561 m = md5.new(foo)
554 print format % ('keepalive readline', m.hexdigest())
562 print format % ('keepalive readline', m.hexdigest())
555
563
556 def comp(N, url):
564 def comp(N, url):
557 print ' making %i connections to:\n %s' % (N, url)
565 print ' making %i connections to:\n %s' % (N, url)
558
566
559 sys.stdout.write(' first using the normal urllib handlers')
567 sys.stdout.write(' first using the normal urllib handlers')
560 # first use normal opener
568 # first use normal opener
561 opener = urllib2.build_opener()
569 opener = urllib2.build_opener()
562 urllib2.install_opener(opener)
570 urllib2.install_opener(opener)
563 t1 = fetch(N, url)
571 t1 = fetch(N, url)
564 print ' TIME: %.3f s' % t1
572 print ' TIME: %.3f s' % t1
565
573
566 sys.stdout.write(' now using the keepalive handler ')
574 sys.stdout.write(' now using the keepalive handler ')
567 # now install the keepalive handler and try again
575 # now install the keepalive handler and try again
568 opener = urllib2.build_opener(HTTPHandler())
576 opener = urllib2.build_opener(HTTPHandler())
569 urllib2.install_opener(opener)
577 urllib2.install_opener(opener)
570 t2 = fetch(N, url)
578 t2 = fetch(N, url)
571 print ' TIME: %.3f s' % t2
579 print ' TIME: %.3f s' % t2
572 print ' improvement factor: %.2f' % (t1/t2, )
580 print ' improvement factor: %.2f' % (t1/t2, )
573
581
574 def fetch(N, url, delay=0):
582 def fetch(N, url, delay=0):
575 import time
583 import time
576 lens = []
584 lens = []
577 starttime = time.time()
585 starttime = time.time()
578 for i in range(N):
586 for i in range(N):
579 if delay and i > 0: time.sleep(delay)
587 if delay and i > 0: time.sleep(delay)
580 fo = urllib2.urlopen(url)
588 fo = urllib2.urlopen(url)
581 foo = fo.read()
589 foo = fo.read()
582 fo.close()
590 fo.close()
583 lens.append(len(foo))
591 lens.append(len(foo))
584 diff = time.time() - starttime
592 diff = time.time() - starttime
585
593
586 j = 0
594 j = 0
587 for i in lens[1:]:
595 for i in lens[1:]:
588 j = j + 1
596 j = j + 1
589 if not i == lens[0]:
597 if not i == lens[0]:
590 print "WARNING: inconsistent length on read %i: %i" % (j, i)
598 print "WARNING: inconsistent length on read %i: %i" % (j, i)
591
599
592 return diff
600 return diff
593
601
594 def test_timeout(url):
602 def test_timeout(url):
595 global DEBUG
603 global DEBUG
596 dbbackup = DEBUG
604 dbbackup = DEBUG
597 class FakeLogger:
605 class FakeLogger:
598 def debug(self, msg, *args): print msg % args
606 def debug(self, msg, *args): print msg % args
599 info = warning = error = debug
607 info = warning = error = debug
600 DEBUG = FakeLogger()
608 DEBUG = FakeLogger()
601 print " fetching the file to establish a connection"
609 print " fetching the file to establish a connection"
602 fo = urllib2.urlopen(url)
610 fo = urllib2.urlopen(url)
603 data1 = fo.read()
611 data1 = fo.read()
604 fo.close()
612 fo.close()
605
613
606 i = 20
614 i = 20
607 print " waiting %i seconds for the server to close the connection" % i
615 print " waiting %i seconds for the server to close the connection" % i
608 while i > 0:
616 while i > 0:
609 sys.stdout.write('\r %2i' % i)
617 sys.stdout.write('\r %2i' % i)
610 sys.stdout.flush()
618 sys.stdout.flush()
611 time.sleep(1)
619 time.sleep(1)
612 i -= 1
620 i -= 1
613 sys.stderr.write('\r')
621 sys.stderr.write('\r')
614
622
615 print " fetching the file a second time"
623 print " fetching the file a second time"
616 fo = urllib2.urlopen(url)
624 fo = urllib2.urlopen(url)
617 data2 = fo.read()
625 data2 = fo.read()
618 fo.close()
626 fo.close()
619
627
620 if data1 == data2:
628 if data1 == data2:
621 print ' data are identical'
629 print ' data are identical'
622 else:
630 else:
623 print ' ERROR: DATA DIFFER'
631 print ' ERROR: DATA DIFFER'
624
632
625 DEBUG = dbbackup
633 DEBUG = dbbackup
626
634
627
635
628 def test(url, N=10):
636 def test(url, N=10):
629 print "checking error hander (do this on a non-200)"
637 print "checking error hander (do this on a non-200)"
630 try: error_handler(url)
638 try: error_handler(url)
631 except IOError:
639 except IOError:
632 print "exiting - exception will prevent further tests"
640 print "exiting - exception will prevent further tests"
633 sys.exit()
641 sys.exit()
634 print
642 print
635 print "performing continuity test (making sure stuff isn't corrupted)"
643 print "performing continuity test (making sure stuff isn't corrupted)"
636 continuity(url)
644 continuity(url)
637 print
645 print
638 print "performing speed comparison"
646 print "performing speed comparison"
639 comp(N, url)
647 comp(N, url)
640 print
648 print
641 print "performing dropped-connection check"
649 print "performing dropped-connection check"
642 test_timeout(url)
650 test_timeout(url)
643
651
644 if __name__ == '__main__':
652 if __name__ == '__main__':
645 import time
653 import time
646 import sys
654 import sys
647 try:
655 try:
648 N = int(sys.argv[1])
656 N = int(sys.argv[1])
649 url = sys.argv[2]
657 url = sys.argv[2]
650 except:
658 except:
651 print "%s <integer> <url>" % sys.argv[0]
659 print "%s <integer> <url>" % sys.argv[0]
652 else:
660 else:
653 test(url, N)
661 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now