##// END OF EJS Templates
keepalive: discard legacy Python support for error handling...
Augie Fackler -
r30487:88a448a1 default
parent child Browse files
Show More
@@ -1,758 +1,692 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (e.g. 404)
68 status - the return status (e.g. 404)
69 reason - English translation of status (e.g. 'File not found')
69 reason - English translation of status (e.g. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81
82 For python versions earlier than 2.4, you can avoid this fancy error
83 handling by setting the module-level global HANDLE_ERRORS to zero.
84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
86 means "pass everything up". In python 2.4, however, this job no
87 longer belongs to the HTTP Handler and is now done by a NEW handler,
88 HTTPErrorProcessor. Here's the bottom line:
89
90 python version < 2.4
91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
92 errors
93 HANDLE_ERRORS == 0 pass everything up, error processing is
94 left to the calling code
95 python version >= 2.4
96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
97 HANDLE_ERRORS == 0 (default) pass everything up, let the
98 other handlers (specifically,
99 HTTPErrorProcessor) decide what to do
100
101 In practice, setting the variable either way makes little difference
102 in python 2.4, so for the most consistent behavior across versions,
103 you probably just want to use the defaults, which will give you
104 exceptions on errors.
105
106 """
81 """
107
82
108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
109
84
110 from __future__ import absolute_import, print_function
85 from __future__ import absolute_import, print_function
111
86
112 import errno
87 import errno
113 import hashlib
88 import hashlib
114 import socket
89 import socket
115 import sys
90 import sys
116 import threading
91 import threading
117
92
118 from . import (
93 from . import (
119 util,
94 util,
120 )
95 )
121
96
122 httplib = util.httplib
97 httplib = util.httplib
123 urlerr = util.urlerr
98 urlerr = util.urlerr
124 urlreq = util.urlreq
99 urlreq = util.urlreq
125
100
126 DEBUG = None
101 DEBUG = None
127
102
128 if sys.version_info < (2, 4):
129 HANDLE_ERRORS = 1
130 else: HANDLE_ERRORS = 0
131
class ConnectionManager(object):
    """Registry of open connections, grouped by host.

    For every registered connection the manager records the host it was
    added under and a "ready" flag.  A truthy flag means the connection
    may be handed out by get_ready_conn(); handing it out resets the flag
    to 0.  All access to the internal maps is serialized by one lock.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register connection under host with the given ready state."""
        with self._lock:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget connection; unknown connections are silently ignored."""
        with self._lock:
            try:
                host = self._connmap.pop(connection)
            except KeyError:
                return
            self._readymap.pop(connection, None)
            conns = self._hostmap[host]
            conns.remove(connection)
            if not conns:
                del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Update the ready state of a registered connection.

        Connections that are not (or no longer) registered are ignored.
        A plain dict assignment never raises KeyError, so without the
        membership test a connection that was already removed would be
        re-inserted into the ready map and never released.
        """
        with self._lock:
            if connection in self._readymap:
                self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """Return a ready connection to host and mark it busy.

        Returns None when host has no ready connection.
        """
        with self._lock:
            for c in self._hostmap.get(host, ()):
                if self._readymap[c]:
                    self._readymap[c] = 0
                    return c
        return None

    def get_all(self, host=None):
        """Return a snapshot of the registered connections.

        With a truthy host, return a new list of that host's connections
        (empty if there are none).  Otherwise return a new dict mapping
        each host to a new list of its connections.  The lists are copies
        so callers can remove connections while iterating over them.
        """
        with self._lock:
            if host:
                return list(self._hostmap.get(host, []))
            return dict((h, list(conns))
                        for h, conns in self._hostmap.items())
194
165
class KeepAliveHandler(object):
    """Handler mix-in that performs requests over pooled connections.

    Idle connections are tracked by a ConnectionManager and re-used for
    later requests to the same host; a new connection is opened when no
    idle one works.
    """

    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            # Iterate over a copy: removing a connection may mutate the
            # very list we were handed, which would make the loop skip
            # every other connection and leave it open.
            for h in list(conns):
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """Drop connection from the pool, closing it first if close is
        true."""
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Send req, re-using an idle connection when possible.

        Returns the response object, annotated with the handler, host,
        url and connection it came from plus the code/headers/msg
        attributes.  Every status is passed up unchanged; error handling
        is left to the other handlers of the opener.

        Raises urlerr.urlerror when no host is given or when a socket or
        httplib error occurs.
        """
        host = req.get_host()
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Send the request line, headers and (for POST) body of req on h.

        Raises urlerr.urlerror if sending the request line fails with a
        socket error.
        """
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = dict(self.parent.addheaders)
        headers.update(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict((n.lower(), v) for n, v in headers.items())
        # ask putrequest() not to emit headers we are going to send ourselves
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
361
328
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    """HTTP handler combining KeepAliveHandler with urlreq.httphandler."""
364
331
class HTTPResponse(httplib.HTTPResponse):
    """httplib response with buffered line reading and pool bookkeeping."""
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # Forward by keyword: passed positionally, ``method`` would land
        # in the ``strict`` slot of Python 2's HTTPResponse.__init__ and
        # the request method would be lost.  ``strict`` is accepted for
        # signature compatibility but not forwarded.
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None    # (same)
        self._url = None     # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and tell the handler the request is done."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the underlying connection, drop it from the handler's
        pool, then close this response."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Read up to amt bytes (everything if amt is None), serving data
        left in the readline() buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read a chunked body: up to amt bytes, or all of it if amt is
        None.

        Raises httplib.IncompleteRead (carrying the data read so far)
        when a chunk-size line cannot be parsed.
        """
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self, limit=-1):
        """Return the next line, including its newline if one was found.

        With limit >= 0 at most limit characters are returned; the line
        still ends at the first newline when that comes earlier.
        """
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find('\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            i = len(self._rbuf)
        else:
            i = i + 1
        # only ever shorten the line: never read past its newline
        if 0 <= limit < i:
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint=0):
        """Return the remaining lines as a list, stopping early once at
        least sizehint characters were collected (if sizehint is set)."""
        total = 0
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
520
487
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    ``str`` is either a value handed directly to ``self.sock.sendall``
    or an object with a ``read`` method, which is drained in 8192-sized
    reads.  Returns None.

    Raises ``httplib.NotConnected`` when there is no socket and
    ``self.auto_open`` is false, and re-raises any ``socket.error``
    except a broken pipe for which a response could still be read.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        # Read the error number from the attribute: indexing the
        # exception object (v[0]) is not supported on Python 3 and would
        # replace the real socket error with a TypeError.
        if getattr(v, 'errno', None) == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # make sure the attribute exists (and is empty) before
                # calling getresponse(), which may be the wrapped
                # version that consults it
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
        self.close()
        if reraise:
            raise
571
538
def wrapgetresponse(cls):
    """Build a broken-pipe aware replacement for ``cls.getresponse``.

    The returned function hands back the response stored on the
    connection as ``_broken_pipe_resp`` when there is one, and otherwise
    defers to ``cls.getresponse``.  It carries the docstring of the
    method it wraps.
    """
    def safegetresponse(self):
        # safesend() may have stashed a response that arrived just
        # before the server hung up; the socket is already closed then,
        # so that stored response is all there is to return.
        pending = getattr(self, '_broken_pipe_resp', None)
        if pending is None:
            # nothing stashed: take the normal response path
            return cls.getresponse(self)
        return pending
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
586
553
class HTTPConnection(httplib.HTTPConnection):
    """httplib connection using the broken-pipe aware send/getresponse."""
    # responses are built with the modified response class
    response_class = HTTPResponse
    # getresponse() first checks for a response saved by send()
    getresponse = wrapgetresponse(httplib.HTTPConnection)
    send = safesend
592
559
593
560
594 #########################################################################
561 #########################################################################
595 ##### TEST FUNCTIONS
562 ##### TEST FUNCTIONS
596 #########################################################################
563 #########################################################################
597
564
def error_handler(url):
    """Fetch ``url`` with HANDLE_ERRORS set to 0 and then to 1.

    A keepalive ``HTTPHandler`` opener is installed, and for each
    setting the status and reason of the response are printed (``None``
    for either one the response object does not provide).  Afterwards
    the handler's open connections are printed.

    An ``IOError`` raised while fetching is reported and re-raised.  The
    module-level ``HANDLE_ERRORS`` value is restored and the handler's
    connections are closed whether or not the fetches succeed.
    """
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urlreq.buildopener(keepalive_handler)
    urlreq.installopener(opener)
    pos = {0: 'off', 1: 'on'}
    try:
        try:
            for i in (0, 1):
                print(" fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i))
                HANDLE_ERRORS = i
                try:
                    fo = urlreq.urlopen(url)
                    fo.read()
                    fo.close()
                    try:
                        status, reason = fo.status, fo.reason
                    except AttributeError:
                        status, reason = None, None
                except IOError as e:
                    print(" EXCEPTION: %s" % e)
                    raise
                else:
                    print(" status = %s, reason = %s" % (status, reason))
        finally:
            # never leave the module-level switch at a test value, even
            # when the IOError above propagates
            HANDLE_ERRORS = orig
        hosts = keepalive_handler.open_connections()
        print("open connections:", hosts)
    finally:
        # release the handler's connections on the error path too
        keepalive_handler.close_all()
625
565
def continuity(url):
    """Print MD5 digests of ``url`` fetched three different ways.

    The body is fetched once with the default opener, once with the
    keepalive ``HTTPHandler`` via ``read()``, and once with the
    keepalive handler line by line via ``readline()``.  One labelled
    digest line is printed per fetch so the three can be compared.
    """
    md5 = hashlib.md5
    # not called ``format``, to leave the builtin usable
    fmt = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('keepalive read', m.hexdigest()))

    fo = urlreq.urlopen(url)
    # collect the lines and join once instead of growing a string one
    # concatenation at a time
    pieces = []
    while True:
        f = fo.readline()
        if not f:
            break
        pieces.append(f)
    fo.close()
    foo = ''.join(pieces)
    m = md5(foo)
    print(fmt % ('keepalive readline', m.hexdigest()))
659
599
def comp(N, url):
    """Time ``N`` fetches of ``url`` with and without the keepalive handler.

    The default opener is installed and timed with ``fetch()``, then an
    opener built around ``HTTPHandler`` is installed and timed the same
    way.  Both durations and their ratio are printed.
    """
    print(' making %i connections to:\n %s' % (N, url))

    util.stdout.write(' first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print(' TIME: %.3f s' % t1)

    util.stdout.write(' now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print(' TIME: %.3f s' % t2)
    if t2:
        print(' improvement factor: %.2f' % (t1 / t2))
    else:
        # a run that finishes within the clock's resolution reports a
        # duration of zero; there is no meaningful ratio to divide by
        print(' improvement factor: n/a (no measurable keepalive time)')
677
617
def fetch(N, url, delay=0):
    """Fetch ``url`` ``N`` times and return the elapsed seconds.

    When ``delay`` is set, that many seconds are slept before every
    fetch except the first.  A warning is printed for each later read
    whose body length differs from that of the first read.
    """
    import time
    sizes = []
    began = time.time()
    for attempt in range(N):
        if attempt > 0 and delay:
            time.sleep(delay)
        response = urlreq.urlopen(url)
        body = response.read()
        response.close()
        sizes.append(len(body))
    elapsed = time.time() - began

    # compare every later read against the first one (reads count from 1)
    for index, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (index, size))

    return elapsed
698
638
def test_timeout(url):
    """Fetch ``url``, wait 20 seconds, fetch again and compare the bodies.

    While the check runs, the module-level ``DEBUG`` object is replaced
    by a logger that prints every message; the previous value is put
    back when the function ends, including when a fetch raises.  The
    result of the comparison is printed.
    """
    global DEBUG
    # imported here so the function also works when the module is
    # imported rather than run as a script
    import time

    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug

    dbbackup = DEBUG
    DEBUG = FakeLogger()
    try:
        print(" fetching the file to establish a connection")
        fo = urlreq.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(" waiting %i seconds for the server to close the connection" % i)
        while i > 0:
            util.stdout.write('\r %2i' % i)
            util.stdout.flush()
            time.sleep(1)
            i -= 1
        # NOTE(review): the countdown goes to stdout but this carriage
        # return goes to stderr - confirm that is intended
        util.stderr.write('\r')

        print(" fetching the file a second time")
        fo = urlreq.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(' data are identical')
        else:
            print(' ERROR: DATA DIFFER')
    finally:
        # do not leave the printing logger installed after a failure
        DEBUG = dbbackup
732
672
733
673
def test(url, N=10):
    """Run the error-handler, continuity, speed and dropped-connection checks.

    The error-handler check runs first; if it raises ``IOError`` a
    message is printed and the process exits through ``sys.exit()``.
    The remaining checks then run in order against ``url``, with ``N``
    passed to the speed comparison.
    """
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()

    # each remaining check is announced after a blank separator line
    steps = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for banner, run in steps:
        print('')
        print(banner)
        run()
749
683
if __name__ == '__main__':
    # script entry point: <integer> is the fetch count, <url> the target
    import time
    try:
        N, url = int(sys.argv[1]), sys.argv[2]
    except (IndexError, ValueError):
        # missing argument or non-integer count: show the usage line
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now