##// END OF EJS Templates
keepalive: don't concatenate strings when reading chunked transfer...
Gregory Szorc -
r30686:8352c42a default
parent child Browse files
Show More
@@ -1,692 +1,690 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81 """
81 """
82
82
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84
84
85 from __future__ import absolute_import, print_function
85 from __future__ import absolute_import, print_function
86
86
87 import errno
87 import errno
88 import hashlib
88 import hashlib
89 import socket
89 import socket
90 import sys
90 import sys
91 import threading
91 import threading
92
92
93 from . import (
93 from . import (
94 util,
94 util,
95 )
95 )
96
96
97 httplib = util.httplib
97 httplib = util.httplib
98 urlerr = util.urlerr
98 urlerr = util.urlerr
99 urlreq = util.urlreq
99 urlreq = util.urlreq
100
100
101 DEBUG = None
101 DEBUG = None
102
102
103 class ConnectionManager(object):
103 class ConnectionManager(object):
104 """
104 """
105 The connection manager must be able to:
105 The connection manager must be able to:
106 * keep track of all existing
106 * keep track of all existing
107 """
107 """
108 def __init__(self):
108 def __init__(self):
109 self._lock = threading.Lock()
109 self._lock = threading.Lock()
110 self._hostmap = {} # map hosts to a list of connections
110 self._hostmap = {} # map hosts to a list of connections
111 self._connmap = {} # map connections to host
111 self._connmap = {} # map connections to host
112 self._readymap = {} # map connection to ready state
112 self._readymap = {} # map connection to ready state
113
113
114 def add(self, host, connection, ready):
114 def add(self, host, connection, ready):
115 self._lock.acquire()
115 self._lock.acquire()
116 try:
116 try:
117 if host not in self._hostmap:
117 if host not in self._hostmap:
118 self._hostmap[host] = []
118 self._hostmap[host] = []
119 self._hostmap[host].append(connection)
119 self._hostmap[host].append(connection)
120 self._connmap[connection] = host
120 self._connmap[connection] = host
121 self._readymap[connection] = ready
121 self._readymap[connection] = ready
122 finally:
122 finally:
123 self._lock.release()
123 self._lock.release()
124
124
125 def remove(self, connection):
125 def remove(self, connection):
126 self._lock.acquire()
126 self._lock.acquire()
127 try:
127 try:
128 try:
128 try:
129 host = self._connmap[connection]
129 host = self._connmap[connection]
130 except KeyError:
130 except KeyError:
131 pass
131 pass
132 else:
132 else:
133 del self._connmap[connection]
133 del self._connmap[connection]
134 del self._readymap[connection]
134 del self._readymap[connection]
135 self._hostmap[host].remove(connection)
135 self._hostmap[host].remove(connection)
136 if not self._hostmap[host]: del self._hostmap[host]
136 if not self._hostmap[host]: del self._hostmap[host]
137 finally:
137 finally:
138 self._lock.release()
138 self._lock.release()
139
139
140 def set_ready(self, connection, ready):
140 def set_ready(self, connection, ready):
141 try:
141 try:
142 self._readymap[connection] = ready
142 self._readymap[connection] = ready
143 except KeyError:
143 except KeyError:
144 pass
144 pass
145
145
146 def get_ready_conn(self, host):
146 def get_ready_conn(self, host):
147 conn = None
147 conn = None
148 self._lock.acquire()
148 self._lock.acquire()
149 try:
149 try:
150 if host in self._hostmap:
150 if host in self._hostmap:
151 for c in self._hostmap[host]:
151 for c in self._hostmap[host]:
152 if self._readymap[c]:
152 if self._readymap[c]:
153 self._readymap[c] = 0
153 self._readymap[c] = 0
154 conn = c
154 conn = c
155 break
155 break
156 finally:
156 finally:
157 self._lock.release()
157 self._lock.release()
158 return conn
158 return conn
159
159
160 def get_all(self, host=None):
160 def get_all(self, host=None):
161 if host:
161 if host:
162 return list(self._hostmap.get(host, []))
162 return list(self._hostmap.get(host, []))
163 else:
163 else:
164 return dict(self._hostmap)
164 return dict(self._hostmap)
165
165
166 class KeepAliveHandler(object):
166 class KeepAliveHandler(object):
167 def __init__(self):
167 def __init__(self):
168 self._cm = ConnectionManager()
168 self._cm = ConnectionManager()
169
169
170 #### Connection Management
170 #### Connection Management
171 def open_connections(self):
171 def open_connections(self):
172 """return a list of connected hosts and the number of connections
172 """return a list of connected hosts and the number of connections
173 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
173 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
174 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
174 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
175
175
176 def close_connection(self, host):
176 def close_connection(self, host):
177 """close connection(s) to <host>
177 """close connection(s) to <host>
178 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
178 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
179 no error occurs if there is no connection to that host."""
179 no error occurs if there is no connection to that host."""
180 for h in self._cm.get_all(host):
180 for h in self._cm.get_all(host):
181 self._cm.remove(h)
181 self._cm.remove(h)
182 h.close()
182 h.close()
183
183
184 def close_all(self):
184 def close_all(self):
185 """close all open connections"""
185 """close all open connections"""
186 for host, conns in self._cm.get_all().iteritems():
186 for host, conns in self._cm.get_all().iteritems():
187 for h in conns:
187 for h in conns:
188 self._cm.remove(h)
188 self._cm.remove(h)
189 h.close()
189 h.close()
190
190
191 def _request_closed(self, request, host, connection):
191 def _request_closed(self, request, host, connection):
192 """tells us that this request is now closed and that the
192 """tells us that this request is now closed and that the
193 connection is ready for another request"""
193 connection is ready for another request"""
194 self._cm.set_ready(connection, 1)
194 self._cm.set_ready(connection, 1)
195
195
196 def _remove_connection(self, host, connection, close=0):
196 def _remove_connection(self, host, connection, close=0):
197 if close:
197 if close:
198 connection.close()
198 connection.close()
199 self._cm.remove(connection)
199 self._cm.remove(connection)
200
200
201 #### Transaction Execution
201 #### Transaction Execution
202 def http_open(self, req):
202 def http_open(self, req):
203 return self.do_open(HTTPConnection, req)
203 return self.do_open(HTTPConnection, req)
204
204
205 def do_open(self, http_class, req):
205 def do_open(self, http_class, req):
206 host = req.get_host()
206 host = req.get_host()
207 if not host:
207 if not host:
208 raise urlerr.urlerror('no host given')
208 raise urlerr.urlerror('no host given')
209
209
210 try:
210 try:
211 h = self._cm.get_ready_conn(host)
211 h = self._cm.get_ready_conn(host)
212 while h:
212 while h:
213 r = self._reuse_connection(h, req, host)
213 r = self._reuse_connection(h, req, host)
214
214
215 # if this response is non-None, then it worked and we're
215 # if this response is non-None, then it worked and we're
216 # done. Break out, skipping the else block.
216 # done. Break out, skipping the else block.
217 if r:
217 if r:
218 break
218 break
219
219
220 # connection is bad - possibly closed by server
220 # connection is bad - possibly closed by server
221 # discard it and ask for the next free connection
221 # discard it and ask for the next free connection
222 h.close()
222 h.close()
223 self._cm.remove(h)
223 self._cm.remove(h)
224 h = self._cm.get_ready_conn(host)
224 h = self._cm.get_ready_conn(host)
225 else:
225 else:
226 # no (working) free connections were found. Create a new one.
226 # no (working) free connections were found. Create a new one.
227 h = http_class(host)
227 h = http_class(host)
228 if DEBUG:
228 if DEBUG:
229 DEBUG.info("creating new connection to %s (%d)",
229 DEBUG.info("creating new connection to %s (%d)",
230 host, id(h))
230 host, id(h))
231 self._cm.add(host, h, 0)
231 self._cm.add(host, h, 0)
232 self._start_transaction(h, req)
232 self._start_transaction(h, req)
233 r = h.getresponse()
233 r = h.getresponse()
234 except (socket.error, httplib.HTTPException) as err:
234 except (socket.error, httplib.HTTPException) as err:
235 raise urlerr.urlerror(err)
235 raise urlerr.urlerror(err)
236
236
237 # if not a persistent connection, don't try to reuse it
237 # if not a persistent connection, don't try to reuse it
238 if r.will_close:
238 if r.will_close:
239 self._cm.remove(h)
239 self._cm.remove(h)
240
240
241 if DEBUG:
241 if DEBUG:
242 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
242 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
243 r._handler = self
243 r._handler = self
244 r._host = host
244 r._host = host
245 r._url = req.get_full_url()
245 r._url = req.get_full_url()
246 r._connection = h
246 r._connection = h
247 r.code = r.status
247 r.code = r.status
248 r.headers = r.msg
248 r.headers = r.msg
249 r.msg = r.reason
249 r.msg = r.reason
250
250
251 return r
251 return r
252
252
253 def _reuse_connection(self, h, req, host):
253 def _reuse_connection(self, h, req, host):
254 """start the transaction with a re-used connection
254 """start the transaction with a re-used connection
255 return a response object (r) upon success or None on failure.
255 return a response object (r) upon success or None on failure.
256 This DOES not close or remove bad connections in cases where
256 This DOES not close or remove bad connections in cases where
257 it returns. However, if an unexpected exception occurs, it
257 it returns. However, if an unexpected exception occurs, it
258 will close and remove the connection before re-raising.
258 will close and remove the connection before re-raising.
259 """
259 """
260 try:
260 try:
261 self._start_transaction(h, req)
261 self._start_transaction(h, req)
262 r = h.getresponse()
262 r = h.getresponse()
263 # note: just because we got something back doesn't mean it
263 # note: just because we got something back doesn't mean it
264 # worked. We'll check the version below, too.
264 # worked. We'll check the version below, too.
265 except (socket.error, httplib.HTTPException):
265 except (socket.error, httplib.HTTPException):
266 r = None
266 r = None
267 except: # re-raises
267 except: # re-raises
268 # adding this block just in case we've missed
268 # adding this block just in case we've missed
269 # something we will still raise the exception, but
269 # something we will still raise the exception, but
270 # lets try and close the connection and remove it
270 # lets try and close the connection and remove it
271 # first. We previously got into a nasty loop
271 # first. We previously got into a nasty loop
272 # where an exception was uncaught, and so the
272 # where an exception was uncaught, and so the
273 # connection stayed open. On the next try, the
273 # connection stayed open. On the next try, the
274 # same exception was raised, etc. The trade-off is
274 # same exception was raised, etc. The trade-off is
275 # that it's now possible this call will raise
275 # that it's now possible this call will raise
276 # a DIFFERENT exception
276 # a DIFFERENT exception
277 if DEBUG:
277 if DEBUG:
278 DEBUG.error("unexpected exception - closing "
278 DEBUG.error("unexpected exception - closing "
279 "connection to %s (%d)", host, id(h))
279 "connection to %s (%d)", host, id(h))
280 self._cm.remove(h)
280 self._cm.remove(h)
281 h.close()
281 h.close()
282 raise
282 raise
283
283
284 if r is None or r.version == 9:
284 if r is None or r.version == 9:
285 # httplib falls back to assuming HTTP 0.9 if it gets a
285 # httplib falls back to assuming HTTP 0.9 if it gets a
286 # bad header back. This is most likely to happen if
286 # bad header back. This is most likely to happen if
287 # the socket has been closed by the server since we
287 # the socket has been closed by the server since we
288 # last used the connection.
288 # last used the connection.
289 if DEBUG:
289 if DEBUG:
290 DEBUG.info("failed to re-use connection to %s (%d)",
290 DEBUG.info("failed to re-use connection to %s (%d)",
291 host, id(h))
291 host, id(h))
292 r = None
292 r = None
293 else:
293 else:
294 if DEBUG:
294 if DEBUG:
295 DEBUG.info("re-using connection to %s (%d)", host, id(h))
295 DEBUG.info("re-using connection to %s (%d)", host, id(h))
296
296
297 return r
297 return r
298
298
299 def _start_transaction(self, h, req):
299 def _start_transaction(self, h, req):
300 # What follows mostly reimplements HTTPConnection.request()
300 # What follows mostly reimplements HTTPConnection.request()
301 # except it adds self.parent.addheaders in the mix.
301 # except it adds self.parent.addheaders in the mix.
302 headers = dict(self.parent.addheaders)
302 headers = dict(self.parent.addheaders)
303 headers.update(req.headers)
303 headers.update(req.headers)
304 headers.update(req.unredirected_hdrs)
304 headers.update(req.unredirected_hdrs)
305 headers = dict((n.lower(), v) for n, v in headers.items())
305 headers = dict((n.lower(), v) for n, v in headers.items())
306 skipheaders = {}
306 skipheaders = {}
307 for n in ('host', 'accept-encoding'):
307 for n in ('host', 'accept-encoding'):
308 if n in headers:
308 if n in headers:
309 skipheaders['skip_' + n.replace('-', '_')] = 1
309 skipheaders['skip_' + n.replace('-', '_')] = 1
310 try:
310 try:
311 if req.has_data():
311 if req.has_data():
312 data = req.get_data()
312 data = req.get_data()
313 h.putrequest('POST', req.get_selector(), **skipheaders)
313 h.putrequest('POST', req.get_selector(), **skipheaders)
314 if 'content-type' not in headers:
314 if 'content-type' not in headers:
315 h.putheader('Content-type',
315 h.putheader('Content-type',
316 'application/x-www-form-urlencoded')
316 'application/x-www-form-urlencoded')
317 if 'content-length' not in headers:
317 if 'content-length' not in headers:
318 h.putheader('Content-length', '%d' % len(data))
318 h.putheader('Content-length', '%d' % len(data))
319 else:
319 else:
320 h.putrequest('GET', req.get_selector(), **skipheaders)
320 h.putrequest('GET', req.get_selector(), **skipheaders)
321 except socket.error as err:
321 except socket.error as err:
322 raise urlerr.urlerror(err)
322 raise urlerr.urlerror(err)
323 for k, v in headers.items():
323 for k, v in headers.items():
324 h.putheader(k, v)
324 h.putheader(k, v)
325 h.endheaders()
325 h.endheaders()
326 if req.has_data():
326 if req.has_data():
327 h.send(data)
327 h.send(data)
328
328
329 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
329 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
330 pass
330 pass
331
331
332 class HTTPResponse(httplib.HTTPResponse):
332 class HTTPResponse(httplib.HTTPResponse):
333 # we need to subclass HTTPResponse in order to
333 # we need to subclass HTTPResponse in order to
334 # 1) add readline() and readlines() methods
334 # 1) add readline() and readlines() methods
335 # 2) add close_connection() methods
335 # 2) add close_connection() methods
336 # 3) add info() and geturl() methods
336 # 3) add info() and geturl() methods
337
337
338 # in order to add readline(), read must be modified to deal with a
338 # in order to add readline(), read must be modified to deal with a
339 # buffer. example: readline must read a buffer and then spit back
339 # buffer. example: readline must read a buffer and then spit back
340 # one line at a time. The only real alternative is to read one
340 # one line at a time. The only real alternative is to read one
341 # BYTE at a time (ick). Once something has been read, it can't be
341 # BYTE at a time (ick). Once something has been read, it can't be
342 # put back (ok, maybe it can, but that's even uglier than this),
342 # put back (ok, maybe it can, but that's even uglier than this),
343 # so if you THEN do a normal read, you must first take stuff from
343 # so if you THEN do a normal read, you must first take stuff from
344 # the buffer.
344 # the buffer.
345
345
346 # the read method wraps the original to accommodate buffering,
346 # the read method wraps the original to accommodate buffering,
347 # although read() never adds to the buffer.
347 # although read() never adds to the buffer.
348 # Both readline and readlines have been stolen with almost no
348 # Both readline and readlines have been stolen with almost no
349 # modification from socket.py
349 # modification from socket.py
350
350
351
351
352 def __init__(self, sock, debuglevel=0, strict=0, method=None):
352 def __init__(self, sock, debuglevel=0, strict=0, method=None):
353 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
353 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
354 self.fileno = sock.fileno
354 self.fileno = sock.fileno
355 self.code = None
355 self.code = None
356 self._rbuf = ''
356 self._rbuf = ''
357 self._rbufsize = 8096
357 self._rbufsize = 8096
358 self._handler = None # inserted by the handler later
358 self._handler = None # inserted by the handler later
359 self._host = None # (same)
359 self._host = None # (same)
360 self._url = None # (same)
360 self._url = None # (same)
361 self._connection = None # (same)
361 self._connection = None # (same)
362
362
363 _raw_read = httplib.HTTPResponse.read
363 _raw_read = httplib.HTTPResponse.read
364
364
365 def close(self):
365 def close(self):
366 if self.fp:
366 if self.fp:
367 self.fp.close()
367 self.fp.close()
368 self.fp = None
368 self.fp = None
369 if self._handler:
369 if self._handler:
370 self._handler._request_closed(self, self._host,
370 self._handler._request_closed(self, self._host,
371 self._connection)
371 self._connection)
372
372
373 def close_connection(self):
373 def close_connection(self):
374 self._handler._remove_connection(self._host, self._connection, close=1)
374 self._handler._remove_connection(self._host, self._connection, close=1)
375 self.close()
375 self.close()
376
376
377 def info(self):
377 def info(self):
378 return self.headers
378 return self.headers
379
379
380 def geturl(self):
380 def geturl(self):
381 return self._url
381 return self._url
382
382
383 def read(self, amt=None):
383 def read(self, amt=None):
384 # the _rbuf test is only in this first if for speed. It's not
384 # the _rbuf test is only in this first if for speed. It's not
385 # logically necessary
385 # logically necessary
386 if self._rbuf and not amt is None:
386 if self._rbuf and not amt is None:
387 L = len(self._rbuf)
387 L = len(self._rbuf)
388 if amt > L:
388 if amt > L:
389 amt -= L
389 amt -= L
390 else:
390 else:
391 s = self._rbuf[:amt]
391 s = self._rbuf[:amt]
392 self._rbuf = self._rbuf[amt:]
392 self._rbuf = self._rbuf[amt:]
393 return s
393 return s
394
394
395 s = self._rbuf + self._raw_read(amt)
395 s = self._rbuf + self._raw_read(amt)
396 self._rbuf = ''
396 self._rbuf = ''
397 return s
397 return s
398
398
399 # stolen from Python SVN #68532 to fix issue1088
399 # stolen from Python SVN #68532 to fix issue1088
400 def _read_chunked(self, amt):
400 def _read_chunked(self, amt):
401 chunk_left = self.chunk_left
401 chunk_left = self.chunk_left
402 value = ''
402 parts = []
403
403
404 # XXX This accumulates chunks by repeated string concatenation,
405 # which is not efficient as the number or size of chunks gets big.
406 while True:
404 while True:
407 if chunk_left is None:
405 if chunk_left is None:
408 line = self.fp.readline()
406 line = self.fp.readline()
409 i = line.find(';')
407 i = line.find(';')
410 if i >= 0:
408 if i >= 0:
411 line = line[:i] # strip chunk-extensions
409 line = line[:i] # strip chunk-extensions
412 try:
410 try:
413 chunk_left = int(line, 16)
411 chunk_left = int(line, 16)
414 except ValueError:
412 except ValueError:
415 # close the connection as protocol synchronization is
413 # close the connection as protocol synchronization is
416 # probably lost
414 # probably lost
417 self.close()
415 self.close()
418 raise httplib.IncompleteRead(value)
416 raise httplib.IncompleteRead(''.join(parts))
419 if chunk_left == 0:
417 if chunk_left == 0:
420 break
418 break
421 if amt is None:
419 if amt is None:
422 value += self._safe_read(chunk_left)
420 parts.append(self._safe_read(chunk_left))
423 elif amt < chunk_left:
421 elif amt < chunk_left:
424 value += self._safe_read(amt)
422 parts.append(self._safe_read(amt))
425 self.chunk_left = chunk_left - amt
423 self.chunk_left = chunk_left - amt
426 return value
424 return ''.join(parts)
427 elif amt == chunk_left:
425 elif amt == chunk_left:
428 value += self._safe_read(amt)
426 parts.append(self._safe_read(amt))
429 self._safe_read(2) # toss the CRLF at the end of the chunk
427 self._safe_read(2) # toss the CRLF at the end of the chunk
430 self.chunk_left = None
428 self.chunk_left = None
431 return value
429 return ''.join(parts)
432 else:
430 else:
433 value += self._safe_read(chunk_left)
431 parts.append(self._safe_read(chunk_left))
434 amt -= chunk_left
432 amt -= chunk_left
435
433
436 # we read the whole chunk, get another
434 # we read the whole chunk, get another
437 self._safe_read(2) # toss the CRLF at the end of the chunk
435 self._safe_read(2) # toss the CRLF at the end of the chunk
438 chunk_left = None
436 chunk_left = None
439
437
440 # read and discard trailer up to the CRLF terminator
438 # read and discard trailer up to the CRLF terminator
441 ### note: we shouldn't have any trailers!
439 ### note: we shouldn't have any trailers!
442 while True:
440 while True:
443 line = self.fp.readline()
441 line = self.fp.readline()
444 if not line:
442 if not line:
445 # a vanishingly small number of sites EOF without
443 # a vanishingly small number of sites EOF without
446 # sending the trailer
444 # sending the trailer
447 break
445 break
448 if line == '\r\n':
446 if line == '\r\n':
449 break
447 break
450
448
451 # we read everything; close the "file"
449 # we read everything; close the "file"
452 self.close()
450 self.close()
453
451
454 return value
452 return ''.join(parts)
455
453
456 def readline(self, limit=-1):
454 def readline(self, limit=-1):
457 i = self._rbuf.find('\n')
455 i = self._rbuf.find('\n')
458 while i < 0 and not (0 < limit <= len(self._rbuf)):
456 while i < 0 and not (0 < limit <= len(self._rbuf)):
459 new = self._raw_read(self._rbufsize)
457 new = self._raw_read(self._rbufsize)
460 if not new:
458 if not new:
461 break
459 break
462 i = new.find('\n')
460 i = new.find('\n')
463 if i >= 0:
461 if i >= 0:
464 i = i + len(self._rbuf)
462 i = i + len(self._rbuf)
465 self._rbuf = self._rbuf + new
463 self._rbuf = self._rbuf + new
466 if i < 0:
464 if i < 0:
467 i = len(self._rbuf)
465 i = len(self._rbuf)
468 else:
466 else:
469 i = i + 1
467 i = i + 1
470 if 0 <= limit < len(self._rbuf):
468 if 0 <= limit < len(self._rbuf):
471 i = limit
469 i = limit
472 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
470 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
473 return data
471 return data
474
472
475 def readlines(self, sizehint=0):
473 def readlines(self, sizehint=0):
476 total = 0
474 total = 0
477 list = []
475 list = []
478 while True:
476 while True:
479 line = self.readline()
477 line = self.readline()
480 if not line:
478 if not line:
481 break
479 break
482 list.append(line)
480 list.append(line)
483 total += len(line)
481 total += len(line)
484 if sizehint and total >= sizehint:
482 if sizehint and total >= sizehint:
485 break
483 break
486 return list
484 return list
487
485
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    `str' may be either a byte string or a read()-able file-like
    object (streamed out in 8192-byte chunks).  The parameter name
    shadows the `str' builtin; kept as-is for httplib compatibility.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    # Lazily (re)connect, mirroring httplib.HTTPConnection.send().
    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            # file-like object: stream it out chunk by chunk
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        # NOTE(review): index access on socket.error is the Python 2
        # tuple form; Python 3 would need v.errno instead.
        if v[0] == errno.EPIPE: # Broken pipe
            # _HTTPConnection__state peeks at httplib's name-mangled
            # private state: only swallow the error if the full
            # request had already been sent when the pipe broke.
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # Stash the early response so the wrapped
                # getresponse() can hand it back to the caller.
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
538
536
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.

    safesend() may cache an early server response in _broken_pipe_resp
    when the socket dies mid-send; the wrapper hands that cached
    response back instead of touching the (already closed) socket.
    """
    def safegetresponse(self):
        # In safesend() we might set the _broken_pipe_resp
        # attribute, in which case the socket has already
        # been closed and we just need to give them the response
        # back. Otherwise, we use the normal response path.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
553
551
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection hardened for persistent (keepalive) use."""
    # use the modified response class
    response_class = HTTPResponse
    # tolerate a broken pipe mid-send and cache any early response
    send = safesend
    # return the response cached by safesend() when the pipe broke
    getresponse = wrapgetresponse(httplib.HTTPConnection)
559
557
560
558
561 #########################################################################
559 #########################################################################
562 ##### TEST FUNCTIONS
560 ##### TEST FUNCTIONS
563 #########################################################################
561 #########################################################################
564
562
565
563
def continuity(url):
    """Fetch `url` three ways and print an MD5 digest for each.

    Downloads once with the stock urllib opener, then with the
    keepalive handler via read(), then via readline().  All three
    digests should be identical if nothing is corrupted.
    """
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('keepalive read', m.hexdigest()))

    fo = urlreq.urlopen(url)
    # collect the lines and join once at the end: repeated string
    # concatenation in a loop is quadratic in the worst case
    chunks = []
    while True:
        f = fo.readline()
        if not f:
            break
        chunks.append(f)
    fo.close()
    m = md5(''.join(chunks))
    print(format % ('keepalive readline', m.hexdigest()))
599
597
def comp(N, url):
    """Time N fetches of `url` with the plain opener, then with the
    keepalive handler, and print the speedup factor."""
    print(' making %i connections to:\n %s' % (N, url))

    timings = []
    for banner, handler_factories in (
        (' first using the normal urllib handlers', ()),
        (' now using the keepalive handler ', (HTTPHandler,)),
    ):
        util.stdout.write(banner)
        # build and install the opener for this pass
        urlreq.installopener(
            urlreq.buildopener(*[factory() for factory in handler_factories]))
        elapsed = fetch(N, url)
        timings.append(elapsed)
        print(' TIME: %.3f s' % elapsed)

    print(' improvement factor: %.2f' % (timings[0] / timings[1]))
617
615
def fetch(N, url, delay=0):
    """Fetch `url` N times (sleeping `delay` seconds between fetches)
    and return the total elapsed time in seconds.

    Warns if any fetch returns a different length than the first one.
    """
    import time
    sizes = []
    starttime = time.time()
    for n in range(N):
        if delay and n > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        body = fo.read()
        fo.close()
        sizes.append(len(body))
    elapsed = time.time() - starttime

    # every read should return the same number of bytes
    for n, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (n, size))

    return elapsed
638
636
def test_timeout(url):
    """Verify that a server-side idle disconnect is handled transparently.

    Temporarily replaces the module-level DEBUG logger with one that
    prints, so the keepalive machinery's reconnect chatter is visible.
    """
    global DEBUG
    saved_debug = DEBUG

    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug

    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    print(" waiting %i seconds for the server to close the connection" % 20)
    # count down on one terminal line
    for remaining in range(20, 0, -1):
        util.stdout.write('\r %2i' % remaining)
        util.stdout.flush()
        time.sleep(1)
    util.stderr.write('\r')

    print(" fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    print(' data are identical' if data1 == data2
          else ' ERROR: DATA DIFFER')

    DEBUG = saved_debug
672
670
673
671
def test(url, N=10):
    """Run the full self-test suite (continuity, speed, timeout) on `url`."""
    steps = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for index, (banner, step) in enumerate(steps):
        if index:
            # blank separator line between test sections
            print('')
        print(banner)
        step()
683
681
if __name__ == '__main__':
    # Command-line test harness.
    # Usage: keepalive.py <integer> <url>
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing or non-integer arguments: print usage and exit
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now