keepalive: be more careful about self._rbuf when calling super impls...
Augie Fackler
r39841:1cf1680b default
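This change fixes a data-duplication hazard in HTTPResponse.read(). The old code did s = self._rbuf + self._raw_read(amt). On Python 3, http.client.HTTPResponse.read() is implemented on top of readinto(), which this class overrides to drain self._rbuf first, so buffered bytes could end up in the result twice. The fix takes and clears self._rbuf before calling the super implementation. A minimal sketch of the hazard, using illustrative stand-in classes rather than the real ones:

    class Base(object):
        # stands in for http.client.HTTPResponse: on Python 3 its
        # read() is built on top of readinto()
        def read(self, n):
            buf = bytearray(n)
            got = self.readinto(buf)
            return bytes(buf[:got])

    class Buffered(Base):
        # stands in for the HTTPResponse subclass below, whose
        # readinto() drains an internal buffer first (simplified:
        # no underlying socket read)
        def __init__(self):
            self._rbuf = b'AB'
        def readinto(self, dest):
            n = len(self._rbuf)
            dest[0:n] = self._rbuf
            self._rbuf = b''
            return n

    r = Buffered()
    # old pattern: _rbuf is prepended, then the super read() dispatches
    # back into the overridden readinto() and sees the same bytes again
    assert r._rbuf + Base.read(r, 2) == b'ABAB'   # doubled!

    r = Buffered()
    # fixed pattern: take and clear _rbuf *before* the super call
    s, r._rbuf = r._rbuf, b''
    assert s + Base.read(r, 2) == b'AB'

Clearing the buffer before delegating means the superclass never sees already-buffered bytes, whichever of read()/readinto() it is built on.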
@@ -1,753 +1,756 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see
# <http://www.gnu.org/licenses/>.

# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

# Modified by Benoit Boissinot:
#  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
# Modified by Dirkjan Ochtman:
#  - import md5 function from a local util module
# Modified by Augie Fackler:
#  - add safesend method and use it to prevent broken pipe errors
#    on large POST requests

"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.

>>> import urllib2
>>> from keepalive import HTTPHandler
>>> keepalive_handler = HTTPHandler()
>>> opener = urlreq.buildopener(keepalive_handler)
>>> urlreq.installopener(opener)
>>>
>>> fo = urlreq.urlopen('http://www.python.org')

If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened.  If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.

To remove the handler, simply re-run build_opener with no arguments, and
install that opener.

You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:

  close_connection(host)
  close_all()
  open_connections()

NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
  * there is nothing that prevents another thread from creating new
    connections immediately after connections are closed
  * no checks are done to prevent in-use connections from being closed

>>> keepalive_handler.close_all()

EXTRA ATTRIBUTES AND METHODS

  Upon a status of 200, the object returned has a few additional
  attributes and methods, which should not be used if you want to
  remain consistent with the normal urllib2-returned objects:

    close_connection()  -  close the connection to the host
    readlines()         -  you know, readlines()
    status              -  the return status (i.e. 404)
    reason              -  english translation of status (i.e. 'File not found')

  If you want the best of both worlds, use this inside an
  AttributeError-catching try:

  >>> try: status = fo.status
  >>> except AttributeError: status = None

  Unfortunately, these are ONLY there if status == 200, so it's not
  easy to distinguish between non-200 responses.  The reason is that
  urllib2 tries to do clever things with error codes 301, 302, 401,
  and 407, and it wraps the object upon return.
"""

# $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $

from __future__ import absolute_import, print_function

import errno
import hashlib
import socket
import sys
import threading

from .i18n import _
from . import (
    node,
    pycompat,
    urllibcompat,
    util,
)
from .utils import (
    procutil,
)

httplib = util.httplib
urlerr = util.urlerr
urlreq = util.urlreq

DEBUG = None

class ConnectionManager(object):
    """
    The connection manager must be able to:
      * keep track of all existing
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)

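# Illustrative ConnectionManager usage (assumed names, not from the
# original module); get_ready_conn() marks the returned connection busy
# until set_ready() hands it back:
#
#     cm = ConnectionManager()
#     cm.add('example.com:80', conn, ready=1)
#     c = cm.get_ready_conn('example.com:80')
#     cm.set_ready(c, 1)   # return it to the pool
#     cm.remove(c)         # or drop it entirely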
class KeepAliveHandler(object):
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, r'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

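    # Note on the while/else in do_open() above: the else clause runs
    # only when the loop ends without break, i.e. when get_ready_conn()
    # returned None because no pooled connection produced a response;
    # only then is a brand new connection created.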
    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in (r'host', r'accept-encoding'):
            if n in headers:
                skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
                if r'content-type' not in headers:
                    h.putheader(r'Content-type',
                                r'application/x-www-form-urlencoded')
                if r'content-length' not in headers:
                    h.putheader(r'Content-length', r'%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    pass

class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            extrakw[r'strict'] = True
            extrakw[r'buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = ''
        s += self._raw_read(amt)
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        # Fast path: a line is already available in the read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def readinto(self, dest):
        if self._raw_readinto is None:
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0:len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])
        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = ''
        return got

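# Worked example for readinto() above (assumed values): with
# _rbuf == b'AB' and a 5-byte dest, the superclass readinto() fills
# dest[2:5] through the memoryview slice, the two buffered bytes are
# then copied into dest[0:2], and got + 2 is returned.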
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise

def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # In safesend() we might set the _broken_pipe_resp
        # attribute, in which case the socket has already
        # been closed and we just need to give them the response
        # back. Otherwise, we use the normal response path.
        r = getattr(self, '_broken_pipe_resp', None)
        if r is not None:
            return r
        return cls.getresponse(self)
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse

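# How the pieces combine: the HTTPConnection subclass below swaps
# safesend() in for send() and wraps getresponse(), so a server that
# hangs up mid-request body but still sends a response surfaces that
# response on the next getresponse() call instead of a bare EPIPE.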
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)


#########################################################################
#####   TEST FUNCTIONS
#########################################################################


def continuity(url):
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('normal urllib', node.hex(m.digest())))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('keepalive read', node.hex(m.digest())))

    fo = urlreq.urlopen(url)
    foo = ''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(format % ('keepalive readline', node.hex(m.digest())))

def comp(N, url):
    print('  making %i connections to:\n  %s' % (N, url))

    procutil.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print('  TIME: %.3f s' % t1)

    procutil.stdout.write('  now using the keepalive handler       ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print('  TIME: %.3f s' % t2)
    print('  improvement factor: %.2f' % (t1 / t2))

def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, i))

    return diff

def test_timeout(url):
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        procutil.stdout.write('\r %2i' % i)
        procutil.stdout.flush()
        time.sleep(1)
        i -= 1
    procutil.stderr.write('\r')

    print("  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup


def test(url, N=10):
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print('')
    print("performing speed comparison")
    comp(N, url)
    print('')
    print("performing dropped-connection check")
    test_timeout(url)

if __name__ == '__main__':
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)