##// END OF EJS Templates
py3: handle keyword arguments correctly in keepalive.py...
Pulkit Goyal -
r35365:03112a2c default
parent child Browse files
Show More
@@ -1,726 +1,726
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81 """
81 """
82
82
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84
84
85 from __future__ import absolute_import, print_function
85 from __future__ import absolute_import, print_function
86
86
87 import errno
87 import errno
88 import hashlib
88 import hashlib
89 import socket
89 import socket
90 import sys
90 import sys
91 import threading
91 import threading
92
92
93 from .i18n import _
93 from .i18n import _
94 from . import (
94 from . import (
95 pycompat,
95 pycompat,
96 urllibcompat,
96 urllibcompat,
97 util,
97 util,
98 )
98 )
99
99
100 httplib = util.httplib
100 httplib = util.httplib
101 urlerr = util.urlerr
101 urlerr = util.urlerr
102 urlreq = util.urlreq
102 urlreq = util.urlreq
103
103
104 DEBUG = None
104 DEBUG = None
105
105
class ConnectionManager(object):
    """Track open keep-alive connections.

    Maintains three maps under one lock:
      * host -> list of connections to that host
      * connection -> host
      * connection -> ready flag (truthy when the connection is idle
        and may be handed out for reuse)
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register `connection` for `host` with the given ready state."""
        with self._lock:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget `connection`; a no-op if it was never registered."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Mark `connection` as ready (idle) or busy.

        Deliberately lock-free: a single dict item assignment is atomic
        under the GIL, and assignment never raises KeyError (the previous
        ``except KeyError`` here was dead code and has been dropped).
        An unregistered connection simply gets a stale entry that
        remove() will clean up.
        """
        self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """Return an idle connection to `host`, marking it busy, or None."""
        conn = None
        with self._lock:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        return conn

    def get_all(self, host=None):
        """Return a copied list of connections to `host`, or, with no
        host, a shallow copy of the whole host map."""
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
169
169
class KeepAliveHandler(object):
    """urllib2-style handler mixin implementing HTTP/1.1 keep-alive.

    Connections are pooled in a ConnectionManager and reused across
    requests to the same host when possible.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # get_all() returns a plain dict, so use items() -- iteritems()
        # does not exist on Python 3 and made this method crash there.
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        # drop (and optionally close) a connection from the pool
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Open `req`, reusing a pooled connection when one is available.

        Returns the response object, annotated with handler bookkeeping
        attributes (_handler, _host, _url, _connection) plus urllib2-style
        code/headers/msg aliases. Raises urlerr.urlerror on socket or
        HTTP protocol errors.
        """
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    # keyword names must be native str on py3
                    **pycompat.strkwargs(skipheaders))
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **pycompat.strkwargs(skipheaders))
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)
342
342
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    """Keep-alive-enabled replacement for the stock urllib HTTP handler."""
345
345
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            # py2-only HTTPResponse keyword arguments; keys must be
            # native str, hence the r'' prefixes
            extrakw[r'strict'] = True
            extrakw[r'buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # keep a handle on the unbuffered base-class read
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and hand the connection back to the pool."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the underlying connection for real (do not pool it)."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Read up to `amt` bytes, serving from the local buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        """Return one line, buffering raw reads locally."""
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        """Return a list of lines; stop early once `sizehint` bytes read."""
        total = 0
        # renamed from 'list', which shadowed the builtin
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
520
520
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    `str' may be a bytes string or a file-like object exposing read()
    (the read()able is streamed in 8 KiB blocks).  Installed as the
    send() method of the HTTPConnection class below.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        # Pretend the send succeeded; the cached response will be
        # handed out by the wrapped getresponse().
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        # Duck-typed check: anything with a read() method is streamed.
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        # NOTE(review): v[0] is the errno on Python 2's socket.error;
        # Python 3 exceptions are not indexable (use .errno there).
        if v[0] == errno.EPIPE: # Broken pipe
            # Name-mangled access to httplib.HTTPConnection's private
            # __state attribute.
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # Assigning None first guarantees the attribute exists
                # even if getresponse() itself raises.
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
571
571
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.

    Returns a function suitable for use as a getresponse() method on a
    subclass of *cls*.
    """
    def safegetresponse(self):
        # safesend() may have stashed the server's reply in
        # _broken_pipe_resp when the connection died mid-send; the
        # socket is already closed in that case, so just hand the
        # cached reply back instead of reading from the socket.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
586
586
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    # broken-pipe-tolerant send() defined above
    send = safesend
    # getresponse() wrapper that returns a response captured during a
    # failed send() instead of reading from the (closed) socket
    getresponse = wrapgetresponse(httplib.HTTPConnection)
592
592
593
593
594 #########################################################################
594 #########################################################################
595 ##### TEST FUNCTIONS
595 ##### TEST FUNCTIONS
596 #########################################################################
596 #########################################################################
597
597
598
598
def continuity(url):
    """Download *url* three ways and print an md5 of each payload, so
    the keepalive handler can be checked against stock urllib."""
    md5 = hashlib.md5
    format = '%25s: %s'

    # Baseline: fetch through the unmodified urllib machinery.
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    print(format % ('normal urllib', md5(foo).hexdigest()))

    # Same fetch, with the keepalive handler installed.
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    print(format % ('keepalive read', md5(foo).hexdigest()))

    # Once more line by line, to exercise readline().
    fo = urlreq.urlopen(url)
    pieces = []
    while True:
        piece = fo.readline()
        if not piece:
            break
        pieces.append(piece)
    fo.close()
    print(format % ('keepalive readline', md5(''.join(pieces)).hexdigest()))
633
633
def comp(N, url):
    """Time *N* fetches of *url* with and without the keepalive handler
    and print the speedup factor."""
    print(' making %i connections to:\n %s' % (N, url))

    util.stdout.write(' first using the normal urllib handlers')
    # Stock urllib first, as the baseline.
    urlreq.installopener(urlreq.buildopener())
    t1 = fetch(N, url)
    print(' TIME: %.3f s' % t1)

    util.stdout.write(' now using the keepalive handler ')
    # Then the same workload through the keepalive handler.
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))
    t2 = fetch(N, url)
    print(' TIME: %.3f s' % t2)
    print(' improvement factor: %.2f' % (t1 / t2))
651
651
def fetch(N, url, delay=0):
    """Download *url* N times, sleeping *delay* seconds between
    requests, and return the total elapsed time.  Warns when any
    download's length differs from the first one's."""
    import time
    sizes = []
    started = time.time()
    for attempt in range(N):
        if delay and attempt > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        body = fo.read()
        fo.close()
        sizes.append(len(body))
    elapsed = time.time() - started

    # Every read should return the same number of bytes as the first.
    for idx, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (idx, size))

    return elapsed
672
672
def test_timeout(url):
    """Fetch *url*, wait long enough for the server to drop the idle
    connection, fetch again, and verify both payloads match."""
    global DEBUG
    dbbackup = DEBUG

    class FakeLogger(object):
        # Route every log level straight to stdout for the test run.
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug

    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    wait = 20
    print(" waiting %i seconds for the server to close the connection" % wait)
    for remaining in range(wait, 0, -1):
        util.stdout.write('\r %2i' % remaining)
        util.stdout.flush()
        time.sleep(1)
    util.stderr.write('\r')

    print(" fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    print(' data are identical' if data1 == data2 else ' ERROR: DATA DIFFER')

    DEBUG = dbbackup
706
706
707
707
def test(url, N=10):
    """Run the continuity, speed, and dropped-connection checks against
    *url*, using *N* connections for the speed comparison."""
    stages = [
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison", lambda: comp(N, url)),
        ("performing dropped-connection check", lambda: test_timeout(url)),
    ]
    for idx, (banner, run) in enumerate(stages):
        if idx:
            print('')
        print(banner)
        run()
717
717
if __name__ == '__main__':
    import time
    # Usage: <progname> <integer> <url>; print usage on bad arguments.
    try:
        count, target = int(sys.argv[1]), sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(target, count)
General Comments 0
You need to be logged in to leave comments. Login now