##// END OF EJS Templates
keepalive: python 3 portability tweaks...
Augie Fackler -
r34428:a454123f default
parent child Browse files
Show More
@@ -1,716 +1,721 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81 """
81 """
82
82
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84
84
85 from __future__ import absolute_import, print_function
85 from __future__ import absolute_import, print_function
86
86
87 import errno
87 import errno
88 import hashlib
88 import hashlib
89 import socket
89 import socket
90 import sys
90 import sys
91 import threading
91 import threading
92
92
93 from .i18n import _
93 from .i18n import _
94 from . import (
94 from . import (
95 pycompat,
95 util,
96 util,
96 )
97 )
97
98
98 httplib = util.httplib
99 httplib = util.httplib
99 urlerr = util.urlerr
100 urlerr = util.urlerr
100 urlreq = util.urlreq
101 urlreq = util.urlreq
101
102
102 DEBUG = None
103 DEBUG = None
103
104
class ConnectionManager(object):
    """Track every connection the keepalive handler has open.

    Maintains three views of the pool: connections grouped by host,
    the host each connection belongs to, and whether each connection
    is currently ready (idle) or busy.
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {}   # host -> [connection, ...]
        self._connmap = {}   # connection -> host
        self._readymap = {}  # connection -> ready state

    def add(self, host, connection, ready):
        """Register *connection* for *host* with the given ready state."""
        with self._lock:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Drop *connection* from all maps; unknown connections are ignored."""
        with self._lock:
            host = self._connmap.pop(connection, None)
            if host is None:
                return
            del self._readymap[connection]
            self._hostmap[host].remove(connection)
            # drop the host entry entirely once its pool is empty
            if not self._hostmap[host]:
                del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Flag *connection* as ready (idle) or not; best-effort only."""
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        """Return an idle connection to *host*, marking it busy, or None."""
        with self._lock:
            for candidate in self._hostmap.get(host, []):
                if self._readymap[candidate]:
                    self._readymap[candidate] = 0
                    return candidate
        return None

    def get_all(self, host=None):
        """Return a copy: a list of connections for *host*, or the
        whole host -> connections mapping when no host is given."""
        if host:
            return list(self._hostmap.get(host, []))
        return dict(self._hostmap)
class KeepAliveHandler(object):
    """urllib2-style handler providing persistent (keepalive) connections.

    Open connections are pooled per host in a ConnectionManager and
    re-used for subsequent requests to the same host when the server
    keeps them alive.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # items() rather than iteritems(): dict.iteritems() does not
        # exist on Python 3, and open_connections above already uses
        # items() on the same mapping.
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        # Drop a connection from the pool, optionally closing its socket.
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Issue *req*, preferring an idle pooled connection to the target
        host and falling back to a fresh http_class instance.

        Returns the response object, annotated with handler bookkeeping
        attributes (_handler, _host, _url, _connection, code, headers, msg).
        Raises urlerr.urlerror on missing host, bad status line, or
        socket/HTTP protocol errors.
        """
        host = req.get_host()
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest(
                    req.get_method(), req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), req.get_selector(), **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    # Combine KeepAliveHandler's connection pooling with the stock
    # HTTP handler machinery (reached through the urlreq compatibility
    # alias) -- all behavior is inherited.
    pass
339
341
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            # 'strict' and 'buffering' were removed from
            # httplib.HTTPResponse.__init__ in Python 3, so only pass
            # them on Python 2.
            extrakw['strict'] = True
            extrakw['buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''          # local read-ahead buffer for readline()
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # keep a reference to the unwrapped read so our read()/readline()
    # can pull raw data while layering buffering on top
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        # release the underlying file object, then tell the handler this
        # connection is idle and may be re-used for another request
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        # unlike close(), this tears down the pooled connection entirely
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        # urllib2 compatibility: expose the response headers
        return self.headers

    def geturl(self):
        # urllib2 compatibility: the URL this response was fetched from
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        # serve any buffered data first, then fall through to the raw read
        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        # Decode a chunked transfer-encoding body; amt=None means read
        # everything.  self.chunk_left carries leftover state between calls.
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                # start of a new chunk: parse the hex size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    # zero-length chunk terminates the body
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # caller wants less than this chunk; remember the remainder
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        # split the last chunk at the newline; buffer the tail for later
        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        # Collect lines until EOF, or until at least sizehint bytes have
        # been accumulated (sizehint=0 means no limit).
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list
511
516
512 def safesend(self, str):
517 def safesend(self, str):
513 """Send `str' to the server.
518 """Send `str' to the server.
514
519
515 Shamelessly ripped off from httplib to patch a bad behavior.
520 Shamelessly ripped off from httplib to patch a bad behavior.
516 """
521 """
517 # _broken_pipe_resp is an attribute we set in this function
522 # _broken_pipe_resp is an attribute we set in this function
518 # if the socket is closed while we're sending data but
523 # if the socket is closed while we're sending data but
519 # the server sent us a response before hanging up.
524 # the server sent us a response before hanging up.
520 # In that case, we want to pretend to send the rest of the
525 # In that case, we want to pretend to send the rest of the
521 # outgoing data, and then let the user use getresponse()
526 # outgoing data, and then let the user use getresponse()
522 # (which we wrap) to get this last response before
527 # (which we wrap) to get this last response before
523 # opening a new socket.
528 # opening a new socket.
524 if getattr(self, '_broken_pipe_resp', None) is not None:
529 if getattr(self, '_broken_pipe_resp', None) is not None:
525 return
530 return
526
531
527 if self.sock is None:
532 if self.sock is None:
528 if self.auto_open:
533 if self.auto_open:
529 self.connect()
534 self.connect()
530 else:
535 else:
531 raise httplib.NotConnected
536 raise httplib.NotConnected
532
537
533 # send the data to the server. if we get a broken pipe, then close
538 # send the data to the server. if we get a broken pipe, then close
534 # the socket. we want to reconnect when somebody tries to send again.
539 # the socket. we want to reconnect when somebody tries to send again.
535 #
540 #
536 # NOTE: we DO propagate the error, though, because we cannot simply
541 # NOTE: we DO propagate the error, though, because we cannot simply
537 # ignore the error... the caller will know if they can retry.
542 # ignore the error... the caller will know if they can retry.
538 if self.debuglevel > 0:
543 if self.debuglevel > 0:
539 print("send:", repr(str))
544 print("send:", repr(str))
540 try:
545 try:
541 blocksize = 8192
546 blocksize = 8192
542 read = getattr(str, 'read', None)
547 read = getattr(str, 'read', None)
543 if read is not None:
548 if read is not None:
544 if self.debuglevel > 0:
549 if self.debuglevel > 0:
545 print("sending a read()able")
550 print("sending a read()able")
546 data = read(blocksize)
551 data = read(blocksize)
547 while data:
552 while data:
548 self.sock.sendall(data)
553 self.sock.sendall(data)
549 data = read(blocksize)
554 data = read(blocksize)
550 else:
555 else:
551 self.sock.sendall(str)
556 self.sock.sendall(str)
552 except socket.error as v:
557 except socket.error as v:
553 reraise = True
558 reraise = True
554 if v[0] == errno.EPIPE: # Broken pipe
559 if v[0] == errno.EPIPE: # Broken pipe
555 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
560 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
556 self._broken_pipe_resp = None
561 self._broken_pipe_resp = None
557 self._broken_pipe_resp = self.getresponse()
562 self._broken_pipe_resp = self.getresponse()
558 reraise = False
563 reraise = False
559 self.close()
564 self.close()
560 if reraise:
565 if reraise:
561 raise
566 raise
562
567
563 def wrapgetresponse(cls):
568 def wrapgetresponse(cls):
564 """Wraps getresponse in cls with a broken-pipe sane version.
569 """Wraps getresponse in cls with a broken-pipe sane version.
565 """
570 """
566 def safegetresponse(self):
571 def safegetresponse(self):
567 # In safesend() we might set the _broken_pipe_resp
572 # In safesend() we might set the _broken_pipe_resp
568 # attribute, in which case the socket has already
573 # attribute, in which case the socket has already
569 # been closed and we just need to give them the response
574 # been closed and we just need to give them the response
570 # back. Otherwise, we use the normal response path.
575 # back. Otherwise, we use the normal response path.
571 r = getattr(self, '_broken_pipe_resp', None)
576 r = getattr(self, '_broken_pipe_resp', None)
572 if r is not None:
577 if r is not None:
573 return r
578 return r
574 return cls.getresponse(self)
579 return cls.getresponse(self)
575 safegetresponse.__doc__ = cls.getresponse.__doc__
580 safegetresponse.__doc__ = cls.getresponse.__doc__
576 return safegetresponse
581 return safegetresponse
577
582
578 class HTTPConnection(httplib.HTTPConnection):
583 class HTTPConnection(httplib.HTTPConnection):
579 # use the modified response class
584 # use the modified response class
580 response_class = HTTPResponse
585 response_class = HTTPResponse
581 send = safesend
586 send = safesend
582 getresponse = wrapgetresponse(httplib.HTTPConnection)
587 getresponse = wrapgetresponse(httplib.HTTPConnection)
583
588
584
589
585 #########################################################################
590 #########################################################################
586 ##### TEST FUNCTIONS
591 ##### TEST FUNCTIONS
587 #########################################################################
592 #########################################################################
588
593
589
594
590 def continuity(url):
595 def continuity(url):
591 md5 = hashlib.md5
596 md5 = hashlib.md5
592 format = '%25s: %s'
597 format = '%25s: %s'
593
598
594 # first fetch the file with the normal http handler
599 # first fetch the file with the normal http handler
595 opener = urlreq.buildopener()
600 opener = urlreq.buildopener()
596 urlreq.installopener(opener)
601 urlreq.installopener(opener)
597 fo = urlreq.urlopen(url)
602 fo = urlreq.urlopen(url)
598 foo = fo.read()
603 foo = fo.read()
599 fo.close()
604 fo.close()
600 m = md5(foo)
605 m = md5(foo)
601 print(format % ('normal urllib', m.hexdigest()))
606 print(format % ('normal urllib', m.hexdigest()))
602
607
603 # now install the keepalive handler and try again
608 # now install the keepalive handler and try again
604 opener = urlreq.buildopener(HTTPHandler())
609 opener = urlreq.buildopener(HTTPHandler())
605 urlreq.installopener(opener)
610 urlreq.installopener(opener)
606
611
607 fo = urlreq.urlopen(url)
612 fo = urlreq.urlopen(url)
608 foo = fo.read()
613 foo = fo.read()
609 fo.close()
614 fo.close()
610 m = md5(foo)
615 m = md5(foo)
611 print(format % ('keepalive read', m.hexdigest()))
616 print(format % ('keepalive read', m.hexdigest()))
612
617
613 fo = urlreq.urlopen(url)
618 fo = urlreq.urlopen(url)
614 foo = ''
619 foo = ''
615 while True:
620 while True:
616 f = fo.readline()
621 f = fo.readline()
617 if f:
622 if f:
618 foo = foo + f
623 foo = foo + f
619 else: break
624 else: break
620 fo.close()
625 fo.close()
621 m = md5(foo)
626 m = md5(foo)
622 print(format % ('keepalive readline', m.hexdigest()))
627 print(format % ('keepalive readline', m.hexdigest()))
623
628
624 def comp(N, url):
629 def comp(N, url):
625 print(' making %i connections to:\n %s' % (N, url))
630 print(' making %i connections to:\n %s' % (N, url))
626
631
627 util.stdout.write(' first using the normal urllib handlers')
632 util.stdout.write(' first using the normal urllib handlers')
628 # first use normal opener
633 # first use normal opener
629 opener = urlreq.buildopener()
634 opener = urlreq.buildopener()
630 urlreq.installopener(opener)
635 urlreq.installopener(opener)
631 t1 = fetch(N, url)
636 t1 = fetch(N, url)
632 print(' TIME: %.3f s' % t1)
637 print(' TIME: %.3f s' % t1)
633
638
634 util.stdout.write(' now using the keepalive handler ')
639 util.stdout.write(' now using the keepalive handler ')
635 # now install the keepalive handler and try again
640 # now install the keepalive handler and try again
636 opener = urlreq.buildopener(HTTPHandler())
641 opener = urlreq.buildopener(HTTPHandler())
637 urlreq.installopener(opener)
642 urlreq.installopener(opener)
638 t2 = fetch(N, url)
643 t2 = fetch(N, url)
639 print(' TIME: %.3f s' % t2)
644 print(' TIME: %.3f s' % t2)
640 print(' improvement factor: %.2f' % (t1 / t2))
645 print(' improvement factor: %.2f' % (t1 / t2))
641
646
642 def fetch(N, url, delay=0):
647 def fetch(N, url, delay=0):
643 import time
648 import time
644 lens = []
649 lens = []
645 starttime = time.time()
650 starttime = time.time()
646 for i in range(N):
651 for i in range(N):
647 if delay and i > 0:
652 if delay and i > 0:
648 time.sleep(delay)
653 time.sleep(delay)
649 fo = urlreq.urlopen(url)
654 fo = urlreq.urlopen(url)
650 foo = fo.read()
655 foo = fo.read()
651 fo.close()
656 fo.close()
652 lens.append(len(foo))
657 lens.append(len(foo))
653 diff = time.time() - starttime
658 diff = time.time() - starttime
654
659
655 j = 0
660 j = 0
656 for i in lens[1:]:
661 for i in lens[1:]:
657 j = j + 1
662 j = j + 1
658 if not i == lens[0]:
663 if not i == lens[0]:
659 print("WARNING: inconsistent length on read %i: %i" % (j, i))
664 print("WARNING: inconsistent length on read %i: %i" % (j, i))
660
665
661 return diff
666 return diff
662
667
663 def test_timeout(url):
668 def test_timeout(url):
664 global DEBUG
669 global DEBUG
665 dbbackup = DEBUG
670 dbbackup = DEBUG
666 class FakeLogger(object):
671 class FakeLogger(object):
667 def debug(self, msg, *args):
672 def debug(self, msg, *args):
668 print(msg % args)
673 print(msg % args)
669 info = warning = error = debug
674 info = warning = error = debug
670 DEBUG = FakeLogger()
675 DEBUG = FakeLogger()
671 print(" fetching the file to establish a connection")
676 print(" fetching the file to establish a connection")
672 fo = urlreq.urlopen(url)
677 fo = urlreq.urlopen(url)
673 data1 = fo.read()
678 data1 = fo.read()
674 fo.close()
679 fo.close()
675
680
676 i = 20
681 i = 20
677 print(" waiting %i seconds for the server to close the connection" % i)
682 print(" waiting %i seconds for the server to close the connection" % i)
678 while i > 0:
683 while i > 0:
679 util.stdout.write('\r %2i' % i)
684 util.stdout.write('\r %2i' % i)
680 util.stdout.flush()
685 util.stdout.flush()
681 time.sleep(1)
686 time.sleep(1)
682 i -= 1
687 i -= 1
683 util.stderr.write('\r')
688 util.stderr.write('\r')
684
689
685 print(" fetching the file a second time")
690 print(" fetching the file a second time")
686 fo = urlreq.urlopen(url)
691 fo = urlreq.urlopen(url)
687 data2 = fo.read()
692 data2 = fo.read()
688 fo.close()
693 fo.close()
689
694
690 if data1 == data2:
695 if data1 == data2:
691 print(' data are identical')
696 print(' data are identical')
692 else:
697 else:
693 print(' ERROR: DATA DIFFER')
698 print(' ERROR: DATA DIFFER')
694
699
695 DEBUG = dbbackup
700 DEBUG = dbbackup
696
701
697
702
698 def test(url, N=10):
703 def test(url, N=10):
699 print("performing continuity test (making sure stuff isn't corrupted)")
704 print("performing continuity test (making sure stuff isn't corrupted)")
700 continuity(url)
705 continuity(url)
701 print('')
706 print('')
702 print("performing speed comparison")
707 print("performing speed comparison")
703 comp(N, url)
708 comp(N, url)
704 print('')
709 print('')
705 print("performing dropped-connection check")
710 print("performing dropped-connection check")
706 test_timeout(url)
711 test_timeout(url)
707
712
708 if __name__ == '__main__':
713 if __name__ == '__main__':
709 import time
714 import time
710 try:
715 try:
711 N = int(sys.argv[1])
716 N = int(sys.argv[1])
712 url = sys.argv[2]
717 url = sys.argv[2]
713 except (IndexError, ValueError):
718 except (IndexError, ValueError):
714 print("%s <integer> <url>" % sys.argv[0])
719 print("%s <integer> <url>" % sys.argv[0])
715 else:
720 else:
716 test(url, N)
721 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now