keepalive: track request count and bytes sent...
Gregory Szorc
r40371:dc82ad1b default
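
This changeset makes the keepalive machinery count the requests it issues and
the bytes it writes to the socket, exposing the totals as requestscount and
sentbytescount on the handler (and mirroring them onto the parent opener when
it carries matching attributes). A minimal sketch of reading the counters,
assuming the handler is installed as in the module docstring below:

    from mercurial import util, keepalive
    urlreq = util.urlreq

    handler = keepalive.HTTPHandler()
    opener = urlreq.buildopener(handler)
    urlreq.installopener(opener)

    urlreq.urlopen('http://www.python.org').read()

    print(handler.requestscount)   # requests issued via this handler
    print(handler.sentbytescount)  # bytes written to sockets by those requests
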
@@ -1,760 +1,779 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see
# <http://www.gnu.org/licenses/>.

# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

# Modified by Benoit Boissinot:
#  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
# Modified by Dirkjan Ochtman:
#  - import md5 function from a local util module
# Modified by Augie Fackler:
#  - add safesend method and use it to prevent broken pipe errors
#    on large POST requests

"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.

>>> from mercurial import util
>>> urlreq = util.urlreq
>>> from mercurial.keepalive import HTTPHandler
>>> keepalive_handler = HTTPHandler()
>>> opener = urlreq.buildopener(keepalive_handler)
>>> urlreq.installopener(opener)
>>>
>>> fo = urlreq.urlopen('http://www.python.org')

If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened. If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.

To remove the handler, simply re-run build_opener with no arguments, and
install that opener.

You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:

  close_connection(host)
  close_all()
  open_connections()

NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
  * there is nothing that prevents another thread from creating new
    connections immediately after connections are closed
  * no checks are done to prevent in-use connections from being closed

>>> keepalive_handler.close_all()

EXTRA ATTRIBUTES AND METHODS

Upon a status of 200, the object returned has a few additional
attributes and methods, which should not be used if you want to
remain consistent with the normal urllib2-returned objects:

  close_connection() - close the connection to the host
  readlines()        - you know, readlines()
  status             - the return status (e.g. 404)
  reason             - English translation of status (e.g. 'File not found')

If you want the best of both worlds, use this inside an
AttributeError-catching try:

>>> try: status = fo.status
>>> except AttributeError: status = None

Unfortunately, these are ONLY there if status == 200, so it's not
easy to distinguish between non-200 responses. The reason is that
urllib2 tries to do clever things with error codes 301, 302, 401,
and 407, and it wraps the object upon return.
"""

# $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $

from __future__ import absolute_import, print_function

import errno
import hashlib
import socket
import sys
import threading

from .i18n import _
from . import (
    node,
    pycompat,
    urllibcompat,
    util,
)
from .utils import (
    procutil,
)

httplib = util.httplib
urlerr = util.urlerr
urlreq = util.urlreq

DEBUG = None

class ConnectionManager(object):
    """
    The connection manager must be able to:
      * keep track of all existing connections
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
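
# Illustrative sketch (added comment, not in the original file): how the
# pool above is meant to be driven. The host string and connection object
# are hypothetical stand-ins; any hashable object works as a connection.
#
#     cm = ConnectionManager()
#     cm.add('example.com:80', conn, ready=1)  # register an idle connection
#     c = cm.get_ready_conn('example.com:80')  # claim it; marked not-ready
#     cm.set_ready(c, 1)                       # hand it back to the pool
#     cm.remove(c)                             # or retire it entirely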

class KeepAliveHandler(object):
    def __init__(self):
        self._cm = ConnectionManager()
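        # Accounting added by this change: requests issued through this
        # handler and bytes written to sockets on its behalf.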
        self.requestscount = 0
        self.sentbytescount = 0

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each. [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done. Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found. Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, r'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This does NOT close or remove bad connections in cases where
        it returns. However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked. We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something. We will still raise the exception, but
            # let's try and close the connection and remove it
            # first. We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open. On the next try, the
            # same exception was raised, etc. The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back. This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
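        # Snapshot the connection's byte counter so that, after the request
        # is written, the delta can be attributed to this transaction.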
        oldbytescount = h.sentbytescount

        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in (r'host', r'accept-encoding'):
            if n in headers:
                skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
                if r'content-type' not in headers:
                    h.putheader(r'Content-type',
                                r'application/x-www-form-urlencoded')
                if r'content-length' not in headers:
                    h.putheader(r'Content-length', r'%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

        # This will fail to record events in case of I/O failure. That's OK.
        self.requestscount += 1
        self.sentbytescount += h.sentbytescount - oldbytescount

        try:
            self.parent.requestscount += 1
            self.parent.sentbytescount += h.sentbytescount - oldbytescount
        except AttributeError:
            pass

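# Added note (not in the original file): the try/except above mirrors the
# per-handler counters onto the parent opener only when someone has given
# the opener matching attributes; plain openers are tolerated silently.
# A hypothetical opt-in looks like:
#
#     opener = urlreq.buildopener(HTTPHandler())
#     opener.requestscount = 0
#     opener.sentbytescount = 0
#     urlreq.installopener(opener)
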
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    pass

class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() method
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer. example: readline must read a buffer and then spit back
    # one line at a time. The only real alternative is to read one
    # BYTE at a time (ick). Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            extrakw[r'strict'] = True
            extrakw[r'buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed. It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s
        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = ''
        s += self._raw_read(amt)
        return s

    # stolen from Python SVN #68532 to fix issue1088
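    # For reference (added comment, not in the original): chunked
    # transfer-coding frames the body as hex-length-prefixed chunks,
    # e.g. '5\r\nhello\r\n0\r\n\r\n' decodes to 'hello'. A zero-length
    # chunk ends the body, followed by optional trailers and a final
    # CRLF, which is exactly what the loops below parse.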
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2) # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2) # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def readinto(self, dest):
        if self._raw_readinto is None:
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0:len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])
        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = ''
        return got

def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                self.sentbytescount += len(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
            self.sentbytescount += len(str)
    except socket.error as v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise

def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # In safesend() we might set the _broken_pipe_resp
        # attribute, in which case the socket has already
        # been closed and we just need to give them the response
        # back. Otherwise, we use the normal response path.
        r = getattr(self, '_broken_pipe_resp', None)
        if r is not None:
            return r
        return cls.getresponse(self)
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
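
# Added note (not in the original file): safesend() and safegetresponse()
# cooperate on broken pipes. If the server answers and hangs up mid-POST,
# send() swallows the EPIPE once, stashes the early response in
# _broken_pipe_resp, and the wrapped getresponse() returns it instead of
# touching the closed socket.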

class HTTPConnection(httplib.HTTPConnection):
    # url.httpsconnection inherits from this. So when adding/removing
    # attributes, be sure to audit httpsconnection() for unintended
    # consequences.

    # use the modified response class
    response_class = HTTPResponse
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)

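    # Byte counter consumed by KeepAliveHandler._start_transaction() to
    # compute per-request deltas (added by this change).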
    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        self.sentbytescount = 0

#########################################################################
#####   TEST FUNCTIONS
#########################################################################


def continuity(url):
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('normal urllib', node.hex(m.digest())))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('keepalive read', node.hex(m.digest())))

    fo = urlreq.urlopen(url)
    foo = ''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(format % ('keepalive readline', node.hex(m.digest())))

def comp(N, url):
    print(' making %i connections to:\n %s' % (N, url))

    procutil.stdout.write(' first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print(' TIME: %.3f s' % t1)

    procutil.stdout.write(' now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print(' TIME: %.3f s' % t2)
    print(' improvement factor: %.2f' % (t1 / t2))

def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, i))

    return diff

def test_timeout(url):
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        procutil.stdout.write('\r  %2i' % i)
        procutil.stdout.flush()
        time.sleep(1)
        i -= 1
    procutil.stderr.write('\r')

    print("  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup


def test(url, N=10):
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print('')
    print("performing speed comparison")
    comp(N, url)
    print('')
    print("performing dropped-connection check")
    test_timeout(url)

if __name__ == '__main__':
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)