##// END OF EJS Templates
http: reuse Python's implementation of read/readline/readinto...
Joerg Sonnenberger -
r52722:c1ed5ee2 default
parent child Browse files
Show More
@@ -1,844 +1,707 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81 """
81 """
82
82
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84
84
85
85
86 import collections
86 import collections
87 import hashlib
87 import hashlib
88 import socket
88 import socket
89 import sys
89 import sys
90 import threading
90 import threading
91
91
92 from .i18n import _
92 from .i18n import _
93 from .node import hex
93 from .node import hex
94 from . import (
94 from . import (
95 pycompat,
95 pycompat,
96 urllibcompat,
96 urllibcompat,
97 util,
97 util,
98 )
98 )
99 from .utils import procutil
99 from .utils import procutil
100
100
101 httplib = util.httplib
101 httplib = util.httplib
102 urlerr = util.urlerr
102 urlerr = util.urlerr
103 urlreq = util.urlreq
103 urlreq = util.urlreq
104
104
105 DEBUG = None
105 DEBUG = None
106
106
107
107
108 class ConnectionManager:
108 class ConnectionManager:
109 """
109 """
110 The connection manager must be able to:
110 The connection manager must be able to:
111 * keep track of all existing
111 * keep track of all existing
112 """
112 """
113
113
114 def __init__(self):
114 def __init__(self):
115 self._lock = threading.Lock()
115 self._lock = threading.Lock()
116 self._hostmap = collections.defaultdict(list) # host -> [connection]
116 self._hostmap = collections.defaultdict(list) # host -> [connection]
117 self._connmap = {} # map connections to host
117 self._connmap = {} # map connections to host
118 self._readymap = {} # map connection to ready state
118 self._readymap = {} # map connection to ready state
119
119
120 def add(self, host, connection, ready):
120 def add(self, host, connection, ready):
121 self._lock.acquire()
121 self._lock.acquire()
122 try:
122 try:
123 self._hostmap[host].append(connection)
123 self._hostmap[host].append(connection)
124 self._connmap[connection] = host
124 self._connmap[connection] = host
125 self._readymap[connection] = ready
125 self._readymap[connection] = ready
126 finally:
126 finally:
127 self._lock.release()
127 self._lock.release()
128
128
129 def remove(self, connection):
129 def remove(self, connection):
130 self._lock.acquire()
130 self._lock.acquire()
131 try:
131 try:
132 try:
132 try:
133 host = self._connmap[connection]
133 host = self._connmap[connection]
134 except KeyError:
134 except KeyError:
135 pass
135 pass
136 else:
136 else:
137 del self._connmap[connection]
137 del self._connmap[connection]
138 del self._readymap[connection]
138 del self._readymap[connection]
139 self._hostmap[host].remove(connection)
139 self._hostmap[host].remove(connection)
140 if not self._hostmap[host]:
140 if not self._hostmap[host]:
141 del self._hostmap[host]
141 del self._hostmap[host]
142 finally:
142 finally:
143 self._lock.release()
143 self._lock.release()
144
144
145 def set_ready(self, connection, ready):
145 def set_ready(self, connection, ready):
146 try:
146 try:
147 self._readymap[connection] = ready
147 self._readymap[connection] = ready
148 except KeyError:
148 except KeyError:
149 pass
149 pass
150
150
151 def get_ready_conn(self, host):
151 def get_ready_conn(self, host):
152 conn = None
152 conn = None
153 self._lock.acquire()
153 self._lock.acquire()
154 try:
154 try:
155 for c in self._hostmap[host]:
155 for c in self._hostmap[host]:
156 if self._readymap[c]:
156 if self._readymap[c]:
157 self._readymap[c] = False
157 self._readymap[c] = False
158 conn = c
158 conn = c
159 break
159 break
160 finally:
160 finally:
161 self._lock.release()
161 self._lock.release()
162 return conn
162 return conn
163
163
164 def get_all(self, host=None):
164 def get_all(self, host=None):
165 if host:
165 if host:
166 return list(self._hostmap[host])
166 return list(self._hostmap[host])
167 else:
167 else:
168 return dict(
168 return dict(
169 {h: list(conns) for (h, conns) in self._hostmap.items()}
169 {h: list(conns) for (h, conns) in self._hostmap.items()}
170 )
170 )
171
171
172
172
173 class KeepAliveHandler:
173 class KeepAliveHandler:
174 def __init__(self, timeout=None):
174 def __init__(self, timeout=None):
175 self._cm = ConnectionManager()
175 self._cm = ConnectionManager()
176 self._timeout = timeout
176 self._timeout = timeout
177 self.requestscount = 0
177 self.requestscount = 0
178 self.sentbytescount = 0
178 self.sentbytescount = 0
179
179
180 #### Connection Management
180 #### Connection Management
181 def open_connections(self):
181 def open_connections(self):
182 """return a list of connected hosts and the number of connections
182 """return a list of connected hosts and the number of connections
183 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
183 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
184 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
184 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
185
185
186 def close_connection(self, host):
186 def close_connection(self, host):
187 """close connection(s) to <host>
187 """close connection(s) to <host>
188 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
188 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
189 no error occurs if there is no connection to that host."""
189 no error occurs if there is no connection to that host."""
190 for h in self._cm.get_all(host):
190 for h in self._cm.get_all(host):
191 self._cm.remove(h)
191 self._cm.remove(h)
192 h.close()
192 h.close()
193
193
194 def close_all(self):
194 def close_all(self):
195 """close all open connections"""
195 """close all open connections"""
196 for host, conns in self._cm.get_all().items():
196 for host, conns in self._cm.get_all().items():
197 for h in conns:
197 for h in conns:
198 self._cm.remove(h)
198 self._cm.remove(h)
199 h.close()
199 h.close()
200
200
201 def _request_closed(self, request, host, connection):
201 def _request_closed(self, request, host, connection):
202 """tells us that this request is now closed and that the
202 """tells us that this request is now closed and that the
203 connection is ready for another request"""
203 connection is ready for another request"""
204 self._cm.set_ready(connection, True)
204 self._cm.set_ready(connection, True)
205
205
206 def _remove_connection(self, host, connection, close=0):
206 def _remove_connection(self, host, connection, close=0):
207 if close:
207 if close:
208 connection.close()
208 connection.close()
209 self._cm.remove(connection)
209 self._cm.remove(connection)
210
210
211 #### Transaction Execution
211 #### Transaction Execution
212 def http_open(self, req):
212 def http_open(self, req):
213 return self.do_open(HTTPConnection, req)
213 return self.do_open(HTTPConnection, req)
214
214
215 def do_open(self, http_class, req):
215 def do_open(self, http_class, req):
216 host = urllibcompat.gethost(req)
216 host = urllibcompat.gethost(req)
217 if not host:
217 if not host:
218 raise urlerr.urlerror(b'no host given')
218 raise urlerr.urlerror(b'no host given')
219
219
220 try:
220 try:
221 h = self._cm.get_ready_conn(host)
221 h = self._cm.get_ready_conn(host)
222 while h:
222 while h:
223 r = self._reuse_connection(h, req, host)
223 r = self._reuse_connection(h, req, host)
224
224
225 # if this response is non-None, then it worked and we're
225 # if this response is non-None, then it worked and we're
226 # done. Break out, skipping the else block.
226 # done. Break out, skipping the else block.
227 if r:
227 if r:
228 break
228 break
229
229
230 # connection is bad - possibly closed by server
230 # connection is bad - possibly closed by server
231 # discard it and ask for the next free connection
231 # discard it and ask for the next free connection
232 h.close()
232 h.close()
233 self._cm.remove(h)
233 self._cm.remove(h)
234 h = self._cm.get_ready_conn(host)
234 h = self._cm.get_ready_conn(host)
235 else:
235 else:
236 # no (working) free connections were found. Create a new one.
236 # no (working) free connections were found. Create a new one.
237 h = http_class(host, timeout=self._timeout)
237 h = http_class(host, timeout=self._timeout)
238 if DEBUG:
238 if DEBUG:
239 DEBUG.info(
239 DEBUG.info(
240 b"creating new connection to %s (%d)", host, id(h)
240 b"creating new connection to %s (%d)", host, id(h)
241 )
241 )
242 self._cm.add(host, h, False)
242 self._cm.add(host, h, False)
243 self._start_transaction(h, req)
243 self._start_transaction(h, req)
244 r = h.getresponse()
244 r = h.getresponse()
245 # The string form of BadStatusLine is the status line. Add some context
245 # The string form of BadStatusLine is the status line. Add some context
246 # to make the error message slightly more useful.
246 # to make the error message slightly more useful.
247 except httplib.BadStatusLine as err:
247 except httplib.BadStatusLine as err:
248 raise urlerr.urlerror(
248 raise urlerr.urlerror(
249 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
249 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
250 )
250 )
251 except (socket.error, httplib.HTTPException) as err:
251 except (socket.error, httplib.HTTPException) as err:
252 raise urlerr.urlerror(err)
252 raise urlerr.urlerror(err)
253
253
254 # If not a persistent connection, don't try to reuse it. Look
254 # If not a persistent connection, don't try to reuse it. Look
255 # for this using getattr() since vcr doesn't define this
255 # for this using getattr() since vcr doesn't define this
256 # attribute, and in that case always close the connection.
256 # attribute, and in that case always close the connection.
257 if getattr(r, 'will_close', True):
257 if getattr(r, 'will_close', True):
258 self._cm.remove(h)
258 self._cm.remove(h)
259
259
260 if DEBUG:
260 if DEBUG:
261 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
261 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
262 r._handler = self
262 r._handler = self
263 r._host = host
263 r._host = host
264 r._url = req.get_full_url()
264 r._url = req.get_full_url()
265 r._connection = h
265 r._connection = h
266 r.code = r.status
266 r.code = r.status
267 r.headers = r.msg
267 r.headers = r.msg
268 r.msg = r.reason
268 r.msg = r.reason
269
269
270 return r
270 return r
271
271
272 def _reuse_connection(self, h, req, host):
272 def _reuse_connection(self, h, req, host):
273 """start the transaction with a re-used connection
273 """start the transaction with a re-used connection
274 return a response object (r) upon success or None on failure.
274 return a response object (r) upon success or None on failure.
275 This DOES not close or remove bad connections in cases where
275 This DOES not close or remove bad connections in cases where
276 it returns. However, if an unexpected exception occurs, it
276 it returns. However, if an unexpected exception occurs, it
277 will close and remove the connection before re-raising.
277 will close and remove the connection before re-raising.
278 """
278 """
279 try:
279 try:
280 self._start_transaction(h, req)
280 self._start_transaction(h, req)
281 r = h.getresponse()
281 r = h.getresponse()
282 # note: just because we got something back doesn't mean it
282 # note: just because we got something back doesn't mean it
283 # worked. We'll check the version below, too.
283 # worked. We'll check the version below, too.
284 except (socket.error, httplib.HTTPException):
284 except (socket.error, httplib.HTTPException):
285 r = None
285 r = None
286 except: # re-raises
286 except: # re-raises
287 # adding this block just in case we've missed
287 # adding this block just in case we've missed
288 # something we will still raise the exception, but
288 # something we will still raise the exception, but
289 # lets try and close the connection and remove it
289 # lets try and close the connection and remove it
290 # first. We previously got into a nasty loop
290 # first. We previously got into a nasty loop
291 # where an exception was uncaught, and so the
291 # where an exception was uncaught, and so the
292 # connection stayed open. On the next try, the
292 # connection stayed open. On the next try, the
293 # same exception was raised, etc. The trade-off is
293 # same exception was raised, etc. The trade-off is
294 # that it's now possible this call will raise
294 # that it's now possible this call will raise
295 # a DIFFERENT exception
295 # a DIFFERENT exception
296 if DEBUG:
296 if DEBUG:
297 DEBUG.error(
297 DEBUG.error(
298 b"unexpected exception - closing connection to %s (%d)",
298 b"unexpected exception - closing connection to %s (%d)",
299 host,
299 host,
300 id(h),
300 id(h),
301 )
301 )
302 self._cm.remove(h)
302 self._cm.remove(h)
303 h.close()
303 h.close()
304 raise
304 raise
305
305
306 if r is None or r.version == 9:
306 if r is None or r.version == 9:
307 # httplib falls back to assuming HTTP 0.9 if it gets a
307 # httplib falls back to assuming HTTP 0.9 if it gets a
308 # bad header back. This is most likely to happen if
308 # bad header back. This is most likely to happen if
309 # the socket has been closed by the server since we
309 # the socket has been closed by the server since we
310 # last used the connection.
310 # last used the connection.
311 if DEBUG:
311 if DEBUG:
312 DEBUG.info(
312 DEBUG.info(
313 b"failed to re-use connection to %s (%d)", host, id(h)
313 b"failed to re-use connection to %s (%d)", host, id(h)
314 )
314 )
315 r = None
315 r = None
316 else:
316 else:
317 if DEBUG:
317 if DEBUG:
318 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
318 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
319
319
320 return r
320 return r
321
321
322 def _start_transaction(self, h, req):
322 def _start_transaction(self, h, req):
323 oldbytescount = getattr(h, 'sentbytescount', 0)
323 oldbytescount = getattr(h, 'sentbytescount', 0)
324
324
325 # What follows mostly reimplements HTTPConnection.request()
325 # What follows mostly reimplements HTTPConnection.request()
326 # except it adds self.parent.addheaders in the mix and sends headers
326 # except it adds self.parent.addheaders in the mix and sends headers
327 # in a deterministic order (to make testing easier).
327 # in a deterministic order (to make testing easier).
328 headers = util.sortdict(self.parent.addheaders)
328 headers = util.sortdict(self.parent.addheaders)
329 headers.update(sorted(req.headers.items()))
329 headers.update(sorted(req.headers.items()))
330 headers.update(sorted(req.unredirected_hdrs.items()))
330 headers.update(sorted(req.unredirected_hdrs.items()))
331 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
331 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
332 skipheaders = {}
332 skipheaders = {}
333 for n in ('host', 'accept-encoding'):
333 for n in ('host', 'accept-encoding'):
334 if n in headers:
334 if n in headers:
335 skipheaders['skip_' + n.replace('-', '_')] = 1
335 skipheaders['skip_' + n.replace('-', '_')] = 1
336 try:
336 try:
337 if urllibcompat.hasdata(req):
337 if urllibcompat.hasdata(req):
338 data = urllibcompat.getdata(req)
338 data = urllibcompat.getdata(req)
339 h.putrequest(
339 h.putrequest(
340 req.get_method(),
340 req.get_method(),
341 urllibcompat.getselector(req),
341 urllibcompat.getselector(req),
342 **skipheaders
342 **skipheaders
343 )
343 )
344 if 'content-type' not in headers:
344 if 'content-type' not in headers:
345 h.putheader(
345 h.putheader(
346 'Content-type', 'application/x-www-form-urlencoded'
346 'Content-type', 'application/x-www-form-urlencoded'
347 )
347 )
348 if 'content-length' not in headers:
348 if 'content-length' not in headers:
349 h.putheader('Content-length', '%d' % len(data))
349 h.putheader('Content-length', '%d' % len(data))
350 else:
350 else:
351 h.putrequest(
351 h.putrequest(
352 req.get_method(),
352 req.get_method(),
353 urllibcompat.getselector(req),
353 urllibcompat.getselector(req),
354 **skipheaders
354 **skipheaders
355 )
355 )
356 except socket.error as err:
356 except socket.error as err:
357 raise urlerr.urlerror(err)
357 raise urlerr.urlerror(err)
358 for k, v in headers.items():
358 for k, v in headers.items():
359 h.putheader(k, v)
359 h.putheader(k, v)
360 h.endheaders()
360 h.endheaders()
361 if urllibcompat.hasdata(req):
361 if urllibcompat.hasdata(req):
362 h.send(data)
362 h.send(data)
363
363
364 # This will fail to record events in case of I/O failure. That's OK.
364 # This will fail to record events in case of I/O failure. That's OK.
365 self.requestscount += 1
365 self.requestscount += 1
366 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
366 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
367
367
368 try:
368 try:
369 self.parent.requestscount += 1
369 self.parent.requestscount += 1
370 self.parent.sentbytescount += (
370 self.parent.sentbytescount += (
371 getattr(h, 'sentbytescount', 0) - oldbytescount
371 getattr(h, 'sentbytescount', 0) - oldbytescount
372 )
372 )
373 except AttributeError:
373 except AttributeError:
374 pass
374 pass
375
375
376
376
377 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
377 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
378 pass
378 pass
379
379
380
380
381 class HTTPResponse(httplib.HTTPResponse):
381 class HTTPResponse(httplib.HTTPResponse):
382 # we need to subclass HTTPResponse in order to
382 # we need to subclass HTTPResponse in order to
383 # 1) add readline(), readlines(), and readinto() methods
383 # 1) add close_connection() methods
384 # 2) add close_connection() methods
384 # 2) add info() and geturl() methods
385 # 3) add info() and geturl() methods
385 # 3) add accounting for read(), readlines() and readinto()
386
387 # in order to add readline(), read must be modified to deal with a
388 # buffer. example: readline must read a buffer and then spit back
389 # one line at a time. The only real alternative is to read one
390 # BYTE at a time (ick). Once something has been read, it can't be
391 # put back (ok, maybe it can, but that's even uglier than this),
392 # so if you THEN do a normal read, you must first take stuff from
393 # the buffer.
394
395 # the read method wraps the original to accommodate buffering,
396 # although read() never adds to the buffer.
397 # Both readline and readlines have been stolen with almost no
398 # modification from socket.py
399
386
400 def __init__(self, sock, debuglevel=0, strict=0, method=None):
387 def __init__(self, sock, debuglevel=0, strict=0, method=None):
401 httplib.HTTPResponse.__init__(
388 httplib.HTTPResponse.__init__(
402 self, sock, debuglevel=debuglevel, method=method
389 self, sock, debuglevel=debuglevel, method=method
403 )
390 )
404 self.fileno = sock.fileno
391 self.fileno = sock.fileno
405 self.code = None
392 self.code = None
406 self.receivedbytescount = 0
393 self.receivedbytescount = 0
407 self._rbuf = b''
394 self._rbuf = b''
408 self._rbufsize = 8096
395 self._rbufsize = 8096
409 self._handler = None # inserted by the handler later
396 self._handler = None # inserted by the handler later
410 self._host = None # (same)
397 self._host = None # (same)
411 self._url = None # (same)
398 self._url = None # (same)
412 self._connection = None # (same)
399 self._connection = None # (same)
413
400
414 _raw_read = httplib.HTTPResponse.read
415 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
416
417 # Python 2.7 has a single close() which closes the socket handle.
401 # Python 2.7 has a single close() which closes the socket handle.
418 # This method was effectively renamed to _close_conn() in Python 3. But
402 # This method was effectively renamed to _close_conn() in Python 3. But
419 # there is also a close(). _close_conn() is called by methods like
403 # there is also a close(). _close_conn() is called by methods like
420 # read().
404 # read().
421
405
422 def close(self):
406 def close(self):
423 if self.fp:
407 if self.fp:
424 self.fp.close()
408 self.fp.close()
425 self.fp = None
409 self.fp = None
426 if self._handler:
410 if self._handler:
427 self._handler._request_closed(
411 self._handler._request_closed(
428 self, self._host, self._connection
412 self, self._host, self._connection
429 )
413 )
430
414
431 def _close_conn(self):
415 def _close_conn(self):
432 self.close()
416 self.close()
433
417
434 def close_connection(self):
418 def close_connection(self):
435 self._handler._remove_connection(self._host, self._connection, close=1)
419 self._handler._remove_connection(self._host, self._connection, close=1)
436 self.close()
420 self.close()
437
421
438 def info(self):
422 def info(self):
439 return self.headers
423 return self.headers
440
424
441 def geturl(self):
425 def geturl(self):
442 return self._url
426 return self._url
443
427
444 def read(self, amt=None):
428 def read(self, amt=None):
445 # the _rbuf test is only in this first if for speed. It's not
429 data = super().read(amt)
446 # logically necessary
447 if self._rbuf and amt is not None:
448 L = len(self._rbuf)
449 if amt > L:
450 amt -= L
451 else:
452 s = self._rbuf[:amt]
453 self._rbuf = self._rbuf[amt:]
454 return s
455 # Careful! http.client.HTTPResponse.read() on Python 3 is
456 # implemented using readinto(), which can duplicate self._rbuf
457 # if it's not empty.
458 s = self._rbuf
459 self._rbuf = b''
460 data = self._raw_read(amt)
461
462 self.receivedbytescount += len(data)
430 self.receivedbytescount += len(data)
463 try:
431 try:
464 self._connection.receivedbytescount += len(data)
432 self._connection.receivedbytescount += len(data)
465 except AttributeError:
433 except AttributeError:
466 pass
434 pass
467 try:
435 try:
468 self._handler.parent.receivedbytescount += len(data)
436 self._handler.parent.receivedbytescount += len(data)
469 except AttributeError:
437 except AttributeError:
470 pass
438 pass
471
439 return data
472 s += data
473 return s
474
475 # stolen from Python SVN #68532 to fix issue1088
476 def _read_chunked(self, amt):
477 chunk_left = self.chunk_left
478 parts = []
479
480 while True:
481 if chunk_left is None:
482 line = self.fp.readline()
483 i = line.find(b';')
484 if i >= 0:
485 line = line[:i] # strip chunk-extensions
486 try:
487 chunk_left = int(line, 16)
488 except ValueError:
489 # close the connection as protocol synchronization is
490 # probably lost
491 self.close()
492 raise httplib.IncompleteRead(b''.join(parts))
493 if chunk_left == 0:
494 break
495 if amt is None:
496 parts.append(self._safe_read(chunk_left))
497 elif amt < chunk_left:
498 parts.append(self._safe_read(amt))
499 self.chunk_left = chunk_left - amt
500 return b''.join(parts)
501 elif amt == chunk_left:
502 parts.append(self._safe_read(amt))
503 self._safe_read(2) # toss the CRLF at the end of the chunk
504 self.chunk_left = None
505 return b''.join(parts)
506 else:
507 parts.append(self._safe_read(chunk_left))
508 amt -= chunk_left
509
510 # we read the whole chunk, get another
511 self._safe_read(2) # toss the CRLF at the end of the chunk
512 chunk_left = None
513
514 # read and discard trailer up to the CRLF terminator
515 ### note: we shouldn't have any trailers!
516 while True:
517 line = self.fp.readline()
518 if not line:
519 # a vanishingly small number of sites EOF without
520 # sending the trailer
521 break
522 if line == b'\r\n':
523 break
524
525 # we read everything; close the "file"
526 self.close()
527
528 return b''.join(parts)
529
440
530 def readline(self):
441 def readline(self):
531 # Fast path for a line is already available in read buffer.
442 data = super().readline()
532 i = self._rbuf.find(b'\n')
443 self.receivedbytescount += len(data)
533 if i >= 0:
444 try:
534 i += 1
445 self._connection.receivedbytescount += len(data)
535 line = self._rbuf[:i]
446 except AttributeError:
536 self._rbuf = self._rbuf[i:]
447 pass
537 return line
448 try:
538
449 self._handler.parent.receivedbytescount += len(data)
539 # No newline in local buffer. Read until we find one.
450 except AttributeError:
540 # readinto read via readinto will already return _rbuf
451 pass
541 if self._raw_readinto is None:
452 return data
542 chunks = [self._rbuf]
543 else:
544 chunks = []
545 i = -1
546 readsize = self._rbufsize
547 while True:
548 new = self._raw_read(readsize)
549 if not new:
550 break
551
552 self.receivedbytescount += len(new)
553 self._connection.receivedbytescount += len(new)
554 try:
555 self._handler.parent.receivedbytescount += len(new)
556 except AttributeError:
557 pass
558
559 chunks.append(new)
560 i = new.find(b'\n')
561 if i >= 0:
562 break
563
564 # We either have exhausted the stream or have a newline in chunks[-1].
565
566 # EOF
567 if i == -1:
568 self._rbuf = b''
569 return b''.join(chunks)
570
571 i += 1
572 self._rbuf = chunks[-1][i:]
573 chunks[-1] = chunks[-1][:i]
574 return b''.join(chunks)
575
453
576 def readinto(self, dest):
454 def readinto(self, dest):
577 if self._raw_readinto is None:
455 got = super().readinto(dest)
578 res = self.read(len(dest))
579 if not res:
580 return 0
581 dest[0 : len(res)] = res
582 return len(res)
583 total = len(dest)
584 have = len(self._rbuf)
585 if have >= total:
586 dest[0:total] = self._rbuf[:total]
587 self._rbuf = self._rbuf[total:]
588 return total
589 mv = memoryview(dest)
590 got = self._raw_readinto(mv[have:total])
591
592 self.receivedbytescount += got
456 self.receivedbytescount += got
593 self._connection.receivedbytescount += got
594 try:
457 try:
595 self._handler.receivedbytescount += got
458 self._connection.receivedbytescount += got
596 except AttributeError:
459 except AttributeError:
597 pass
460 pass
598
461 try:
599 dest[0:have] = self._rbuf
462 self._handler.parent.receivedbytescount += got
600 got += len(self._rbuf)
463 except AttributeError:
601 self._rbuf = b''
464 pass
602 return got
465 return got
603
466
604
467
605 def safesend(self, str):
468 def safesend(self, str):
606 """Send `str' to the server.
469 """Send `str' to the server.
607
470
608 Shamelessly ripped off from httplib to patch a bad behavior.
471 Shamelessly ripped off from httplib to patch a bad behavior.
609 """
472 """
610 # _broken_pipe_resp is an attribute we set in this function
473 # _broken_pipe_resp is an attribute we set in this function
611 # if the socket is closed while we're sending data but
474 # if the socket is closed while we're sending data but
612 # the server sent us a response before hanging up.
475 # the server sent us a response before hanging up.
613 # In that case, we want to pretend to send the rest of the
476 # In that case, we want to pretend to send the rest of the
614 # outgoing data, and then let the user use getresponse()
477 # outgoing data, and then let the user use getresponse()
615 # (which we wrap) to get this last response before
478 # (which we wrap) to get this last response before
616 # opening a new socket.
479 # opening a new socket.
617 if getattr(self, '_broken_pipe_resp', None) is not None:
480 if getattr(self, '_broken_pipe_resp', None) is not None:
618 return
481 return
619
482
620 if self.sock is None:
483 if self.sock is None:
621 if self.auto_open:
484 if self.auto_open:
622 self.connect()
485 self.connect()
623 else:
486 else:
624 raise httplib.NotConnected
487 raise httplib.NotConnected
625
488
626 # send the data to the server. if we get a broken pipe, then close
489 # send the data to the server. if we get a broken pipe, then close
627 # the socket. we want to reconnect when somebody tries to send again.
490 # the socket. we want to reconnect when somebody tries to send again.
628 #
491 #
629 # NOTE: we DO propagate the error, though, because we cannot simply
492 # NOTE: we DO propagate the error, though, because we cannot simply
630 # ignore the error... the caller will know if they can retry.
493 # ignore the error... the caller will know if they can retry.
631 if self.debuglevel > 0:
494 if self.debuglevel > 0:
632 print(b"send:", repr(str))
495 print(b"send:", repr(str))
633 try:
496 try:
634 blocksize = 8192
497 blocksize = 8192
635 read = getattr(str, 'read', None)
498 read = getattr(str, 'read', None)
636 if read is not None:
499 if read is not None:
637 if self.debuglevel > 0:
500 if self.debuglevel > 0:
638 print(b"sending a read()able")
501 print(b"sending a read()able")
639 data = read(blocksize)
502 data = read(blocksize)
640 while data:
503 while data:
641 self.sock.sendall(data)
504 self.sock.sendall(data)
642 self.sentbytescount += len(data)
505 self.sentbytescount += len(data)
643 data = read(blocksize)
506 data = read(blocksize)
644 else:
507 else:
645 self.sock.sendall(str)
508 self.sock.sendall(str)
646 self.sentbytescount += len(str)
509 self.sentbytescount += len(str)
647 except BrokenPipeError:
510 except BrokenPipeError:
648 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
511 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
649 self._broken_pipe_resp = None
512 self._broken_pipe_resp = None
650 self._broken_pipe_resp = self.getresponse()
513 self._broken_pipe_resp = self.getresponse()
651 reraise = False
514 reraise = False
652 else:
515 else:
653 reraise = True
516 reraise = True
654 self.close()
517 self.close()
655 if reraise:
518 if reraise:
656 raise
519 raise
657
520
658
521
659 def wrapgetresponse(cls):
522 def wrapgetresponse(cls):
660 """Wraps getresponse in cls with a broken-pipe sane version."""
523 """Wraps getresponse in cls with a broken-pipe sane version."""
661
524
662 def safegetresponse(self):
525 def safegetresponse(self):
663 # In safesend() we might set the _broken_pipe_resp
526 # In safesend() we might set the _broken_pipe_resp
664 # attribute, in which case the socket has already
527 # attribute, in which case the socket has already
665 # been closed and we just need to give them the response
528 # been closed and we just need to give them the response
666 # back. Otherwise, we use the normal response path.
529 # back. Otherwise, we use the normal response path.
667 r = getattr(self, '_broken_pipe_resp', None)
530 r = getattr(self, '_broken_pipe_resp', None)
668 if r is not None:
531 if r is not None:
669 return r
532 return r
670 return cls.getresponse(self)
533 return cls.getresponse(self)
671
534
672 safegetresponse.__doc__ = cls.getresponse.__doc__
535 safegetresponse.__doc__ = cls.getresponse.__doc__
673 return safegetresponse
536 return safegetresponse
674
537
675
538
676 class HTTPConnection(httplib.HTTPConnection):
539 class HTTPConnection(httplib.HTTPConnection):
677 # url.httpsconnection inherits from this. So when adding/removing
540 # url.httpsconnection inherits from this. So when adding/removing
678 # attributes, be sure to audit httpsconnection() for unintended
541 # attributes, be sure to audit httpsconnection() for unintended
679 # consequences.
542 # consequences.
680
543
681 # use the modified response class
544 # use the modified response class
682 response_class = HTTPResponse
545 response_class = HTTPResponse
683 send = safesend
546 send = safesend
684 getresponse = wrapgetresponse(httplib.HTTPConnection)
547 getresponse = wrapgetresponse(httplib.HTTPConnection)
685
548
686 def __init__(self, *args, **kwargs):
549 def __init__(self, *args, **kwargs):
687 httplib.HTTPConnection.__init__(self, *args, **kwargs)
550 httplib.HTTPConnection.__init__(self, *args, **kwargs)
688 self.sentbytescount = 0
551 self.sentbytescount = 0
689 self.receivedbytescount = 0
552 self.receivedbytescount = 0
690
553
691 def __repr__(self):
554 def __repr__(self):
692 base = super(HTTPConnection, self).__repr__()
555 base = super(HTTPConnection, self).__repr__()
693 local = "(unconnected)"
556 local = "(unconnected)"
694 s = self.sock
557 s = self.sock
695 if s:
558 if s:
696 try:
559 try:
697 local = "%s:%d" % s.getsockname()
560 local = "%s:%d" % s.getsockname()
698 except OSError:
561 except OSError:
699 pass # Likely not connected
562 pass # Likely not connected
700 return "<%s: %s <--> %s:%d>" % (base, local, self.host, self.port)
563 return "<%s: %s <--> %s:%d>" % (base, local, self.host, self.port)
701
564
702
565
703 #########################################################################
566 #########################################################################
704 ##### TEST FUNCTIONS
567 ##### TEST FUNCTIONS
705 #########################################################################
568 #########################################################################
706
569
707
570
708 def continuity(url):
571 def continuity(url):
709 md5 = hashlib.md5
572 md5 = hashlib.md5
710 format = b'%25s: %s'
573 format = b'%25s: %s'
711
574
712 # first fetch the file with the normal http handler
575 # first fetch the file with the normal http handler
713 opener = urlreq.buildopener()
576 opener = urlreq.buildopener()
714 urlreq.installopener(opener)
577 urlreq.installopener(opener)
715 fo = urlreq.urlopen(url)
578 fo = urlreq.urlopen(url)
716 foo = fo.read()
579 foo = fo.read()
717 fo.close()
580 fo.close()
718 m = md5(foo)
581 m = md5(foo)
719 print(format % (b'normal urllib', hex(m.digest())))
582 print(format % (b'normal urllib', hex(m.digest())))
720
583
721 # now install the keepalive handler and try again
584 # now install the keepalive handler and try again
722 opener = urlreq.buildopener(HTTPHandler())
585 opener = urlreq.buildopener(HTTPHandler())
723 urlreq.installopener(opener)
586 urlreq.installopener(opener)
724
587
725 fo = urlreq.urlopen(url)
588 fo = urlreq.urlopen(url)
726 foo = fo.read()
589 foo = fo.read()
727 fo.close()
590 fo.close()
728 m = md5(foo)
591 m = md5(foo)
729 print(format % (b'keepalive read', hex(m.digest())))
592 print(format % (b'keepalive read', hex(m.digest())))
730
593
731 fo = urlreq.urlopen(url)
594 fo = urlreq.urlopen(url)
732 foo = b''
595 foo = b''
733 while True:
596 while True:
734 f = fo.readline()
597 f = fo.readline()
735 if f:
598 if f:
736 foo = foo + f
599 foo = foo + f
737 else:
600 else:
738 break
601 break
739 fo.close()
602 fo.close()
740 m = md5(foo)
603 m = md5(foo)
741 print(format % (b'keepalive readline', hex(m.digest())))
604 print(format % (b'keepalive readline', hex(m.digest())))
742
605
743
606
744 def comp(N, url):
607 def comp(N, url):
745 print(b' making %i connections to:\n %s' % (N, url))
608 print(b' making %i connections to:\n %s' % (N, url))
746
609
747 procutil.stdout.write(b' first using the normal urllib handlers')
610 procutil.stdout.write(b' first using the normal urllib handlers')
748 # first use normal opener
611 # first use normal opener
749 opener = urlreq.buildopener()
612 opener = urlreq.buildopener()
750 urlreq.installopener(opener)
613 urlreq.installopener(opener)
751 t1 = fetch(N, url)
614 t1 = fetch(N, url)
752 print(b' TIME: %.3f s' % t1)
615 print(b' TIME: %.3f s' % t1)
753
616
754 procutil.stdout.write(b' now using the keepalive handler ')
617 procutil.stdout.write(b' now using the keepalive handler ')
755 # now install the keepalive handler and try again
618 # now install the keepalive handler and try again
756 opener = urlreq.buildopener(HTTPHandler())
619 opener = urlreq.buildopener(HTTPHandler())
757 urlreq.installopener(opener)
620 urlreq.installopener(opener)
758 t2 = fetch(N, url)
621 t2 = fetch(N, url)
759 print(b' TIME: %.3f s' % t2)
622 print(b' TIME: %.3f s' % t2)
760 print(b' improvement factor: %.2f' % (t1 / t2))
623 print(b' improvement factor: %.2f' % (t1 / t2))
761
624
762
625
763 def fetch(N, url, delay=0):
626 def fetch(N, url, delay=0):
764 import time
627 import time
765
628
766 lens = []
629 lens = []
767 starttime = time.time()
630 starttime = time.time()
768 for i in range(N):
631 for i in range(N):
769 if delay and i > 0:
632 if delay and i > 0:
770 time.sleep(delay)
633 time.sleep(delay)
771 fo = urlreq.urlopen(url)
634 fo = urlreq.urlopen(url)
772 foo = fo.read()
635 foo = fo.read()
773 fo.close()
636 fo.close()
774 lens.append(len(foo))
637 lens.append(len(foo))
775 diff = time.time() - starttime
638 diff = time.time() - starttime
776
639
777 j = 0
640 j = 0
778 for i in lens[1:]:
641 for i in lens[1:]:
779 j = j + 1
642 j = j + 1
780 if not i == lens[0]:
643 if not i == lens[0]:
781 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
644 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
782
645
783 return diff
646 return diff
784
647
785
648
786 def test_timeout(url):
649 def test_timeout(url):
787 global DEBUG
650 global DEBUG
788 dbbackup = DEBUG
651 dbbackup = DEBUG
789
652
790 class FakeLogger:
653 class FakeLogger:
791 def debug(self, msg, *args):
654 def debug(self, msg, *args):
792 print(msg % args)
655 print(msg % args)
793
656
794 info = warning = error = debug
657 info = warning = error = debug
795
658
796 DEBUG = FakeLogger()
659 DEBUG = FakeLogger()
797 print(b" fetching the file to establish a connection")
660 print(b" fetching the file to establish a connection")
798 fo = urlreq.urlopen(url)
661 fo = urlreq.urlopen(url)
799 data1 = fo.read()
662 data1 = fo.read()
800 fo.close()
663 fo.close()
801
664
802 i = 20
665 i = 20
803 print(b" waiting %i seconds for the server to close the connection" % i)
666 print(b" waiting %i seconds for the server to close the connection" % i)
804 while i > 0:
667 while i > 0:
805 procutil.stdout.write(b'\r %2i' % i)
668 procutil.stdout.write(b'\r %2i' % i)
806 procutil.stdout.flush()
669 procutil.stdout.flush()
807 time.sleep(1)
670 time.sleep(1)
808 i -= 1
671 i -= 1
809 procutil.stderr.write(b'\r')
672 procutil.stderr.write(b'\r')
810
673
811 print(b" fetching the file a second time")
674 print(b" fetching the file a second time")
812 fo = urlreq.urlopen(url)
675 fo = urlreq.urlopen(url)
813 data2 = fo.read()
676 data2 = fo.read()
814 fo.close()
677 fo.close()
815
678
816 if data1 == data2:
679 if data1 == data2:
817 print(b' data are identical')
680 print(b' data are identical')
818 else:
681 else:
819 print(b' ERROR: DATA DIFFER')
682 print(b' ERROR: DATA DIFFER')
820
683
821 DEBUG = dbbackup
684 DEBUG = dbbackup
822
685
823
686
824 def test(url, N=10):
687 def test(url, N=10):
825 print(b"performing continuity test (making sure stuff isn't corrupted)")
688 print(b"performing continuity test (making sure stuff isn't corrupted)")
826 continuity(url)
689 continuity(url)
827 print(b'')
690 print(b'')
828 print(b"performing speed comparison")
691 print(b"performing speed comparison")
829 comp(N, url)
692 comp(N, url)
830 print(b'')
693 print(b'')
831 print(b"performing dropped-connection check")
694 print(b"performing dropped-connection check")
832 test_timeout(url)
695 test_timeout(url)
833
696
834
697
835 if __name__ == '__main__':
698 if __name__ == '__main__':
836 import time
699 import time
837
700
838 try:
701 try:
839 N = int(sys.argv[1])
702 N = int(sys.argv[1])
840 url = sys.argv[2]
703 url = sys.argv[2]
841 except (IndexError, ValueError):
704 except (IndexError, ValueError):
842 print(b"%s <integer> <url>" % sys.argv[0])
705 print(b"%s <integer> <url>" % sys.argv[0])
843 else:
706 else:
844 test(url, N)
707 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now