keepalive: ensure `close_all()` actually closes all cached connections...
Matt Harbison
r50436:8251f7cc stable
@@ -1,845 +1,847
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68   status - the return status (e.g. 404)
69 69   reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 ... except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
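The docstring's example still says urllib2, but the module itself goes through the urlreq aliases pulled in from util below. A rough sketch of the equivalent modern usage, assuming the mercurial package is importable (the URL and timeout are illustrative, error handling omitted):

    from mercurial import util
    from mercurial.keepalive import HTTPHandler

    urlreq = util.urlreq
    opener = urlreq.buildopener(HTTPHandler(timeout=30))
    fo = opener.open('http://www.python.org')
    try:
        data = fo.read()
    finally:
        fo.close()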
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85
86 86 import collections
87 87 import hashlib
88 88 import socket
89 89 import sys
90 90 import threading
91 91
92 92 from .i18n import _
93 93 from .pycompat import getattr
94 94 from .node import hex
95 95 from . import (
96 96 pycompat,
97 97 urllibcompat,
98 98 util,
99 99 )
100 100 from .utils import procutil
101 101
102 102 httplib = util.httplib
103 103 urlerr = util.urlerr
104 104 urlreq = util.urlreq
105 105
106 106 DEBUG = None
107 107
108 108
109 109 class ConnectionManager:
110 110 """
111 111 The connection manager must be able to:
112 112     * keep track of all existing connections
113 113 """
114 114
115 115 def __init__(self):
116 116 self._lock = threading.Lock()
117 117 self._hostmap = collections.defaultdict(list) # host -> [connection]
118 118 self._connmap = {} # map connections to host
119 119 self._readymap = {} # map connection to ready state
120 120
121 121 def add(self, host, connection, ready):
122 122 self._lock.acquire()
123 123 try:
124 124 self._hostmap[host].append(connection)
125 125 self._connmap[connection] = host
126 126 self._readymap[connection] = ready
127 127 finally:
128 128 self._lock.release()
129 129
130 130 def remove(self, connection):
131 131 self._lock.acquire()
132 132 try:
133 133 try:
134 134 host = self._connmap[connection]
135 135 except KeyError:
136 136 pass
137 137 else:
138 138 del self._connmap[connection]
139 139 del self._readymap[connection]
140 140 self._hostmap[host].remove(connection)
141 141 if not self._hostmap[host]:
142 142 del self._hostmap[host]
143 143 finally:
144 144 self._lock.release()
145 145
146 146 def set_ready(self, connection, ready):
147 147 try:
148 148 self._readymap[connection] = ready
149 149 except KeyError:
150 150 pass
151 151
152 152 def get_ready_conn(self, host):
153 153 conn = None
154 154 self._lock.acquire()
155 155 try:
156 156 for c in self._hostmap[host]:
157 157 if self._readymap[c]:
158 158 self._readymap[c] = False
159 159 conn = c
160 160 break
161 161 finally:
162 162 self._lock.release()
163 163 return conn
164 164
165 165 def get_all(self, host=None):
166 166 if host:
167 167 return list(self._hostmap[host])
168 168 else:
169 return dict(self._hostmap)
169 return dict(
170 {h: list(conns) for (h, conns) in self._hostmap.items()}
171 )
170 172
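This hunk is the substance of the commit: get_all() used to hand back the manager's internal per-host lists, so close_all() iterated over the very list that remove() was mutating underneath it and silently skipped every other connection. Returning copies makes the sweep safe. A minimal reproduction of the pitfall in plain Python (not Mercurial code):

    conns = ['c1', 'c2', 'c3']
    for c in conns:         # iterating the live list...
        conns.remove(c)     # ...while mutating it skips elements
    assert conns == ['c2']

    conns = ['c1', 'c2', 'c3']
    for c in conns[:]:      # the fixed get_all() returns a copy like this
        conns.remove(c)
    assert conns == []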
171 173
172 174 class KeepAliveHandler:
173 175 def __init__(self, timeout=None):
174 176 self._cm = ConnectionManager()
175 177 self._timeout = timeout
176 178 self.requestscount = 0
177 179 self.sentbytescount = 0
178 180
179 181 #### Connection Management
180 182 def open_connections(self):
181 183 """return a list of connected hosts and the number of connections
182 184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
183 185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
184 186
185 187 def close_connection(self, host):
186 188 """close connection(s) to <host>
187 189 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
188 190 no error occurs if there is no connection to that host."""
189 191 for h in self._cm.get_all(host):
190 192 self._cm.remove(h)
191 193 h.close()
192 194
193 195 def close_all(self):
194 196 """close all open connections"""
195 197 for host, conns in self._cm.get_all().items():
196 198 for h in conns:
197 199 self._cm.remove(h)
198 200 h.close()
199 201
200 202 def _request_closed(self, request, host, connection):
201 203 """tells us that this request is now closed and that the
202 204 connection is ready for another request"""
203 205 self._cm.set_ready(connection, True)
204 206
205 207 def _remove_connection(self, host, connection, close=0):
206 208 if close:
207 209 connection.close()
208 210 self._cm.remove(connection)
209 211
210 212 #### Transaction Execution
211 213 def http_open(self, req):
212 214 return self.do_open(HTTPConnection, req)
213 215
214 216 def do_open(self, http_class, req):
215 217 host = urllibcompat.gethost(req)
216 218 if not host:
217 219 raise urlerr.urlerror(b'no host given')
218 220
219 221 try:
220 222 h = self._cm.get_ready_conn(host)
221 223 while h:
222 224 r = self._reuse_connection(h, req, host)
223 225
224 226 # if this response is non-None, then it worked and we're
225 227 # done. Break out, skipping the else block.
226 228 if r:
227 229 break
228 230
229 231 # connection is bad - possibly closed by server
230 232 # discard it and ask for the next free connection
231 233 h.close()
232 234 self._cm.remove(h)
233 235 h = self._cm.get_ready_conn(host)
234 236 else:
235 237 # no (working) free connections were found. Create a new one.
236 238 h = http_class(host, timeout=self._timeout)
237 239 if DEBUG:
238 240 DEBUG.info(
239 241 b"creating new connection to %s (%d)", host, id(h)
240 242 )
241 243 self._cm.add(host, h, False)
242 244 self._start_transaction(h, req)
243 245 r = h.getresponse()
244 246 # The string form of BadStatusLine is the status line. Add some context
245 247 # to make the error message slightly more useful.
246 248 except httplib.BadStatusLine as err:
247 249 raise urlerr.urlerror(
248 250 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
249 251 )
250 252 except (socket.error, httplib.HTTPException) as err:
251 253 raise urlerr.urlerror(err)
252 254
253 255 # If not a persistent connection, don't try to reuse it. Look
254 256 # for this using getattr() since vcr doesn't define this
255 257 # attribute, and in that case always close the connection.
256 258 if getattr(r, 'will_close', True):
257 259 self._cm.remove(h)
258 260
259 261 if DEBUG:
260 262 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
261 263 r._handler = self
262 264 r._host = host
263 265 r._url = req.get_full_url()
264 266 r._connection = h
265 267 r.code = r.status
266 268 r.headers = r.msg
267 269 r.msg = r.reason
268 270
269 271 return r
270 272
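do_open() above leans on Python's while/else: the else suite runs only when the loop ends without break, i.e. when no pooled connection could be reused and a fresh one must be created. A toy model of that control flow (hypothetical names):

    def open_conn(ready_conns, reusable):
        # ready_conns: pooled candidates; reusable(c): did reuse succeed?
        c = ready_conns.pop() if ready_conns else None
        while c:
            if reusable(c):
                break                    # reuse worked; else is skipped
            c = ready_conns.pop() if ready_conns else None
        else:
            c = 'new connection'         # loop never broke: open fresh
        return c

    assert open_conn([], lambda c: True) == 'new connection'
    assert open_conn(['pooled'], lambda c: True) == 'pooled'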
271 273 def _reuse_connection(self, h, req, host):
272 274 """start the transaction with a re-used connection
273 275 return a response object (r) upon success or None on failure.
274 276 This DOES not close or remove bad connections in cases where
275 277 it returns. However, if an unexpected exception occurs, it
276 278 will close and remove the connection before re-raising.
277 279 """
278 280 try:
279 281 self._start_transaction(h, req)
280 282 r = h.getresponse()
281 283 # note: just because we got something back doesn't mean it
282 284 # worked. We'll check the version below, too.
283 285 except (socket.error, httplib.HTTPException):
284 286 r = None
285 287 except: # re-raises
286 288            # this block exists in case we've missed something:
287 289            # we will still raise the exception, but let's try to
288 290            # close and remove the connection first. We previously
289 291            # got into a nasty loop
290 292 # where an exception was uncaught, and so the
291 293 # connection stayed open. On the next try, the
292 294 # same exception was raised, etc. The trade-off is
293 295 # that it's now possible this call will raise
294 296 # a DIFFERENT exception
295 297 if DEBUG:
296 298 DEBUG.error(
297 299 b"unexpected exception - closing connection to %s (%d)",
298 300 host,
299 301 id(h),
300 302 )
301 303 self._cm.remove(h)
302 304 h.close()
303 305 raise
304 306
305 307 if r is None or r.version == 9:
306 308 # httplib falls back to assuming HTTP 0.9 if it gets a
307 309 # bad header back. This is most likely to happen if
308 310 # the socket has been closed by the server since we
309 311 # last used the connection.
310 312 if DEBUG:
311 313 DEBUG.info(
312 314 b"failed to re-use connection to %s (%d)", host, id(h)
313 315 )
314 316 r = None
315 317 else:
316 318 if DEBUG:
317 319 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
318 320
319 321 return r
320 322
321 323 def _start_transaction(self, h, req):
322 324 oldbytescount = getattr(h, 'sentbytescount', 0)
323 325
324 326 # What follows mostly reimplements HTTPConnection.request()
325 327 # except it adds self.parent.addheaders in the mix and sends headers
326 328 # in a deterministic order (to make testing easier).
327 329 headers = util.sortdict(self.parent.addheaders)
328 330 headers.update(sorted(req.headers.items()))
329 331 headers.update(sorted(req.unredirected_hdrs.items()))
330 332 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
331 333 skipheaders = {}
332 334 for n in ('host', 'accept-encoding'):
333 335 if n in headers:
334 336 skipheaders['skip_' + n.replace('-', '_')] = 1
335 337 try:
336 338 if urllibcompat.hasdata(req):
337 339 data = urllibcompat.getdata(req)
338 340 h.putrequest(
339 341 req.get_method(),
340 342 urllibcompat.getselector(req),
341 343 **skipheaders
342 344 )
343 345 if 'content-type' not in headers:
344 346 h.putheader(
345 347 'Content-type', 'application/x-www-form-urlencoded'
346 348 )
347 349 if 'content-length' not in headers:
348 350 h.putheader('Content-length', '%d' % len(data))
349 351 else:
350 352 h.putrequest(
351 353 req.get_method(),
352 354 urllibcompat.getselector(req),
353 355 **skipheaders
354 356 )
355 357 except socket.error as err:
356 358 raise urlerr.urlerror(err)
357 359 for k, v in headers.items():
358 360 h.putheader(k, v)
359 361 h.endheaders()
360 362 if urllibcompat.hasdata(req):
361 363 h.send(data)
362 364
363 365 # This will fail to record events in case of I/O failure. That's OK.
364 366 self.requestscount += 1
365 367 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
366 368
367 369 try:
368 370 self.parent.requestscount += 1
369 371 self.parent.sentbytescount += (
370 372 getattr(h, 'sentbytescount', 0) - oldbytescount
371 373 )
372 374 except AttributeError:
373 375 pass
374 376
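_start_transaction() emits Host and Accept-Encoding itself, in a deterministic order, so it has to stop http.client from adding them automatically; that is what the skip_host/skip_accept_encoding keywords assembled above do. The same idea against the stdlib directly (the host is illustrative):

    import http.client

    conn = http.client.HTTPConnection('example.org')
    conn.putrequest('GET', '/', skip_host=1, skip_accept_encoding=1)
    conn.putheader('accept-encoding', 'identity')  # our order, exactly once
    conn.putheader('host', 'example.org')
    conn.endheaders()
    resp = conn.getresponse()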
375 377
376 378 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
377 379 pass
378 380
379 381
380 382 class HTTPResponse(httplib.HTTPResponse):
381 383 # we need to subclass HTTPResponse in order to
382 384 # 1) add readline(), readlines(), and readinto() methods
383 385 # 2) add close_connection() methods
384 386 # 3) add info() and geturl() methods
385 387
386 388 # in order to add readline(), read must be modified to deal with a
387 389 # buffer. example: readline must read a buffer and then spit back
388 390 # one line at a time. The only real alternative is to read one
389 391 # BYTE at a time (ick). Once something has been read, it can't be
390 392 # put back (ok, maybe it can, but that's even uglier than this),
391 393 # so if you THEN do a normal read, you must first take stuff from
392 394 # the buffer.
393 395
394 396 # the read method wraps the original to accommodate buffering,
395 397 # although read() never adds to the buffer.
396 398 # Both readline and readlines have been stolen with almost no
397 399 # modification from socket.py
398 400
399 401 def __init__(self, sock, debuglevel=0, strict=0, method=None):
400 402 httplib.HTTPResponse.__init__(
401 403 self, sock, debuglevel=debuglevel, method=method
402 404 )
403 405 self.fileno = sock.fileno
404 406 self.code = None
405 407 self.receivedbytescount = 0
406 408 self._rbuf = b''
407 409 self._rbufsize = 8096
408 410 self._handler = None # inserted by the handler later
409 411 self._host = None # (same)
410 412 self._url = None # (same)
411 413 self._connection = None # (same)
412 414
413 415 _raw_read = httplib.HTTPResponse.read
414 416 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
415 417
416 418 # Python 2.7 has a single close() which closes the socket handle.
417 419 # This method was effectively renamed to _close_conn() in Python 3. But
418 420 # there is also a close(). _close_conn() is called by methods like
419 421 # read().
420 422
421 423 def close(self):
422 424 if self.fp:
423 425 self.fp.close()
424 426 self.fp = None
425 427 if self._handler:
426 428 self._handler._request_closed(
427 429 self, self._host, self._connection
428 430 )
429 431
430 432 def _close_conn(self):
431 433 self.close()
432 434
433 435 def close_connection(self):
434 436 self._handler._remove_connection(self._host, self._connection, close=1)
435 437 self.close()
436 438
437 439 def info(self):
438 440 return self.headers
439 441
440 442 def geturl(self):
441 443 return self._url
442 444
443 445 def read(self, amt=None):
444 446 # the _rbuf test is only in this first if for speed. It's not
445 447 # logically necessary
446 448 if self._rbuf and amt is not None:
447 449 L = len(self._rbuf)
448 450 if amt > L:
449 451 amt -= L
450 452 else:
451 453 s = self._rbuf[:amt]
452 454 self._rbuf = self._rbuf[amt:]
453 455 return s
454 456 # Careful! http.client.HTTPResponse.read() on Python 3 is
455 457 # implemented using readinto(), which can duplicate self._rbuf
456 458 # if it's not empty.
457 459 s = self._rbuf
458 460 self._rbuf = b''
459 461 data = self._raw_read(amt)
460 462
461 463 self.receivedbytescount += len(data)
462 464 try:
463 465 self._connection.receivedbytescount += len(data)
464 466 except AttributeError:
465 467 pass
466 468 try:
467 469 self._handler.parent.receivedbytescount += len(data)
468 470 except AttributeError:
469 471 pass
470 472
471 473 s += data
472 474 return s
473 475
474 476 # stolen from Python SVN #68532 to fix issue1088
475 477 def _read_chunked(self, amt):
476 478 chunk_left = self.chunk_left
477 479 parts = []
478 480
479 481 while True:
480 482 if chunk_left is None:
481 483 line = self.fp.readline()
482 484 i = line.find(b';')
483 485 if i >= 0:
484 486 line = line[:i] # strip chunk-extensions
485 487 try:
486 488 chunk_left = int(line, 16)
487 489 except ValueError:
488 490 # close the connection as protocol synchronization is
489 491 # probably lost
490 492 self.close()
491 493 raise httplib.IncompleteRead(b''.join(parts))
492 494 if chunk_left == 0:
493 495 break
494 496 if amt is None:
495 497 parts.append(self._safe_read(chunk_left))
496 498 elif amt < chunk_left:
497 499 parts.append(self._safe_read(amt))
498 500 self.chunk_left = chunk_left - amt
499 501 return b''.join(parts)
500 502 elif amt == chunk_left:
501 503 parts.append(self._safe_read(amt))
502 504 self._safe_read(2) # toss the CRLF at the end of the chunk
503 505 self.chunk_left = None
504 506 return b''.join(parts)
505 507 else:
506 508 parts.append(self._safe_read(chunk_left))
507 509 amt -= chunk_left
508 510
509 511 # we read the whole chunk, get another
510 512 self._safe_read(2) # toss the CRLF at the end of the chunk
511 513 chunk_left = None
512 514
513 515 # read and discard trailer up to the CRLF terminator
514 516 ### note: we shouldn't have any trailers!
515 517 while True:
516 518 line = self.fp.readline()
517 519 if not line:
518 520 # a vanishingly small number of sites EOF without
519 521 # sending the trailer
520 522 break
521 523 if line == b'\r\n':
522 524 break
523 525
524 526 # we read everything; close the "file"
525 527 self.close()
526 528
527 529 return b''.join(parts)
528 530
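_read_chunked() parses the chunked transfer coding by hand: a hex size line (with optional ';extensions'), the chunk data, a trailing CRLF, and a zero-size chunk to terminate. The wire format it walks, decoded over a canned buffer (illustrative data):

    import io

    raw = io.BytesIO(b'4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n')
    parts = []
    while True:
        size = int(raw.readline().split(b';')[0], 16)  # chunk-size line
        if size == 0:
            break                      # last-chunk: body is complete
        parts.append(raw.read(size))
        raw.readline()                 # toss the CRLF after the data
    assert b''.join(parts) == b'Wikipedia'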
529 531 def readline(self):
530 532 # Fast path for a line is already available in read buffer.
531 533 i = self._rbuf.find(b'\n')
532 534 if i >= 0:
533 535 i += 1
534 536 line = self._rbuf[:i]
535 537 self._rbuf = self._rbuf[i:]
536 538 return line
537 539
538 540 # No newline in local buffer. Read until we find one.
539 541        # on the readinto path, _raw_read() already returns _rbuf's contents
540 542 if self._raw_readinto is None:
541 543 chunks = [self._rbuf]
542 544 else:
543 545 chunks = []
544 546 i = -1
545 547 readsize = self._rbufsize
546 548 while True:
547 549 new = self._raw_read(readsize)
548 550 if not new:
549 551 break
550 552
551 553 self.receivedbytescount += len(new)
552 554 self._connection.receivedbytescount += len(new)
553 555 try:
554 556 self._handler.parent.receivedbytescount += len(new)
555 557 except AttributeError:
556 558 pass
557 559
558 560 chunks.append(new)
559 561 i = new.find(b'\n')
560 562 if i >= 0:
561 563 break
562 564
563 565 # We either have exhausted the stream or have a newline in chunks[-1].
564 566
565 567 # EOF
566 568 if i == -1:
567 569 self._rbuf = b''
568 570 return b''.join(chunks)
569 571
570 572 i += 1
571 573 self._rbuf = chunks[-1][i:]
572 574 chunks[-1] = chunks[-1][:i]
573 575 return b''.join(chunks)
574 576
575 577 def readlines(self, sizehint=0):
576 578 total = 0
577 579 list = []
578 580 while True:
579 581 line = self.readline()
580 582 if not line:
581 583 break
582 584 list.append(line)
583 585 total += len(line)
584 586 if sizehint and total >= sizehint:
585 587 break
586 588 return list
587 589
588 590 def readinto(self, dest):
589 591 if self._raw_readinto is None:
590 592 res = self.read(len(dest))
591 593 if not res:
592 594 return 0
593 595 dest[0 : len(res)] = res
594 596 return len(res)
595 597 total = len(dest)
596 598 have = len(self._rbuf)
597 599 if have >= total:
598 600 dest[0:total] = self._rbuf[:total]
599 601 self._rbuf = self._rbuf[total:]
600 602 return total
601 603 mv = memoryview(dest)
602 604 got = self._raw_readinto(mv[have:total])
603 605
604 606 self.receivedbytescount += got
605 607 self._connection.receivedbytescount += got
606 608 try:
607 609 self._handler.receivedbytescount += got
608 610 except AttributeError:
609 611 pass
610 612
611 613 dest[0:have] = self._rbuf
612 614 got += len(self._rbuf)
613 615 self._rbuf = b''
614 616 return got
615 617
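readinto() first copies whatever is already buffered in _rbuf into the caller's buffer, then lets the raw readinto write the remainder in place; the memoryview slice lets it fill the tail without an intermediate copy. The same two-step fill in isolation (toy data):

    import io

    dest = bytearray(8)
    buffered, stream = b'abc', io.BytesIO(b'defgh')

    dest[0:len(buffered)] = buffered           # drain the local buffer
    mv = memoryview(dest)
    got = stream.readinto(mv[len(buffered):])  # fill the rest in place
    assert bytes(dest) == b'abcdefgh' and got == 5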
616 618
617 619 def safesend(self, str):
618 620 """Send `str' to the server.
619 621
620 622 Shamelessly ripped off from httplib to patch a bad behavior.
621 623 """
622 624 # _broken_pipe_resp is an attribute we set in this function
623 625 # if the socket is closed while we're sending data but
624 626 # the server sent us a response before hanging up.
625 627 # In that case, we want to pretend to send the rest of the
626 628 # outgoing data, and then let the user use getresponse()
627 629 # (which we wrap) to get this last response before
628 630 # opening a new socket.
629 631 if getattr(self, '_broken_pipe_resp', None) is not None:
630 632 return
631 633
632 634 if self.sock is None:
633 635 if self.auto_open:
634 636 self.connect()
635 637 else:
636 638 raise httplib.NotConnected
637 639
638 640 # send the data to the server. if we get a broken pipe, then close
639 641 # the socket. we want to reconnect when somebody tries to send again.
640 642 #
641 643 # NOTE: we DO propagate the error, though, because we cannot simply
642 644 # ignore the error... the caller will know if they can retry.
643 645 if self.debuglevel > 0:
644 646 print(b"send:", repr(str))
645 647 try:
646 648 blocksize = 8192
647 649 read = getattr(str, 'read', None)
648 650 if read is not None:
649 651 if self.debuglevel > 0:
650 652 print(b"sending a read()able")
651 653 data = read(blocksize)
652 654 while data:
653 655 self.sock.sendall(data)
654 656 self.sentbytescount += len(data)
655 657 data = read(blocksize)
656 658 else:
657 659 self.sock.sendall(str)
658 660 self.sentbytescount += len(str)
659 661 except BrokenPipeError:
660 662 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
661 663 self._broken_pipe_resp = None
662 664 self._broken_pipe_resp = self.getresponse()
663 665 reraise = False
664 666 else:
665 667 reraise = True
666 668 self.close()
667 669 if reraise:
668 670 raise
669 671
670 672
671 673 def wrapgetresponse(cls):
672 674 """Wraps getresponse in cls with a broken-pipe sane version."""
673 675
674 676 def safegetresponse(self):
675 677 # In safesend() we might set the _broken_pipe_resp
676 678 # attribute, in which case the socket has already
677 679 # been closed and we just need to give them the response
678 680 # back. Otherwise, we use the normal response path.
679 681 r = getattr(self, '_broken_pipe_resp', None)
680 682 if r is not None:
681 683 return r
682 684 return cls.getresponse(self)
683 685
684 686 safegetresponse.__doc__ = cls.getresponse.__doc__
685 687 return safegetresponse
686 688
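safesend() and the wrapped getresponse() cooperate on one contract: if the server hangs up mid-upload but has already answered, the early response is stashed on _broken_pipe_resp and the BrokenPipeError is swallowed; the next getresponse() returns the stash instead of reading the dead socket. The contract in miniature (hypothetical class, no real sockets):

    class MiniConn:
        _broken_pipe_resp = None

        def send(self, data):
            try:
                raise BrokenPipeError  # peer closed mid-send...
            except BrokenPipeError:
                # ...but had already replied, so keep that response
                self._broken_pipe_resp = 'early response'

        def getresponse(self):
            r, self._broken_pipe_resp = self._broken_pipe_resp, None
            return r or 'normal response'

    c = MiniConn()
    c.send(b'large POST body')
    assert c.getresponse() == 'early response'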
687 689
688 690 class HTTPConnection(httplib.HTTPConnection):
689 691 # url.httpsconnection inherits from this. So when adding/removing
690 692 # attributes, be sure to audit httpsconnection() for unintended
691 693 # consequences.
692 694
693 695 # use the modified response class
694 696 response_class = HTTPResponse
695 697 send = safesend
696 698 getresponse = wrapgetresponse(httplib.HTTPConnection)
697 699
698 700 def __init__(self, *args, **kwargs):
699 701 httplib.HTTPConnection.__init__(self, *args, **kwargs)
700 702 self.sentbytescount = 0
701 703 self.receivedbytescount = 0
702 704
703 705
704 706 #########################################################################
705 707 ##### TEST FUNCTIONS
706 708 #########################################################################
707 709
708 710
709 711 def continuity(url):
710 712 md5 = hashlib.md5
711 713 format = b'%25s: %s'
712 714
713 715 # first fetch the file with the normal http handler
714 716 opener = urlreq.buildopener()
715 717 urlreq.installopener(opener)
716 718 fo = urlreq.urlopen(url)
717 719 foo = fo.read()
718 720 fo.close()
719 721 m = md5(foo)
720 722 print(format % (b'normal urllib', hex(m.digest())))
721 723
722 724 # now install the keepalive handler and try again
723 725 opener = urlreq.buildopener(HTTPHandler())
724 726 urlreq.installopener(opener)
725 727
726 728 fo = urlreq.urlopen(url)
727 729 foo = fo.read()
728 730 fo.close()
729 731 m = md5(foo)
730 732 print(format % (b'keepalive read', hex(m.digest())))
731 733
732 734 fo = urlreq.urlopen(url)
733 735 foo = b''
734 736 while True:
735 737 f = fo.readline()
736 738 if f:
737 739 foo = foo + f
738 740 else:
739 741 break
740 742 fo.close()
741 743 m = md5(foo)
742 744 print(format % (b'keepalive readline', hex(m.digest())))
743 745
744 746
745 747 def comp(N, url):
746 748 print(b' making %i connections to:\n %s' % (N, url))
747 749
748 750 procutil.stdout.write(b' first using the normal urllib handlers')
749 751 # first use normal opener
750 752 opener = urlreq.buildopener()
751 753 urlreq.installopener(opener)
752 754 t1 = fetch(N, url)
753 755 print(b' TIME: %.3f s' % t1)
754 756
755 757 procutil.stdout.write(b' now using the keepalive handler ')
756 758 # now install the keepalive handler and try again
757 759 opener = urlreq.buildopener(HTTPHandler())
758 760 urlreq.installopener(opener)
759 761 t2 = fetch(N, url)
760 762 print(b' TIME: %.3f s' % t2)
761 763 print(b' improvement factor: %.2f' % (t1 / t2))
762 764
763 765
764 766 def fetch(N, url, delay=0):
765 767 import time
766 768
767 769 lens = []
768 770 starttime = time.time()
769 771 for i in range(N):
770 772 if delay and i > 0:
771 773 time.sleep(delay)
772 774 fo = urlreq.urlopen(url)
773 775 foo = fo.read()
774 776 fo.close()
775 777 lens.append(len(foo))
776 778 diff = time.time() - starttime
777 779
778 780 j = 0
779 781 for i in lens[1:]:
780 782 j = j + 1
781 783 if not i == lens[0]:
782 784 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
783 785
784 786 return diff
785 787
786 788
787 789 def test_timeout(url):
788 790 global DEBUG
789 791 dbbackup = DEBUG
790 792
791 793 class FakeLogger:
792 794 def debug(self, msg, *args):
793 795 print(msg % args)
794 796
795 797 info = warning = error = debug
796 798
797 799 DEBUG = FakeLogger()
798 800 print(b" fetching the file to establish a connection")
799 801 fo = urlreq.urlopen(url)
800 802 data1 = fo.read()
801 803 fo.close()
802 804
803 805 i = 20
804 806 print(b" waiting %i seconds for the server to close the connection" % i)
805 807 while i > 0:
806 808 procutil.stdout.write(b'\r %2i' % i)
807 809 procutil.stdout.flush()
808 810 time.sleep(1)
809 811 i -= 1
810 812 procutil.stderr.write(b'\r')
811 813
812 814 print(b" fetching the file a second time")
813 815 fo = urlreq.urlopen(url)
814 816 data2 = fo.read()
815 817 fo.close()
816 818
817 819 if data1 == data2:
818 820 print(b' data are identical')
819 821 else:
820 822 print(b' ERROR: DATA DIFFER')
821 823
822 824 DEBUG = dbbackup
823 825
824 826
825 827 def test(url, N=10):
826 828 print(b"performing continuity test (making sure stuff isn't corrupted)")
827 829 continuity(url)
828 830 print(b'')
829 831 print(b"performing speed comparison")
830 832 comp(N, url)
831 833 print(b'')
832 834 print(b"performing dropped-connection check")
833 835 test_timeout(url)
834 836
835 837
836 838 if __name__ == '__main__':
837 839 import time
838 840
839 841 try:
840 842 N = int(sys.argv[1])
841 843 url = sys.argv[2]
842 844 except (IndexError, ValueError):
843 845 print(b"%s <integer> <url>" % sys.argv[0])
844 846 else:
845 847 test(url, N)