##// END OF EJS Templates
keepalive: remove pycompat.iteritems()...
Gregory Szorc -
r49779:dea766fc default
parent child Browse files
Show More
@@ -1,846 +1,846
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (i.e. 404)
69 69 reason - english translation of status (i.e. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85
86 86 import collections
87 87 import errno
88 88 import hashlib
89 89 import socket
90 90 import sys
91 91 import threading
92 92
93 93 from .i18n import _
94 94 from .pycompat import getattr
95 95 from .node import hex
96 96 from . import (
97 97 pycompat,
98 98 urllibcompat,
99 99 util,
100 100 )
101 101 from .utils import procutil
102 102
103 103 httplib = util.httplib
104 104 urlerr = util.urlerr
105 105 urlreq = util.urlreq
106 106
107 107 DEBUG = None
108 108
109 109
class ConnectionManager(object):
    """Thread-safe registry of keep-alive connections, grouped by host.

    Tracks, for each connection, which host it belongs to and whether it
    is currently free ("ready") to service another request.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = collections.defaultdict(list)  # host -> [connection]
        self._connmap = {}  # map connections to host
        self._readymap = {}  # map connection to ready state

    def add(self, host, connection, ready):
        """Register ``connection`` for ``host`` with the given ready state."""
        with self._lock:
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget ``connection`` entirely; unknown connections are ignored."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Mark ``connection`` as free (``ready`` truthy) or busy.

        Note: dict assignment never raises KeyError, so the handler is
        intentionally tolerant here — an unknown connection is simply
        (re-)recorded, matching the historical behavior.
        """
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        """Return a free connection to ``host``, marking it busy, or None."""
        conn = None
        with self._lock:
            # .get() so that probing an unknown host does not create a
            # spurious empty entry in the defaultdict
            for candidate in self._hostmap.get(host, []):
                if self._readymap[candidate]:
                    self._readymap[candidate] = False
                    conn = candidate
                    break
        return conn

    def get_all(self, host=None):
        """Return a copied list of connections for ``host``, or, with no
        host, a shallow copy of the whole host -> [connection] mapping."""
        if host:
            # .get() avoids mutating the defaultdict on a read-only probe
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
171 171
172 172
class KeepAliveHandler(object):
    """urllib2-style handler mixin implementing HTTP/1.1 keep-alive.

    Combined with a concrete handler class (see HTTPHandler below) that
    supplies the opener plumbing, notably ``self.parent``.
    """

    def __init__(self, timeout=None):
        self._cm = ConnectionManager()
        # socket timeout (seconds) applied to newly created connections
        self._timeout = timeout
        # running totals; mirrored onto self.parent when it has counters
        self.requestscount = 0
        self.sentbytescount = 0

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, True)

    def _remove_connection(self, host, connection, close=0):
        # drop the connection from the pool, optionally closing its socket
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        # entry point called by the urllib opener machinery
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Open ``req``, re-using a pooled connection when possible.

        Returns the response object annotated with handler bookkeeping
        attributes; raises a urllib error on connection/protocol failure.
        """
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror(b'no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host, timeout=self._timeout)
                if DEBUG:
                    DEBUG.info(
                        b"creating new connection to %s (%d)", host, id(h)
                    )
                self._cm.add(host, h, False)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
            )
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, 'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
        # annotate the response so its close()/close_connection() can
        # find their way back to this handler and connection
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:  # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error(
                    b"unexpected exception - closing connection to %s (%d)",
                    host,
                    id(h),
                )
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info(
                    b"failed to re-use connection to %s (%d)", host, id(h)
                )
            r = None
        else:
            if DEBUG:
                DEBUG.info(b"re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Send the request line, headers, and body of ``req`` over ``h``."""
        oldbytescount = getattr(h, 'sentbytescount', 0)

        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        # tell putrequest() not to auto-emit headers we are sending ourselves
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(),
                    urllibcompat.getselector(req),
                    **skipheaders
                )
                if 'content-type' not in headers:
                    h.putheader(
                        'Content-type', 'application/x-www-form-urlencoded'
                    )
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(),
                    urllibcompat.getselector(req),
                    **skipheaders
                )
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

        # This will fail to record events in case of I/O failure. That's OK.
        self.requestscount += 1
        self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount

        try:
            self.parent.requestscount += 1
            self.parent.sentbytescount += (
                getattr(h, 'sentbytescount', 0) - oldbytescount
            )
        except AttributeError:
            # parent may not carry counters (e.g. a plain OpenerDirector)
            pass
375 375
376 376
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    # concrete handler: KeepAliveHandler supplies http_open() and the
    # connection pool; urlreq.httphandler supplies the opener protocol
    pass
380 380
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # 'strict' is accepted for backwards compatibility but not
        # forwarded: http.client dropped it in Python 3
        httplib.HTTPResponse.__init__(
            self, sock, debuglevel=debuglevel, method=method
        )
        self.fileno = sock.fileno
        self.code = None
        self.receivedbytescount = 0
        self._rbuf = b''  # local read-ahead buffer (see class comment)
        self._rbufsize = 8096  # chunk size used by readline()'s raw reads
        self._handler = None  # inserted by the handler later
        self._host = None  # (same)
        self._url = None  # (same)
        self._connection = None  # (same)

    # keep references to the base-class I/O so our overrides can call it
    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    # Python 2.7 has a single close() which closes the socket handle.
    # This method was effectively renamed to _close_conn() in Python 3. But
    # there is also a close(). _close_conn() is called by methods like
    # read().

    def close(self):
        """Close the response and hand the connection back to the pool."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                # mark the underlying connection as ready for re-use
                self._handler._request_closed(
                    self, self._host, self._connection
                )

    def _close_conn(self):
        # Python 3's http.client calls _close_conn(); route it to close()
        self.close()

    def close_connection(self):
        """Close this response AND its underlying connection."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Read up to ``amt`` bytes (all if None), draining _rbuf first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                # buffered data alone satisfies the request
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s
        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = b''
        data = self._raw_read(amt)

        # account received bytes on self, the connection, and the opener
        self.receivedbytescount += len(data)
        try:
            self._connection.receivedbytescount += len(data)
        except AttributeError:
            pass
        try:
            self._handler.parent.receivedbytescount += len(data)
        except AttributeError:
            pass

        s += data
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read a chunked-encoded body; ``amt`` of None means read to end."""
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                # parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(b';')
                if i >= 0:
                    line = line[:i]  # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(b''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # caller wants less than this chunk holds; remember the rest
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == b'\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return b''.join(parts)

    def readline(self):
        """Read one line (including the newline), refilling _rbuf."""
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find(b'\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        # readinto read via readinto will already return _rbuf
        if self._raw_readinto is None:
            chunks = [self._rbuf]
        else:
            chunks = []
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            self.receivedbytescount += len(new)
            self._connection.receivedbytescount += len(new)
            try:
                self._handler.parent.receivedbytescount += len(new)
            except AttributeError:
                pass

            chunks.append(new)
            i = new.find(b'\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = b''
            return b''.join(chunks)

        # split the final chunk at the newline; keep the remainder buffered
        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return b''.join(chunks)

    def readlines(self, sizehint=0):
        """Read lines until EOF, or until ``sizehint`` bytes are gathered."""
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def readinto(self, dest):
        """Fill ``dest`` with up to len(dest) bytes; return the byte count."""
        if self._raw_readinto is None:
            # base class has no readinto(); emulate via read()
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0 : len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            # buffered data alone satisfies the request
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])

        self.receivedbytescount += got
        self._connection.receivedbytescount += got
        try:
            # NOTE(review): read()/readline() credit self._handler.parent
            # here, while this method credits self._handler — confirm
            # whether this asymmetry is intentional
            self._handler.receivedbytescount += got
        except AttributeError:
            pass

        # prepend the previously buffered bytes
        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = b''
        return got
616 616
617 617
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    Installed as HTTPConnection.send (below); ``str`` may be bytes or a
    read()-able object. Updates self.sentbytescount as a side effect.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print(b"send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            # file-like payload: stream it out in blocks
            if self.debuglevel > 0:
                print(b"sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                self.sentbytescount += len(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
            self.sentbytescount += len(str)
    except socket.error as v:
        reraise = True
        if v.args[0] == errno.EPIPE:  # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # stash any early response so the wrapped getresponse()
                # can hand it back after the socket is gone
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
670 670
671 671
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version."""

    def safegetresponse(self):
        # A previous safesend() may have stashed the server's early
        # response on the instance after the socket broke (and was
        # closed); hand that back instead of touching the dead
        # connection. Otherwise, fall through to the normal path.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached

    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
687 687
688 688
class HTTPConnection(httplib.HTTPConnection):
    # url.httpsconnection inherits from this. So when adding/removing
    # attributes, be sure to audit httpsconnection() for unintended
    # consequences.

    # use the modified response class
    response_class = HTTPResponse
    # broken-pipe-tolerant send/getresponse pair (defined above)
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)

    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        # byte counters maintained by safesend()/HTTPResponse accounting
        self.sentbytescount = 0
        self.receivedbytescount = 0
703 703
704 704
705 705 #########################################################################
706 706 ##### TEST FUNCTIONS
707 707 #########################################################################
708 708
709 709
def continuity(url):
    """Fetch ``url`` three ways and print the MD5 of each payload.

    Uses the plain urllib handlers, then the keepalive handler via
    read(), then the keepalive handler via readline(); all three
    digests should be identical.
    """
    md5 = hashlib.md5
    format = b'%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % (b'normal urllib', hex(m.digest())))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % (b'keepalive read', hex(m.digest())))

    # same again, but exercising the readline() buffering path
    fo = urlreq.urlopen(url)
    foo = b''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(format % (b'keepalive readline', hex(m.digest())))
744 744
745 745
def comp(N, url):
    """Time ``N`` fetches of ``url`` with and without keepalive and
    print the speed improvement factor."""
    print(b' making %i connections to:\n %s' % (N, url))

    procutil.stdout.write(b' first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print(b' TIME: %.3f s' % t1)

    procutil.stdout.write(b' now using the keepalive handler ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print(b' TIME: %.3f s' % t2)
    print(b' improvement factor: %.2f' % (t1 / t2))
763 763
764 764
def fetch(N, url, delay=0):
    """Fetch ``url`` ``N`` times (sleeping ``delay`` seconds between
    fetches) and return the total elapsed time in seconds.

    Warns on stdout whenever a fetch returns a payload whose length
    differs from the first fetch's.
    """
    import time

    starttime = time.time()
    sizes = []
    for attempt in range(N):
        if delay and attempt > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        payload = fo.read()
        fo.close()
        sizes.append(len(payload))
    elapsed = time.time() - starttime

    # every fetch should return the same number of bytes as the first
    for idx, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print(b"WARNING: inconsistent length on read %i: %i" % (idx, size))

    return elapsed
786 786
787 787
def test_timeout(url):
    """Check that a server-side connection drop is handled cleanly.

    Fetches ``url``, waits long enough for the server to time out the
    keep-alive connection, fetches again, and compares the payloads.
    Temporarily replaces the module-level DEBUG logger so pool events
    are printed.
    """
    global DEBUG
    dbbackup = DEBUG

    class FakeLogger(object):
        # minimal logger that prints, making connection re-use/creation
        # events from this module visible during the test
        def debug(self, msg, *args):
            print(msg % args)

        info = warning = error = debug

    DEBUG = FakeLogger()
    print(b"  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print(b"  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        procutil.stdout.write(b'\r  %2i' % i)
        procutil.stdout.flush()
        # NOTE(review): relies on `time` being importable at module scope
        # (imported under the __main__ guard) — confirm for library use
        time.sleep(1)
        i -= 1
    procutil.stderr.write(b'\r')

    print(b"  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print(b'  data are identical')
    else:
        print(b'  ERROR: DATA DIFFER')

    DEBUG = dbbackup
824 824
825 825
def test(url, N=10):
    """Run all three diagnostics against ``url``: payload continuity,
    speed comparison (``N`` fetches each way), and dropped-connection
    recovery."""
    print(b"performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print(b'')
    print(b"performing speed comparison")
    comp(N, url)
    print(b'')
    print(b"performing dropped-connection check")
    test_timeout(url)
835 835
836 836
if __name__ == '__main__':
    # usage: keepalive.py <number-of-fetches> <url>
    import time

    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print(b"%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now