keepalive: Do not append _rbuf if _raw_readinto exists (issue6356)...
Cédric Krier
r45930:49f8ba4f stable
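The patch below changes HTTPResponse.readline(): on Python 3, _raw_read() is implemented on top of readinto(), and this class's readinto() already drains self._rbuf, so seeding the chunk list with _rbuf as well would emit the buffered bytes twice. A minimal repro sketch of that duplication (hypothetical stand-in object, not Mercurial code):

    # FakeResponse mimics the Python 3 read path, where read() returns the
    # _rbuf bytes itself (via readinto) before touching the socket.
    class FakeResponse(object):
        def __init__(self, payload):
            self._rbuf = b'ab'      # bytes left over from a prior readline()
            self._payload = payload

        def _raw_read(self, amt):
            out = self._rbuf + self._payload
            self._rbuf = self._payload = b''
            return out[:amt]

    r = FakeResponse(b'cd\n')
    chunks = [r._rbuf]              # the pre-fix seeding of the chunk list
    chunks.append(r._raw_read(8096))
    assert b''.join(chunks) == b'ababcd\n'  # b'ab' delivered twice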
@@ -1,848 +1,852
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> from mercurial.util import urlreq
29 29 >>> from mercurial.keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (e.g. 404)
69 69 reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 ... except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import collections
88 88 import errno
89 89 import hashlib
90 90 import socket
91 91 import sys
92 92 import threading
93 93
94 94 from .i18n import _
95 95 from .pycompat import getattr
96 96 from . import (
97 97 node,
98 98 pycompat,
99 99 urllibcompat,
100 100 util,
101 101 )
102 102 from .utils import procutil
103 103
104 104 httplib = util.httplib
105 105 urlerr = util.urlerr
106 106 urlreq = util.urlreq
107 107
108 108 DEBUG = None
109 109
110 110
111 111 class ConnectionManager(object):
112 112 """
113 113 The connection manager must be able to:
114 114 * keep track of all existing
115 115 """
116 116
117 117 def __init__(self):
118 118 self._lock = threading.Lock()
119 119 self._hostmap = collections.defaultdict(list) # host -> [connection]
120 120 self._connmap = {} # map connections to host
121 121 self._readymap = {} # map connection to ready state
122 122
123 123 def add(self, host, connection, ready):
124 124 self._lock.acquire()
125 125 try:
126 126 self._hostmap[host].append(connection)
127 127 self._connmap[connection] = host
128 128 self._readymap[connection] = ready
129 129 finally:
130 130 self._lock.release()
131 131
132 132 def remove(self, connection):
133 133 self._lock.acquire()
134 134 try:
135 135 try:
136 136 host = self._connmap[connection]
137 137 except KeyError:
138 138 pass
139 139 else:
140 140 del self._connmap[connection]
141 141 del self._readymap[connection]
142 142 self._hostmap[host].remove(connection)
143 143 if not self._hostmap[host]:
144 144 del self._hostmap[host]
145 145 finally:
146 146 self._lock.release()
147 147
148 148 def set_ready(self, connection, ready):
149 149 try:
150 150 self._readymap[connection] = ready
151 151 except KeyError:
152 152 pass
153 153
154 154 def get_ready_conn(self, host):
155 155 conn = None
156 156 self._lock.acquire()
157 157 try:
158 158 for c in self._hostmap[host]:
159 159 if self._readymap[c]:
160 160 self._readymap[c] = False
161 161 conn = c
162 162 break
163 163 finally:
164 164 self._lock.release()
165 165 return conn
166 166
167 167 def get_all(self, host=None):
168 168 if host:
169 169 return list(self._hostmap[host])
170 170 else:
171 171 return dict(self._hostmap)
172 172
173 173
174 174 class KeepAliveHandler(object):
175 175 def __init__(self, timeout=None):
176 176 self._cm = ConnectionManager()
177 177 self._timeout = timeout
178 178 self.requestscount = 0
179 179 self.sentbytescount = 0
180 180
181 181 #### Connection Management
182 182 def open_connections(self):
183 183 """return a list of connected hosts and the number of connections
184 184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186 186
187 187 def close_connection(self, host):
188 188 """close connection(s) to <host>
189 189 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
190 190 no error occurs if there is no connection to that host."""
191 191 for h in self._cm.get_all(host):
192 192 self._cm.remove(h)
193 193 h.close()
194 194
195 195 def close_all(self):
196 196 """close all open connections"""
197 197 for host, conns in pycompat.iteritems(self._cm.get_all()):
198 198 for h in conns:
199 199 self._cm.remove(h)
200 200 h.close()
201 201
202 202 def _request_closed(self, request, host, connection):
203 203 """tells us that this request is now closed and that the
204 204 connection is ready for another request"""
205 205 self._cm.set_ready(connection, True)
206 206
207 207 def _remove_connection(self, host, connection, close=0):
208 208 if close:
209 209 connection.close()
210 210 self._cm.remove(connection)
211 211
212 212 #### Transaction Execution
213 213 def http_open(self, req):
214 214 return self.do_open(HTTPConnection, req)
215 215
216 216 def do_open(self, http_class, req):
217 217 host = urllibcompat.gethost(req)
218 218 if not host:
219 219 raise urlerr.urlerror(b'no host given')
220 220
221 221 try:
222 222 h = self._cm.get_ready_conn(host)
223 223 while h:
224 224 r = self._reuse_connection(h, req, host)
225 225
226 226 # if this response is non-None, then it worked and we're
227 227 # done. Break out, skipping the else block.
228 228 if r:
229 229 break
230 230
231 231 # connection is bad - possibly closed by server
232 232 # discard it and ask for the next free connection
233 233 h.close()
234 234 self._cm.remove(h)
235 235 h = self._cm.get_ready_conn(host)
236 236 else:
237 237 # no (working) free connections were found. Create a new one.
238 238 h = http_class(host, timeout=self._timeout)
239 239 if DEBUG:
240 240 DEBUG.info(
241 241 b"creating new connection to %s (%d)", host, id(h)
242 242 )
243 243 self._cm.add(host, h, False)
244 244 self._start_transaction(h, req)
245 245 r = h.getresponse()
246 246 # The string form of BadStatusLine is the status line. Add some context
247 247 # to make the error message slightly more useful.
248 248 except httplib.BadStatusLine as err:
249 249 raise urlerr.urlerror(
250 250 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
251 251 )
252 252 except (socket.error, httplib.HTTPException) as err:
253 253 raise urlerr.urlerror(err)
254 254
255 255 # If not a persistent connection, don't try to reuse it. Look
256 256 # for this using getattr() since vcr doesn't define this
257 257 # attribute, and in that case always close the connection.
258 258 if getattr(r, 'will_close', True):
259 259 self._cm.remove(h)
260 260
261 261 if DEBUG:
262 262 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
263 263 r._handler = self
264 264 r._host = host
265 265 r._url = req.get_full_url()
266 266 r._connection = h
267 267 r.code = r.status
268 268 r.headers = r.msg
269 269 r.msg = r.reason
270 270
271 271 return r
272 272
273 273 def _reuse_connection(self, h, req, host):
274 274 """start the transaction with a re-used connection
275 275 return a response object (r) upon success or None on failure.
276 276 This DOES NOT close or remove bad connections in cases where
277 277 it returns. However, if an unexpected exception occurs, it
278 278 will close and remove the connection before re-raising.
279 279 """
280 280 try:
281 281 self._start_transaction(h, req)
282 282 r = h.getresponse()
283 283 # note: just because we got something back doesn't mean it
284 284 # worked. We'll check the version below, too.
285 285 except (socket.error, httplib.HTTPException):
286 286 r = None
287 287 except: # re-raises
288 288 # adding this block just in case we've missed
289 289 # something; we will still raise the exception, but
290 290 # let's try to close the connection and remove it
291 291 # first. We previously got into a nasty loop
292 292 # where an exception was uncaught, and so the
293 293 # connection stayed open. On the next try, the
294 294 # same exception was raised, etc. The trade-off is
295 295 # that it's now possible this call will raise
296 296 # a DIFFERENT exception
297 297 if DEBUG:
298 298 DEBUG.error(
299 299 b"unexpected exception - closing connection to %s (%d)",
300 300 host,
301 301 id(h),
302 302 )
303 303 self._cm.remove(h)
304 304 h.close()
305 305 raise
306 306
307 307 if r is None or r.version == 9:
308 308 # httplib falls back to assuming HTTP 0.9 if it gets a
309 309 # bad header back. This is most likely to happen if
310 310 # the socket has been closed by the server since we
311 311 # last used the connection.
312 312 if DEBUG:
313 313 DEBUG.info(
314 314 b"failed to re-use connection to %s (%d)", host, id(h)
315 315 )
316 316 r = None
317 317 else:
318 318 if DEBUG:
319 319 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
320 320
321 321 return r
322 322
323 323 def _start_transaction(self, h, req):
324 324 oldbytescount = getattr(h, 'sentbytescount', 0)
325 325
326 326 # What follows mostly reimplements HTTPConnection.request()
327 327 # except it adds self.parent.addheaders in the mix and sends headers
328 328 # in a deterministic order (to make testing easier).
329 329 headers = util.sortdict(self.parent.addheaders)
330 330 headers.update(sorted(req.headers.items()))
331 331 headers.update(sorted(req.unredirected_hdrs.items()))
332 332 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
333 333 skipheaders = {}
334 334 for n in ('host', 'accept-encoding'):
335 335 if n in headers:
336 336 skipheaders['skip_' + n.replace('-', '_')] = 1
337 337 try:
338 338 if urllibcompat.hasdata(req):
339 339 data = urllibcompat.getdata(req)
340 340 h.putrequest(
341 341 req.get_method(),
342 342 urllibcompat.getselector(req),
343 343 **skipheaders
344 344 )
345 345 if 'content-type' not in headers:
346 346 h.putheader(
347 347 'Content-type', 'application/x-www-form-urlencoded'
348 348 )
349 349 if 'content-length' not in headers:
350 350 h.putheader('Content-length', '%d' % len(data))
351 351 else:
352 352 h.putrequest(
353 353 req.get_method(),
354 354 urllibcompat.getselector(req),
355 355 **skipheaders
356 356 )
357 357 except socket.error as err:
358 358 raise urlerr.urlerror(err)
359 359 for k, v in headers.items():
360 360 h.putheader(k, v)
361 361 h.endheaders()
362 362 if urllibcompat.hasdata(req):
363 363 h.send(data)
364 364
365 365 # This will fail to record events in case of I/O failure. That's OK.
366 366 self.requestscount += 1
367 367 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
368 368
369 369 try:
370 370 self.parent.requestscount += 1
371 371 self.parent.sentbytescount += (
372 372 getattr(h, 'sentbytescount', 0) - oldbytescount
373 373 )
374 374 except AttributeError:
375 375 pass
376 376
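# Hypothetical illustration of the header handling in _start_transaction
# above: given request headers such as
#
#   {'User-Agent': b'mercurial', 'Host': b'example.com',
#    'Accept-Encoding': b'identity'}
#
# every name is lower-cased and sent in a deterministic order, and
# putrequest() receives skip_host=1 and skip_accept_encoding=1 so httplib
# does not emit its own copies of those two headers.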
377 377
378 378 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
379 379 pass
380 380
381 381
382 382 class HTTPResponse(httplib.HTTPResponse):
383 383 # we need to subclass HTTPResponse in order to
384 384 # 1) add readline(), readlines(), and readinto() methods
385 385 # 2) add a close_connection() method
386 386 # 3) add info() and geturl() methods
387 387
388 388 # in order to add readline(), read must be modified to deal with a
389 389 # buffer. example: readline must read a buffer and then spit back
390 390 # one line at a time. The only real alternative is to read one
391 391 # BYTE at a time (ick). Once something has been read, it can't be
392 392 # put back (ok, maybe it can, but that's even uglier than this),
393 393 # so if you THEN do a normal read, you must first take stuff from
394 394 # the buffer.
395 395
396 396 # the read method wraps the original to accommodate buffering,
397 397 # although read() never adds to the buffer.
398 398 # Both readline and readlines have been stolen with almost no
399 399 # modification from socket.py
400 400
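# A sketch of the buffering contract described above (hypothetical
# session, assuming an open response object r):
#
#   line = r.readline()  # may over-read; the excess is parked in _rbuf
#   rest = r.read()      # must drain _rbuf before touching the socket
#
# read() consumes _rbuf but never adds to it; only readline() refills it.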
401 401 def __init__(self, sock, debuglevel=0, strict=0, method=None):
402 402 extrakw = {}
403 403 if not pycompat.ispy3:
404 404 extrakw['strict'] = True
405 405 extrakw['buffering'] = True
406 406 httplib.HTTPResponse.__init__(
407 407 self, sock, debuglevel=debuglevel, method=method, **extrakw
408 408 )
409 409 self.fileno = sock.fileno
410 410 self.code = None
411 411 self.receivedbytescount = 0
412 412 self._rbuf = b''
413 413 self._rbufsize = 8096
414 414 self._handler = None # inserted by the handler later
415 415 self._host = None # (same)
416 416 self._url = None # (same)
417 417 self._connection = None # (same)
418 418
419 419 _raw_read = httplib.HTTPResponse.read
420 420 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
421 421
422 422 # Python 2.7 has a single close() which closes the socket handle.
423 423 # This method was effectively renamed to _close_conn() in Python 3. But
424 424 # there is also a close(). _close_conn() is called by methods like
425 425 # read().
426 426
427 427 def close(self):
428 428 if self.fp:
429 429 self.fp.close()
430 430 self.fp = None
431 431 if self._handler:
432 432 self._handler._request_closed(
433 433 self, self._host, self._connection
434 434 )
435 435
436 436 def _close_conn(self):
437 437 self.close()
438 438
439 439 def close_connection(self):
440 440 self._handler._remove_connection(self._host, self._connection, close=1)
441 441 self.close()
442 442
443 443 def info(self):
444 444 return self.headers
445 445
446 446 def geturl(self):
447 447 return self._url
448 448
449 449 def read(self, amt=None):
450 450 # the _rbuf test is only in this first if for speed. It's not
451 451 # logically necessary
452 452 if self._rbuf and amt is not None:
453 453 L = len(self._rbuf)
454 454 if amt > L:
455 455 amt -= L
456 456 else:
457 457 s = self._rbuf[:amt]
458 458 self._rbuf = self._rbuf[amt:]
459 459 return s
460 460 # Careful! http.client.HTTPResponse.read() on Python 3 is
461 461 # implemented using readinto(), which can duplicate self._rbuf
462 462 # if it's not empty.
463 463 s = self._rbuf
464 464 self._rbuf = b''
465 465 data = self._raw_read(amt)
466 466
467 467 self.receivedbytescount += len(data)
468 468 try:
469 469 self._connection.receivedbytescount += len(data)
470 470 except AttributeError:
471 471 pass
472 472 try:
473 473 self._handler.parent.receivedbytescount += len(data)
474 474 except AttributeError:
475 475 pass
476 476
477 477 s += data
478 478 return s
479 479
480 480 # stolen from Python SVN #68532 to fix issue1088
481 481 def _read_chunked(self, amt):
482 482 chunk_left = self.chunk_left
483 483 parts = []
484 484
485 485 while True:
486 486 if chunk_left is None:
487 487 line = self.fp.readline()
488 488 i = line.find(b';')
489 489 if i >= 0:
490 490 line = line[:i] # strip chunk-extensions
491 491 try:
492 492 chunk_left = int(line, 16)
493 493 except ValueError:
494 494 # close the connection as protocol synchronization is
495 495 # probably lost
496 496 self.close()
497 497 raise httplib.IncompleteRead(b''.join(parts))
498 498 if chunk_left == 0:
499 499 break
500 500 if amt is None:
501 501 parts.append(self._safe_read(chunk_left))
502 502 elif amt < chunk_left:
503 503 parts.append(self._safe_read(amt))
504 504 self.chunk_left = chunk_left - amt
505 505 return b''.join(parts)
506 506 elif amt == chunk_left:
507 507 parts.append(self._safe_read(amt))
508 508 self._safe_read(2) # toss the CRLF at the end of the chunk
509 509 self.chunk_left = None
510 510 return b''.join(parts)
511 511 else:
512 512 parts.append(self._safe_read(chunk_left))
513 513 amt -= chunk_left
514 514
515 515 # we read the whole chunk, get another
516 516 self._safe_read(2) # toss the CRLF at the end of the chunk
517 517 chunk_left = None
518 518
519 519 # read and discard trailer up to the CRLF terminator
520 520 ### note: we shouldn't have any trailers!
521 521 while True:
522 522 line = self.fp.readline()
523 523 if not line:
524 524 # a vanishingly small number of sites EOF without
525 525 # sending the trailer
526 526 break
527 527 if line == b'\r\n':
528 528 break
529 529
530 530 # we read everything; close the "file"
531 531 self.close()
532 532
533 533 return b''.join(parts)
534 534
535 535 def readline(self):
536 536 # Fast path for when a line is already available in the read buffer.
537 537 i = self._rbuf.find(b'\n')
538 538 if i >= 0:
539 539 i += 1
540 540 line = self._rbuf[:i]
541 541 self._rbuf = self._rbuf[i:]
542 542 return line
543 543
544 544 # No newline in local buffer. Read until we find one.
545 # a read via _raw_read() goes through readinto(), which already returns _rbuf
546 if self._raw_readinto is None:
545 547 chunks = [self._rbuf]
548 else:
549 chunks = []
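# Illustration with hypothetical values: if _rbuf == b'ab' on Python 3,
# the _raw_read() call below goes through readinto(), which itself
# returns the buffered b'ab'; seeding chunks with _rbuf too would
# therefore deliver those bytes twice (the duplication of issue6356).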
546 550 i = -1
547 551 readsize = self._rbufsize
548 552 while True:
549 553 new = self._raw_read(readsize)
550 554 if not new:
551 555 break
552 556
553 557 self.receivedbytescount += len(new)
554 558 self._connection.receivedbytescount += len(new)
555 559 try:
556 560 self._handler.parent.receivedbytescount += len(new)
557 561 except AttributeError:
558 562 pass
559 563
560 564 chunks.append(new)
561 565 i = new.find(b'\n')
562 566 if i >= 0:
563 567 break
564 568
565 569 # We either have exhausted the stream or have a newline in chunks[-1].
566 570
567 571 # EOF
568 572 if i == -1:
569 573 self._rbuf = b''
570 574 return b''.join(chunks)
571 575
572 576 i += 1
573 577 self._rbuf = chunks[-1][i:]
574 578 chunks[-1] = chunks[-1][:i]
575 579 return b''.join(chunks)
576 580
577 581 def readlines(self, sizehint=0):
578 582 total = 0
579 583 list = []
580 584 while True:
581 585 line = self.readline()
582 586 if not line:
583 587 break
584 588 list.append(line)
585 589 total += len(line)
586 590 if sizehint and total >= sizehint:
587 591 break
588 592 return list
589 593
590 594 def readinto(self, dest):
591 595 if self._raw_readinto is None:
592 596 res = self.read(len(dest))
593 597 if not res:
594 598 return 0
595 599 dest[0 : len(res)] = res
596 600 return len(res)
597 601 total = len(dest)
598 602 have = len(self._rbuf)
599 603 if have >= total:
600 604 dest[0:total] = self._rbuf[:total]
601 605 self._rbuf = self._rbuf[total:]
602 606 return total
603 607 mv = memoryview(dest)
604 608 got = self._raw_readinto(mv[have:total])
605 609
606 610 self.receivedbytescount += got
607 611 self._connection.receivedbytescount += got
608 612 try:
609 613 self._handler.parent.receivedbytescount += got
610 614 except AttributeError:
611 615 pass
612 616
613 617 dest[0:have] = self._rbuf
614 618 got += len(self._rbuf)
615 619 self._rbuf = b''
616 620 return got
617 621
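# Hypothetical sketch of the split performed by readinto() above, assuming
# b'xy' is already buffered in _rbuf and the caller passes a 4-byte buffer:
#
#   buf = bytearray(4)
#   n = r.readinto(buf)  # raw-reads into buf[2:4], then copies b'xy'
#                        # into buf[0:2]; n counts both parts (up to 4 here)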
618 622
619 623 def safesend(self, str):
620 624 """Send `str' to the server.
621 625
622 626 Shamelessly ripped off from httplib to patch a bad behavior.
623 627 """
624 628 # _broken_pipe_resp is an attribute we set in this function
625 629 # if the socket is closed while we're sending data but
626 630 # the server sent us a response before hanging up.
627 631 # In that case, we want to pretend to send the rest of the
628 632 # outgoing data, and then let the user use getresponse()
629 633 # (which we wrap) to get this last response before
630 634 # opening a new socket.
631 635 if getattr(self, '_broken_pipe_resp', None) is not None:
632 636 return
633 637
634 638 if self.sock is None:
635 639 if self.auto_open:
636 640 self.connect()
637 641 else:
638 642 raise httplib.NotConnected
639 643
640 644 # send the data to the server. if we get a broken pipe, then close
641 645 # the socket. we want to reconnect when somebody tries to send again.
642 646 #
643 647 # NOTE: we DO propagate the error, though, because we cannot simply
644 648 # ignore the error... the caller will know if they can retry.
645 649 if self.debuglevel > 0:
646 650 print(b"send:", repr(str))
647 651 try:
648 652 blocksize = 8192
649 653 read = getattr(str, 'read', None)
650 654 if read is not None:
651 655 if self.debuglevel > 0:
652 656 print(b"sending a read()able")
653 657 data = read(blocksize)
654 658 while data:
655 659 self.sock.sendall(data)
656 660 self.sentbytescount += len(data)
657 661 data = read(blocksize)
658 662 else:
659 663 self.sock.sendall(str)
660 664 self.sentbytescount += len(str)
661 665 except socket.error as v:
662 666 reraise = True
663 667 if v.args[0] == errno.EPIPE: # Broken pipe
664 668 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
665 669 self._broken_pipe_resp = None
666 670 self._broken_pipe_resp = self.getresponse()
667 671 reraise = False
668 672 self.close()
669 673 if reraise:
670 674 raise
671 675
672 676
673 677 def wrapgetresponse(cls):
674 678 """Wraps getresponse in cls with a broken-pipe sane version.
675 679 """
676 680
677 681 def safegetresponse(self):
678 682 # In safesend() we might set the _broken_pipe_resp
679 683 # attribute, in which case the socket has already
680 684 # been closed and we just need to give them the response
681 685 # back. Otherwise, we use the normal response path.
682 686 r = getattr(self, '_broken_pipe_resp', None)
683 687 if r is not None:
684 688 return r
685 689 return cls.getresponse(self)
686 690
687 691 safegetresponse.__doc__ = cls.getresponse.__doc__
688 692 return safegetresponse
689 693
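# Hypothetical broken-pipe sequence that safesend() and safegetresponse()
# cooperate on, assuming a server that answers early and hangs up mid-upload:
#
#   conn.send(large_body)   # EPIPE: safesend stashes getresponse() in
#                           # _broken_pipe_resp and closes the socket
#   r = conn.getresponse()  # safegetresponse returns the stashed response
#                           # instead of touching the closed socket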
690 694
691 695 class HTTPConnection(httplib.HTTPConnection):
692 696 # url.httpsconnection inherits from this. So when adding/removing
693 697 # attributes, be sure to audit httpsconnection() for unintended
694 698 # consequences.
695 699
696 700 # use the modified response class
697 701 response_class = HTTPResponse
698 702 send = safesend
699 703 getresponse = wrapgetresponse(httplib.HTTPConnection)
700 704
701 705 def __init__(self, *args, **kwargs):
702 706 httplib.HTTPConnection.__init__(self, *args, **kwargs)
703 707 self.sentbytescount = 0
704 708 self.receivedbytescount = 0
705 709
706 710
707 711 #########################################################################
708 712 ##### TEST FUNCTIONS
709 713 #########################################################################
710 714
711 715
712 716 def continuity(url):
713 717 md5 = hashlib.md5
714 718 format = b'%25s: %s'
715 719
716 720 # first fetch the file with the normal http handler
717 721 opener = urlreq.buildopener()
718 722 urlreq.installopener(opener)
719 723 fo = urlreq.urlopen(url)
720 724 foo = fo.read()
721 725 fo.close()
722 726 m = md5(foo)
723 727 print(format % (b'normal urllib', node.hex(m.digest())))
724 728
725 729 # now install the keepalive handler and try again
726 730 opener = urlreq.buildopener(HTTPHandler())
727 731 urlreq.installopener(opener)
728 732
729 733 fo = urlreq.urlopen(url)
730 734 foo = fo.read()
731 735 fo.close()
732 736 m = md5(foo)
733 737 print(format % (b'keepalive read', node.hex(m.digest())))
734 738
735 739 fo = urlreq.urlopen(url)
736 740 foo = b''
737 741 while True:
738 742 f = fo.readline()
739 743 if f:
740 744 foo = foo + f
741 745 else:
742 746 break
743 747 fo.close()
744 748 m = md5(foo)
745 749 print(format % (b'keepalive readline', node.hex(m.digest())))
746 750
747 751
748 752 def comp(N, url):
749 753 print(b' making %i connections to:\n %s' % (N, url))
750 754
751 755 procutil.stdout.write(b' first using the normal urllib handlers')
752 756 # first use normal opener
753 757 opener = urlreq.buildopener()
754 758 urlreq.installopener(opener)
755 759 t1 = fetch(N, url)
756 760 print(b' TIME: %.3f s' % t1)
757 761
758 762 procutil.stdout.write(b' now using the keepalive handler ')
759 763 # now install the keepalive handler and try again
760 764 opener = urlreq.buildopener(HTTPHandler())
761 765 urlreq.installopener(opener)
762 766 t2 = fetch(N, url)
763 767 print(b' TIME: %.3f s' % t2)
764 768 print(b' improvement factor: %.2f' % (t1 / t2))
765 769
766 770
767 771 def fetch(N, url, delay=0):
768 772 import time
769 773
770 774 lens = []
771 775 starttime = time.time()
772 776 for i in range(N):
773 777 if delay and i > 0:
774 778 time.sleep(delay)
775 779 fo = urlreq.urlopen(url)
776 780 foo = fo.read()
777 781 fo.close()
778 782 lens.append(len(foo))
779 783 diff = time.time() - starttime
780 784
781 785 j = 0
782 786 for i in lens[1:]:
783 787 j = j + 1
784 788 if i != lens[0]:
785 789 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
786 790
787 791 return diff
788 792
789 793
790 794 def test_timeout(url):
791 795 global DEBUG
792 796 dbbackup = DEBUG
793 797
794 798 class FakeLogger(object):
795 799 def debug(self, msg, *args):
796 800 print(msg % args)
797 801
798 802 info = warning = error = debug
799 803
800 804 DEBUG = FakeLogger()
801 805 print(b" fetching the file to establish a connection")
802 806 fo = urlreq.urlopen(url)
803 807 data1 = fo.read()
804 808 fo.close()
805 809
806 810 i = 20
807 811 print(b" waiting %i seconds for the server to close the connection" % i)
808 812 while i > 0:
809 813 procutil.stdout.write(b'\r %2i' % i)
810 814 procutil.stdout.flush()
811 815 time.sleep(1)
812 816 i -= 1
813 817 procutil.stderr.write(b'\r')
814 818
815 819 print(b" fetching the file a second time")
816 820 fo = urlreq.urlopen(url)
817 821 data2 = fo.read()
818 822 fo.close()
819 823
820 824 if data1 == data2:
821 825 print(b' data are identical')
822 826 else:
823 827 print(b' ERROR: DATA DIFFER')
824 828
825 829 DEBUG = dbbackup
826 830
827 831
828 832 def test(url, N=10):
829 833 print(b"performing continuity test (making sure stuff isn't corrupted)")
830 834 continuity(url)
831 835 print(b'')
832 836 print(b"performing speed comparison")
833 837 comp(N, url)
834 838 print(b'')
835 839 print(b"performing dropped-connection check")
836 840 test_timeout(url)
837 841
838 842
839 843 if __name__ == '__main__':
840 844 import time
841 845
842 846 try:
843 847 N = int(sys.argv[1])
844 848 url = sys.argv[2]
845 849 except (IndexError, ValueError):
846 850 print(b"%s <integer> <url>" % sys.argv[0])
847 851 else:
848 852 test(url, N)