##// END OF EJS Templates
keepalive: track ready state with a bool...
Gregory Szorc -
r41454:1db94ebb default
parent child Browse files
Show More
@@ -1,808 +1,808 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (i.e. 404)
69 69 reason - english translation of status (i.e. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import collections
88 88 import errno
89 89 import hashlib
90 90 import socket
91 91 import sys
92 92 import threading
93 93
94 94 from .i18n import _
95 95 from . import (
96 96 node,
97 97 pycompat,
98 98 urllibcompat,
99 99 util,
100 100 )
101 101 from .utils import (
102 102 procutil,
103 103 )
104 104
105 105 httplib = util.httplib
106 106 urlerr = util.urlerr
107 107 urlreq = util.urlreq
108 108
109 109 DEBUG = None
110 110
class ConnectionManager(object):
    """Thread-safe registry of keep-alive connections.

    Tracks, per host, the pooled connections and whether each
    connection is currently "ready" (idle and reusable).
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = collections.defaultdict(list) # host -> [connection]
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register ``connection`` for ``host`` with the given ready state."""
        with self._lock:
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget ``connection`` entirely; no-op if it is unknown."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Update the ready state of a tracked connection.

        Unknown (e.g. already removed) connections are ignored.  The
        previous ``try/except KeyError`` around a dict *store* could
        never trigger, so a stale connection was silently re-inserted
        into ``_readymap``; the explicit membership test fixes that
        leak.
        """
        with self._lock:
            if connection in self._readymap:
                self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """Return a ready connection for ``host`` (marking it busy), or None."""
        conn = None
        with self._lock:
            # .get() so that probing an unseen host does not insert an
            # empty list into the defaultdict.
            for c in self._hostmap.get(host, ()):
                if self._readymap[c]:
                    self._readymap[c] = False
                    conn = c
                    break
        return conn

    def get_all(self, host=None):
        """Return a list of connections for ``host``, or a dict of all."""
        if host:
            return list(self._hostmap.get(host, ()))
        else:
            return dict(self._hostmap)
171 171
class KeepAliveHandler(object):
    """urllib handler that pools and reuses HTTP connections.

    Also accumulates request and sent-byte counters on itself and,
    when present, on its parent opener.
    """
    def __init__(self, timeout=None):
        self._cm = ConnectionManager()
        self._timeout = timeout
        self.requestscount = 0
        self.sentbytescount = 0

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # .items() rather than the Python 2-only .iteritems(): this
        # file otherwise supports Python 3 (see pycompat.ispy3 usage),
        # and iteritems() would raise AttributeError there.
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, True)

    def _remove_connection(self, host, connection, close=0):
        # Drop (and optionally close) a connection that must not be
        # reused.
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Perform ``req``, reusing a pooled connection when possible.

        Falls back to creating a fresh ``http_class`` connection when no
        working pooled connection exists.  Translates low-level socket
        and httplib errors into urllib URLError instances.
        """
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host, timeout=self._timeout)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, False)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, r'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Send ``req`` (request line, headers, and body) on connection ``h``.

        Raises URLError on socket failures.  Updates byte/request
        counters on self and, when available, on the parent opener.
        """
        oldbytescount = getattr(h, 'sentbytescount', 0)

        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in (r'host', r'accept-encoding'):
            if n in headers:
                skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
                if r'content-type' not in headers:
                    h.putheader(r'Content-type',
                                r'application/x-www-form-urlencoded')
                if r'content-length' not in headers:
                    h.putheader(r'Content-length', r'%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

        # This will fail to record events in case of I/O failure. That's OK.
        self.requestscount += 1
        self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount

        try:
            self.parent.requestscount += 1
            self.parent.sentbytescount += (
                getattr(h, 'sentbytescount', 0) - oldbytescount)
        except AttributeError:
            pass
362 362
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    """Plain-HTTP opener handler combining keep-alive connection reuse
    with urllib's standard HTTP handler machinery; adds no behavior of
    its own."""
    pass
365 365
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        # Python 2's HTTPResponse accepts strict/buffering kwargs that
        # no longer exist on Python 3.
        if not pycompat.ispy3:
            extrakw[r'strict'] = True
            extrakw[r'buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        # running count of body bytes received on this response
        self.receivedbytescount = 0
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # keep references to the unwrapped httplib primitives; read() and
    # readinto() below layer buffering on top of them
    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    def close(self):
        """Close the response and notify the handler that the underlying
        connection may be reused."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the response AND tear down the underlying connection so
        it is not reused."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Read up to ``amt`` bytes (all remaining if None), draining the
        local buffer before touching the raw httplib read."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s
        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = ''
        data = self._raw_read(amt)

        # propagate byte counts to the connection and opener when they
        # track them; AttributeError means they don't (e.g. not yet wired)
        self.receivedbytescount += len(data)
        try:
            self._connection.receivedbytescount += len(data)
        except AttributeError:
            pass
        try:
            self._handler.parent.receivedbytescount += len(data)
        except AttributeError:
            pass

        s += data
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read ``amt`` bytes (all if None) from a chunked-encoded body,
        tracking partial-chunk state in ``self.chunk_left``."""
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                # at a chunk boundary: parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    # last-chunk marker: fall through to trailer handling
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # caller wants less than this chunk holds; remember the rest
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        """Read one line, buffering raw reads in ``self._rbuf``."""
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            # NOTE(review): unlike read(), the connection counter update
            # here is not guarded by try/except AttributeError — confirm
            # _connection is always set before readline() is reachable.
            self.receivedbytescount += len(new)
            self._connection.receivedbytescount += len(new)
            try:
                self._handler.parent.receivedbytescount += len(new)
            except AttributeError:
                pass

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        # split the final chunk at the newline; the remainder becomes
        # the new read buffer
        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        """Read lines until EOF, or until at least ``sizehint`` bytes
        have been accumulated (when sizehint is non-zero)."""
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def readinto(self, dest):
        """Fill ``dest`` from the buffer plus the raw stream; return the
        number of bytes written (may be fewer than len(dest))."""
        if self._raw_readinto is None:
            # no native readinto (old httplib): emulate via read()
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0:len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            # buffer alone satisfies the request
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])

        # NOTE(review): read()/readline() update
        # self._handler.parent.receivedbytescount, but this updates
        # self._handler.receivedbytescount — confirm which attribute is
        # intended (the except swallows the difference either way).
        self.receivedbytescount += got
        self._connection.receivedbytescount += got
        try:
            self._handler.receivedbytescount += got
        except AttributeError:
            pass

        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = ''
        return got
592 592
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    Installed as HTTPConnection.send(); ``self`` is the connection and
    ``str`` may be a bytes payload or a read()-able object.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            # file-like payload: stream it in blocksize chunks
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                self.sentbytescount += len(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
            self.sentbytescount += len(str)
    except socket.error as v:
        reraise = True
        if v.args[0] == errno.EPIPE: # Broken pipe
            # _HTTPConnection__state is httplib's name-mangled private
            # state attribute; only grab the early response when the
            # request line was fully sent.
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
645 645
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # safesend() may have stashed an early response in
        # _broken_pipe_resp when the server answered before hanging up;
        # in that case the socket is already closed and we simply hand
        # that cached response back.  Otherwise defer to the normal
        # getresponse() path.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
660 660
class HTTPConnection(httplib.HTTPConnection):
    # url.httpsconnection inherits from this. So when adding/removing
    # attributes, be sure to audit httpsconnection() for unintended
    # consequences.

    # use the modified response class
    response_class = HTTPResponse
    # broken-pipe-safe send/getresponse replacements defined above
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)

    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        # byte counters read/updated by safesend() and HTTPResponse
        self.sentbytescount = 0
        self.receivedbytescount = 0
675 675
676 676 #########################################################################
677 677 ##### TEST FUNCTIONS
678 678 #########################################################################
679 679
680 680
def continuity(url):
    """Fetch *url* three ways and print an md5 digest of each payload.

    Lets a human verify that the keepalive handler returns the same
    bytes as the stock urllib handlers, via both read() and readline().
    """
    md5 = hashlib.md5
    format = '%25s: %s'

    # baseline: fetch through the stock urllib handlers
    urlreq.installopener(urlreq.buildopener())
    fo = urlreq.urlopen(url)
    payload = fo.read()
    fo.close()
    print(format % ('normal urllib', node.hex(md5(payload).digest())))

    # switch to the keepalive handler and repeat with read()
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))

    fo = urlreq.urlopen(url)
    payload = fo.read()
    fo.close()
    print(format % ('keepalive read', node.hex(md5(payload).digest())))

    # and once more, consuming the body line by line
    fo = urlreq.urlopen(url)
    pieces = []
    line = fo.readline()
    while line:
        pieces.append(line)
        line = fo.readline()
    fo.close()
    payload = ''.join(pieces)
    print(format % ('keepalive readline', node.hex(md5(payload).digest())))
715 715
def comp(N, url):
    """Time N fetches of *url* with and without the keepalive handler
    and print the speedup factor."""
    print(' making %i connections to:\n %s' % (N, url))

    procutil.stdout.write(' first using the normal urllib handlers')
    # baseline timing through the stock opener
    urlreq.installopener(urlreq.buildopener())
    t1 = fetch(N, url)
    print(' TIME: %.3f s' % t1)

    procutil.stdout.write(' now using the keepalive handler ')
    # identical workload through the keepalive handler
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))
    t2 = fetch(N, url)
    print(' TIME: %.3f s' % t2)
    print(' improvement factor: %.2f' % (t1 / t2))
733 733
def fetch(N, url, delay=0):
    """Fetch *url* N times, sleeping *delay* seconds between fetches.

    Returns the total elapsed time in seconds and warns on stdout when
    any response length differs from the first one.
    """
    import time
    sizes = []
    begin = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        body = fo.read()
        fo.close()
        sizes.append(len(body))
    elapsed = time.time() - begin

    # every fetch of the same URL should return the same byte count
    for j, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, size))

    return elapsed
754 754
def test_timeout(url):
    """Check that a server-side idle disconnect is handled transparently.

    Fetches *url*, waits long enough for the server to drop the idle
    keep-alive connection, fetches again, and compares the payloads.
    """
    # 'time' was previously imported only inside fetch() and the
    # __main__ block, so calling this function from an importing module
    # raised NameError on time.sleep() below.  Import it locally.
    import time
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        procutil.stdout.write('\r  %2i' % i)
        procutil.stdout.flush()
        time.sleep(1)
        i -= 1
    procutil.stderr.write('\r')

    print("  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup
788 788
789 789
def test(url, N=10):
    """Run the continuity, speed-comparison, and dropped-connection
    checks against *url*, separated by blank lines."""
    checks = [
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    ]
    for idx, (banner, run) in enumerate(checks):
        if idx:
            print('')
        print(banner)
        run()
799 799
if __name__ == '__main__':
    # Command-line driver: keepalive.py <count> <url>
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing or non-integer arguments: print usage instead
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now