keepalive: track request count and bytes sent...
Gregory Szorc
r40068:dc82ad1b default
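This changeset adds three counters: a per-connection sentbytescount on HTTPConnection, plus aggregate requestscount/sentbytescount on KeepAliveHandler, which are mirrored onto the parent opener whenever that opener already defines matching attributes. Below is a minimal consumption sketch, assuming the mercurial package is importable; the manual counter initialization on the opener and the example URL are illustrative, not part of the change:

# Hypothetical usage sketch (not part of this changeset).
from mercurial import keepalive, util

urlreq = util.urlreq

handler = keepalive.HTTPHandler()
opener = urlreq.buildopener(handler)
# The handler only propagates stats to the parent opener when these
# attributes already exist (note the except AttributeError in
# _start_transaction below), so the caller creates them up front.
opener.requestscount = 0
opener.sentbytescount = 0

fo = opener.open('http://example.com/')
fo.read()
fo.close()

print('requests: %d, bytes sent: %d'
      % (opener.requestscount, opener.sentbytescount))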
@@ -1,760 +1,779 @@
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
 # version 2.1 of the License, or (at your option) any later version.
 #
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with this library; if not, see
 # <http://www.gnu.org/licenses/>.

 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

 # Modified by Benoit Boissinot:
 #  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
 # Modified by Dirkjan Ochtman:
 #  - import md5 function from a local util module
 # Modified by Augie Fackler:
 #  - add safesend method and use it to prevent broken pipe errors
 #    on large POST requests

 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.

 >>> import urllib2
 >>> from keepalive import HTTPHandler
 >>> keepalive_handler = HTTPHandler()
 >>> opener = urlreq.buildopener(keepalive_handler)
 >>> urlreq.installopener(opener)
 >>>
 >>> fo = urlreq.urlopen('http://www.python.org')

 If a connection to a given host is requested, and all of the existing
 connections are still in use, another connection will be opened. If
 the handler tries to use an existing connection but it fails in some
 way, it will be closed and removed from the pool.

 To remove the handler, simply re-run build_opener with no arguments, and
 install that opener.

 You can explicitly close connections by using the close_connection()
 method of the returned file-like object (described below) or you can
 use the handler methods:

   close_connection(host)
   close_all()
   open_connections()

 NOTE: using the close_connection and close_all methods of the handler
 should be done with care when using multiple threads.
   * there is nothing that prevents another thread from creating new
     connections immediately after connections are closed
   * no checks are done to prevent in-use connections from being closed

 >>> keepalive_handler.close_all()

 EXTRA ATTRIBUTES AND METHODS

   Upon a status of 200, the object returned has a few additional
   attributes and methods, which should not be used if you want to
   remain consistent with the normal urllib2-returned objects:

     close_connection()  -  close the connection to the host
     readlines()         -  you know, readlines()
     status              -  the return status (i.e. 404)
     reason              -  english translation of status (i.e. 'File not found')

   If you want the best of both worlds, use this inside an
   AttributeError-catching try:

   >>> try: status = fo.status
   >>> except AttributeError: status = None

   Unfortunately, these are ONLY there if status == 200, so it's not
   easy to distinguish between non-200 responses. The reason is that
   urllib2 tries to do clever things with error codes 301, 302, 401,
   and 407, and it wraps the object upon return.
 """

 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $

 from __future__ import absolute_import, print_function

 import errno
 import hashlib
 import socket
 import sys
 import threading

 from .i18n import _
 from . import (
     node,
     pycompat,
     urllibcompat,
     util,
 )
 from .utils import (
     procutil,
 )

 httplib = util.httplib
 urlerr = util.urlerr
 urlreq = util.urlreq

 DEBUG = None

 class ConnectionManager(object):
     """
     The connection manager must be able to:
       * keep track of all existing connections
       """
     def __init__(self):
         self._lock = threading.Lock()
         self._hostmap = {} # map hosts to a list of connections
         self._connmap = {} # map connections to host
         self._readymap = {} # map connection to ready state

     def add(self, host, connection, ready):
         self._lock.acquire()
         try:
             if host not in self._hostmap:
                 self._hostmap[host] = []
             self._hostmap[host].append(connection)
             self._connmap[connection] = host
             self._readymap[connection] = ready
         finally:
             self._lock.release()

     def remove(self, connection):
         self._lock.acquire()
         try:
             try:
                 host = self._connmap[connection]
             except KeyError:
                 pass
             else:
                 del self._connmap[connection]
                 del self._readymap[connection]
                 self._hostmap[host].remove(connection)
                 if not self._hostmap[host]:
                     del self._hostmap[host]
         finally:
             self._lock.release()

     def set_ready(self, connection, ready):
         try:
             self._readymap[connection] = ready
         except KeyError:
             pass

     def get_ready_conn(self, host):
         conn = None
         self._lock.acquire()
         try:
             if host in self._hostmap:
                 for c in self._hostmap[host]:
                     if self._readymap[c]:
                         self._readymap[c] = 0
                         conn = c
                         break
         finally:
             self._lock.release()
         return conn

     def get_all(self, host=None):
         if host:
             return list(self._hostmap.get(host, []))
         else:
             return dict(self._hostmap)

 class KeepAliveHandler(object):
     def __init__(self):
         self._cm = ConnectionManager()
+        self.requestscount = 0
+        self.sentbytescount = 0

     #### Connection Management
     def open_connections(self):
         """return a list of connected hosts and the number of connections
         to each. [('foo.com:80', 2), ('bar.org', 1)]"""
         return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

     def close_connection(self, host):
         """close connection(s) to <host>
         host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
         no error occurs if there is no connection to that host."""
         for h in self._cm.get_all(host):
             self._cm.remove(h)
             h.close()

     def close_all(self):
         """close all open connections"""
         for host, conns in self._cm.get_all().iteritems():
             for h in conns:
                 self._cm.remove(h)
                 h.close()

     def _request_closed(self, request, host, connection):
         """tells us that this request is now closed and that the
         connection is ready for another request"""
         self._cm.set_ready(connection, 1)

     def _remove_connection(self, host, connection, close=0):
         if close:
             connection.close()
         self._cm.remove(connection)

     #### Transaction Execution
     def http_open(self, req):
         return self.do_open(HTTPConnection, req)

     def do_open(self, http_class, req):
         host = urllibcompat.gethost(req)
         if not host:
             raise urlerr.urlerror('no host given')

         try:
             h = self._cm.get_ready_conn(host)
             while h:
                 r = self._reuse_connection(h, req, host)

                 # if this response is non-None, then it worked and we're
                 # done. Break out, skipping the else block.
                 if r:
                     break

                 # connection is bad - possibly closed by server
                 # discard it and ask for the next free connection
                 h.close()
                 self._cm.remove(h)
                 h = self._cm.get_ready_conn(host)
             else:
                 # no (working) free connections were found. Create a new one.
                 h = http_class(host)
                 if DEBUG:
                     DEBUG.info("creating new connection to %s (%d)",
                                host, id(h))
                 self._cm.add(host, h, 0)
                 self._start_transaction(h, req)
                 r = h.getresponse()
         # The string form of BadStatusLine is the status line. Add some context
         # to make the error message slightly more useful.
         except httplib.BadStatusLine as err:
             raise urlerr.urlerror(
                 _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
         except (socket.error, httplib.HTTPException) as err:
             raise urlerr.urlerror(err)

         # If not a persistent connection, don't try to reuse it. Look
         # for this using getattr() since vcr doesn't define this
         # attribute, and in that case always close the connection.
         if getattr(r, r'will_close', True):
             self._cm.remove(h)

         if DEBUG:
             DEBUG.info("STATUS: %s, %s", r.status, r.reason)
         r._handler = self
         r._host = host
         r._url = req.get_full_url()
         r._connection = h
         r.code = r.status
         r.headers = r.msg
         r.msg = r.reason

         return r

     def _reuse_connection(self, h, req, host):
         """start the transaction with a re-used connection
         return a response object (r) upon success or None on failure.
         This DOES not close or remove bad connections in cases where
         it returns. However, if an unexpected exception occurs, it
         will close and remove the connection before re-raising.
         """
         try:
             self._start_transaction(h, req)
             r = h.getresponse()
             # note: just because we got something back doesn't mean it
             # worked. We'll check the version below, too.
         except (socket.error, httplib.HTTPException):
             r = None
         except: # re-raises
             # adding this block just in case we've missed
             # something we will still raise the exception, but
             # lets try and close the connection and remove it
             # first. We previously got into a nasty loop
             # where an exception was uncaught, and so the
             # connection stayed open. On the next try, the
             # same exception was raised, etc. The trade-off is
             # that it's now possible this call will raise
             # a DIFFERENT exception
             if DEBUG:
                 DEBUG.error("unexpected exception - closing "
                             "connection to %s (%d)", host, id(h))
             self._cm.remove(h)
             h.close()
             raise

         if r is None or r.version == 9:
             # httplib falls back to assuming HTTP 0.9 if it gets a
             # bad header back. This is most likely to happen if
             # the socket has been closed by the server since we
             # last used the connection.
             if DEBUG:
                 DEBUG.info("failed to re-use connection to %s (%d)",
                            host, id(h))
             r = None
         else:
             if DEBUG:
                 DEBUG.info("re-using connection to %s (%d)", host, id(h))

         return r

     def _start_transaction(self, h, req):
+        oldbytescount = h.sentbytescount
+
         # What follows mostly reimplements HTTPConnection.request()
         # except it adds self.parent.addheaders in the mix and sends headers
         # in a deterministic order (to make testing easier).
         headers = util.sortdict(self.parent.addheaders)
         headers.update(sorted(req.headers.items()))
         headers.update(sorted(req.unredirected_hdrs.items()))
         headers = util.sortdict((n.lower(), v) for n, v in headers.items())
         skipheaders = {}
         for n in (r'host', r'accept-encoding'):
             if n in headers:
                 skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
         try:
             if urllibcompat.hasdata(req):
                 data = urllibcompat.getdata(req)
                 h.putrequest(
                     req.get_method(), urllibcompat.getselector(req),
                     **skipheaders)
                 if r'content-type' not in headers:
                     h.putheader(r'Content-type',
                                 r'application/x-www-form-urlencoded')
                 if r'content-length' not in headers:
                     h.putheader(r'Content-length', r'%d' % len(data))
             else:
                 h.putrequest(
                     req.get_method(), urllibcompat.getselector(req),
                     **skipheaders)
         except socket.error as err:
             raise urlerr.urlerror(err)
         for k, v in headers.items():
             h.putheader(k, v)
         h.endheaders()
         if urllibcompat.hasdata(req):
             h.send(data)

+        # This will fail to record events in case of I/O failure. That's OK.
+        self.requestscount += 1
+        self.sentbytescount += h.sentbytescount - oldbytescount
+
+        try:
+            self.parent.requestscount += 1
+            self.parent.sentbytescount += h.sentbytescount - oldbytescount
+        except AttributeError:
+            pass
+
 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
     pass

 class HTTPResponse(httplib.HTTPResponse):
     # we need to subclass HTTPResponse in order to
     # 1) add readline(), readlines(), and readinto() methods
     # 2) add close_connection() methods
     # 3) add info() and geturl() methods

     # in order to add readline(), read must be modified to deal with a
     # buffer. example: readline must read a buffer and then spit back
     # one line at a time. The only real alternative is to read one
     # BYTE at a time (ick). Once something has been read, it can't be
     # put back (ok, maybe it can, but that's even uglier than this),
     # so if you THEN do a normal read, you must first take stuff from
     # the buffer.

     # the read method wraps the original to accommodate buffering,
     # although read() never adds to the buffer.
     # Both readline and readlines have been stolen with almost no
     # modification from socket.py


     def __init__(self, sock, debuglevel=0, strict=0, method=None):
         extrakw = {}
         if not pycompat.ispy3:
             extrakw[r'strict'] = True
             extrakw[r'buffering'] = True
         httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                       method=method, **extrakw)
         self.fileno = sock.fileno
         self.code = None
         self._rbuf = ''
         self._rbufsize = 8096
         self._handler = None # inserted by the handler later
         self._host = None # (same)
         self._url = None # (same)
         self._connection = None # (same)

     _raw_read = httplib.HTTPResponse.read
     _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

     def close(self):
         if self.fp:
             self.fp.close()
             self.fp = None
             if self._handler:
                 self._handler._request_closed(self, self._host,
                                               self._connection)

     def close_connection(self):
         self._handler._remove_connection(self._host, self._connection, close=1)
         self.close()

     def info(self):
         return self.headers

     def geturl(self):
         return self._url

     def read(self, amt=None):
         # the _rbuf test is only in this first if for speed. It's not
         # logically necessary
         if self._rbuf and amt is not None:
             L = len(self._rbuf)
             if amt > L:
                 amt -= L
             else:
                 s = self._rbuf[:amt]
                 self._rbuf = self._rbuf[amt:]
                 return s
         # Careful! http.client.HTTPResponse.read() on Python 3 is
         # implemented using readinto(), which can duplicate self._rbuf
         # if it's not empty.
         s = self._rbuf
         self._rbuf = ''
         s += self._raw_read(amt)
         return s

     # stolen from Python SVN #68532 to fix issue1088
     def _read_chunked(self, amt):
         chunk_left = self.chunk_left
         parts = []

         while True:
             if chunk_left is None:
                 line = self.fp.readline()
                 i = line.find(';')
                 if i >= 0:
                     line = line[:i] # strip chunk-extensions
                 try:
                     chunk_left = int(line, 16)
                 except ValueError:
                     # close the connection as protocol synchronization is
                     # probably lost
                     self.close()
                     raise httplib.IncompleteRead(''.join(parts))
                 if chunk_left == 0:
                     break
             if amt is None:
                 parts.append(self._safe_read(chunk_left))
             elif amt < chunk_left:
                 parts.append(self._safe_read(amt))
                 self.chunk_left = chunk_left - amt
                 return ''.join(parts)
             elif amt == chunk_left:
                 parts.append(self._safe_read(amt))
                 self._safe_read(2) # toss the CRLF at the end of the chunk
                 self.chunk_left = None
                 return ''.join(parts)
             else:
                 parts.append(self._safe_read(chunk_left))
                 amt -= chunk_left

             # we read the whole chunk, get another
             self._safe_read(2) # toss the CRLF at the end of the chunk
             chunk_left = None

         # read and discard trailer up to the CRLF terminator
         ### note: we shouldn't have any trailers!
         while True:
             line = self.fp.readline()
             if not line:
                 # a vanishingly small number of sites EOF without
                 # sending the trailer
                 break
             if line == '\r\n':
                 break

         # we read everything; close the "file"
         self.close()

         return ''.join(parts)

     def readline(self):
         # Fast path for a line is already available in read buffer.
         i = self._rbuf.find('\n')
         if i >= 0:
             i += 1
             line = self._rbuf[:i]
             self._rbuf = self._rbuf[i:]
             return line

         # No newline in local buffer. Read until we find one.
         chunks = [self._rbuf]
         i = -1
         readsize = self._rbufsize
         while True:
             new = self._raw_read(readsize)
             if not new:
                 break

             chunks.append(new)
             i = new.find('\n')
             if i >= 0:
                 break

         # We either have exhausted the stream or have a newline in chunks[-1].

         # EOF
         if i == -1:
             self._rbuf = ''
             return ''.join(chunks)

         i += 1
         self._rbuf = chunks[-1][i:]
         chunks[-1] = chunks[-1][:i]
         return ''.join(chunks)

     def readlines(self, sizehint=0):
         total = 0
         list = []
         while True:
             line = self.readline()
             if not line:
                 break
             list.append(line)
             total += len(line)
             if sizehint and total >= sizehint:
                 break
         return list

     def readinto(self, dest):
         if self._raw_readinto is None:
             res = self.read(len(dest))
             if not res:
                 return 0
             dest[0:len(res)] = res
             return len(res)
         total = len(dest)
         have = len(self._rbuf)
         if have >= total:
             dest[0:total] = self._rbuf[:total]
             self._rbuf = self._rbuf[total:]
             return total
         mv = memoryview(dest)
         got = self._raw_readinto(mv[have:total])
         dest[0:have] = self._rbuf
         got += len(self._rbuf)
         self._rbuf = ''
         return got

 def safesend(self, str):
     """Send `str' to the server.

     Shamelessly ripped off from httplib to patch a bad behavior.
     """
     # _broken_pipe_resp is an attribute we set in this function
     # if the socket is closed while we're sending data but
     # the server sent us a response before hanging up.
     # In that case, we want to pretend to send the rest of the
     # outgoing data, and then let the user use getresponse()
     # (which we wrap) to get this last response before
     # opening a new socket.
     if getattr(self, '_broken_pipe_resp', None) is not None:
         return

     if self.sock is None:
         if self.auto_open:
             self.connect()
         else:
             raise httplib.NotConnected

     # send the data to the server. if we get a broken pipe, then close
     # the socket. we want to reconnect when somebody tries to send again.
     #
     # NOTE: we DO propagate the error, though, because we cannot simply
     #       ignore the error... the caller will know if they can retry.
     if self.debuglevel > 0:
         print("send:", repr(str))
     try:
         blocksize = 8192
         read = getattr(str, 'read', None)
         if read is not None:
             if self.debuglevel > 0:
                 print("sending a read()able")
             data = read(blocksize)
             while data:
                 self.sock.sendall(data)
+                self.sentbytescount += len(data)
                 data = read(blocksize)
         else:
             self.sock.sendall(str)
+            self.sentbytescount += len(str)
     except socket.error as v:
         reraise = True
         if v[0] == errno.EPIPE: # Broken pipe
             if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                 self._broken_pipe_resp = None
                 self._broken_pipe_resp = self.getresponse()
                 reraise = False
             self.close()
         if reraise:
             raise

 def wrapgetresponse(cls):
     """Wraps getresponse in cls with a broken-pipe sane version.
     """
     def safegetresponse(self):
         # In safesend() we might set the _broken_pipe_resp
         # attribute, in which case the socket has already
         # been closed and we just need to give them the response
         # back. Otherwise, we use the normal response path.
         r = getattr(self, '_broken_pipe_resp', None)
         if r is not None:
             return r
         return cls.getresponse(self)
     safegetresponse.__doc__ = cls.getresponse.__doc__
     return safegetresponse

 class HTTPConnection(httplib.HTTPConnection):
     # url.httpsconnection inherits from this. So when adding/removing
     # attributes, be sure to audit httpsconnection() for unintended
     # consequences.

     # use the modified response class
     response_class = HTTPResponse
     send = safesend
     getresponse = wrapgetresponse(httplib.HTTPConnection)

+    def __init__(self, *args, **kwargs):
+        httplib.HTTPConnection.__init__(self, *args, **kwargs)
+        self.sentbytescount = 0

 #########################################################################
 ##### TEST FUNCTIONS
 #########################################################################


 def continuity(url):
     md5 = hashlib.md5
     format = '%25s: %s'

     # first fetch the file with the normal http handler
     opener = urlreq.buildopener()
     urlreq.installopener(opener)
     fo = urlreq.urlopen(url)
     foo = fo.read()
     fo.close()
     m = md5(foo)
     print(format % ('normal urllib', node.hex(m.digest())))

     # now install the keepalive handler and try again
     opener = urlreq.buildopener(HTTPHandler())
     urlreq.installopener(opener)

     fo = urlreq.urlopen(url)
     foo = fo.read()
     fo.close()
     m = md5(foo)
     print(format % ('keepalive read', node.hex(m.digest())))

     fo = urlreq.urlopen(url)
     foo = ''
     while True:
         f = fo.readline()
         if f:
             foo = foo + f
         else:
             break
     fo.close()
     m = md5(foo)
     print(format % ('keepalive readline', node.hex(m.digest())))

 def comp(N, url):
     print(' making %i connections to:\n %s' % (N, url))

     procutil.stdout.write(' first using the normal urllib handlers')
     # first use normal opener
     opener = urlreq.buildopener()
     urlreq.installopener(opener)
     t1 = fetch(N, url)
     print(' TIME: %.3f s' % t1)

     procutil.stdout.write(' now using the keepalive handler ')
     # now install the keepalive handler and try again
     opener = urlreq.buildopener(HTTPHandler())
     urlreq.installopener(opener)
     t2 = fetch(N, url)
     print(' TIME: %.3f s' % t2)
     print(' improvement factor: %.2f' % (t1 / t2))

 def fetch(N, url, delay=0):
     import time
     lens = []
     starttime = time.time()
     for i in range(N):
         if delay and i > 0:
             time.sleep(delay)
         fo = urlreq.urlopen(url)
         foo = fo.read()
         fo.close()
         lens.append(len(foo))
     diff = time.time() - starttime

     j = 0
     for i in lens[1:]:
         j = j + 1
         if not i == lens[0]:
             print("WARNING: inconsistent length on read %i: %i" % (j, i))

     return diff

 def test_timeout(url):
     global DEBUG
     dbbackup = DEBUG
     class FakeLogger(object):
         def debug(self, msg, *args):
             print(msg % args)
         info = warning = error = debug
     DEBUG = FakeLogger()
     print(" fetching the file to establish a connection")
     fo = urlreq.urlopen(url)
     data1 = fo.read()
     fo.close()

     i = 20
     print(" waiting %i seconds for the server to close the connection" % i)
     while i > 0:
         procutil.stdout.write('\r %2i' % i)
         procutil.stdout.flush()
         time.sleep(1)
         i -= 1
     procutil.stderr.write('\r')

     print(" fetching the file a second time")
     fo = urlreq.urlopen(url)
     data2 = fo.read()
     fo.close()

     if data1 == data2:
         print(' data are identical')
     else:
         print(' ERROR: DATA DIFFER')

     DEBUG = dbbackup


 def test(url, N=10):
     print("performing continuity test (making sure stuff isn't corrupted)")
     continuity(url)
     print('')
     print("performing speed comparison")
     comp(N, url)
     print('')
     print("performing dropped-connection check")
     test_timeout(url)

 if __name__ == '__main__':
     import time
     try:
         N = int(sys.argv[1])
         url = sys.argv[2]
     except (IndexError, ValueError):
         print("%s <integer> <url>" % sys.argv[0])
     else:
         test(url, N)
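A note on the accounting added above: it is delta-based. safesend() keeps a running per-connection total of bytes actually written to the socket, and _start_transaction() credits only the current request's difference to the handler (and, when present, to the parent opener). Here is a condensed, self-contained sketch of that pattern with the HTTP machinery replaced by stubs; all names are illustrative, not from the module:

# Stub illustration of the delta-based byte accounting.
class StubConnection(object):
    def __init__(self):
        self.sentbytescount = 0   # running total for this connection

    def send(self, data):
        # stands in for safesend(): count bytes actually written
        self.sentbytescount += len(data)

class StubHandler(object):
    def __init__(self):
        self.requestscount = 0
        self.sentbytescount = 0   # aggregate across all connections

    def start_transaction(self, conn, payload):
        oldbytescount = conn.sentbytescount
        conn.send(payload)
        # only this request's delta is credited to the handler
        self.requestscount += 1
        self.sentbytescount += conn.sentbytescount - oldbytescount

h = StubHandler()
c = StubConnection()
h.start_transaction(c, b'GET / HTTP/1.1\r\n\r\n')
assert (h.requestscount, h.sentbytescount) == (1, 18)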