keepalive: be more careful about self._rbuf when calling super impls...
Augie Fackler
r39841:1cf1680b default
@@ -1,753 +1,756 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urllib2.build_opener(keepalive_handler)
32 32 >>> urllib2.install_opener(opener)
33 33 >>>
34 34 >>> fo = urllib2.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (e.g. 404)
69 69 reason - English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 ... except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85 from __future__ import absolute_import, print_function
86 86
87 87 import errno
88 88 import hashlib
89 89 import socket
90 90 import sys
91 91 import threading
92 92
93 93 from .i18n import _
94 94 from . import (
95 95 node,
96 96 pycompat,
97 97 urllibcompat,
98 98 util,
99 99 )
100 100 from .utils import (
101 101 procutil,
102 102 )
103 103
104 104 httplib = util.httplib
105 105 urlerr = util.urlerr
106 106 urlreq = util.urlreq
107 107
108 108 DEBUG = None
109 109
110 110 class ConnectionManager(object):
111 111 """
112 112 The connection manager must be able to:
113 113 * keep track of all existing connections
114 114 """
115 115 def __init__(self):
116 116 self._lock = threading.Lock()
117 117 self._hostmap = {} # map hosts to a list of connections
118 118 self._connmap = {} # map connections to host
119 119 self._readymap = {} # map connection to ready state
120 120
121 121 def add(self, host, connection, ready):
122 122 self._lock.acquire()
123 123 try:
124 124 if host not in self._hostmap:
125 125 self._hostmap[host] = []
126 126 self._hostmap[host].append(connection)
127 127 self._connmap[connection] = host
128 128 self._readymap[connection] = ready
129 129 finally:
130 130 self._lock.release()
131 131
132 132 def remove(self, connection):
133 133 self._lock.acquire()
134 134 try:
135 135 try:
136 136 host = self._connmap[connection]
137 137 except KeyError:
138 138 pass
139 139 else:
140 140 del self._connmap[connection]
141 141 del self._readymap[connection]
142 142 self._hostmap[host].remove(connection)
143 143 if not self._hostmap[host]:
144 144 del self._hostmap[host]
145 145 finally:
146 146 self._lock.release()
147 147
148 148 def set_ready(self, connection, ready):
149 149 try:
150 150 self._readymap[connection] = ready
151 151 except KeyError:
152 152 pass
153 153
154 154 def get_ready_conn(self, host):
155 155 conn = None
156 156 self._lock.acquire()
157 157 try:
158 158 if host in self._hostmap:
159 159 for c in self._hostmap[host]:
160 160 if self._readymap[c]:
161 161 self._readymap[c] = 0
162 162 conn = c
163 163 break
164 164 finally:
165 165 self._lock.release()
166 166 return conn
167 167
168 168 def get_all(self, host=None):
169 169 if host:
170 170 return list(self._hostmap.get(host, []))
171 171 else:
172 172 return dict(self._hostmap)
173 173
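A minimal sketch of the manager's contract as the handler below drives
it (the connection here is a stand-in object; any hashable value works):

    # Add a connection as busy (ready=0), mark it ready when its request
    # completes, hand it out again, and drop it once it goes bad.
    cm = ConnectionManager()
    conn = object()                    # stands in for an HTTPConnection
    cm.add('example.com:80', conn, ready=0)
    assert cm.get_ready_conn('example.com:80') is None  # still in use
    cm.set_ready(conn, 1)              # request finished
    assert cm.get_ready_conn('example.com:80') is conn  # reused, now busy
    cm.remove(conn)                    # gone bad: forget it entirely
    assert cm.get_all('example.com:80') == []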
174 174 class KeepAliveHandler(object):
175 175 def __init__(self):
176 176 self._cm = ConnectionManager()
177 177
178 178 #### Connection Management
179 179 def open_connections(self):
180 180 """return a list of connected hosts and the number of connections
181 181 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
182 182 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
183 183
184 184 def close_connection(self, host):
185 185 """close connection(s) to <host>
186 186 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
187 187 no error occurs if there is no connection to that host."""
188 188 for h in self._cm.get_all(host):
189 189 self._cm.remove(h)
190 190 h.close()
191 191
192 192 def close_all(self):
193 193 """close all open connections"""
194 194 for host, conns in self._cm.get_all().iteritems():
195 195 for h in conns:
196 196 self._cm.remove(h)
197 197 h.close()
198 198
199 199 def _request_closed(self, request, host, connection):
200 200 """tells us that this request is now closed and that the
201 201 connection is ready for another request"""
202 202 self._cm.set_ready(connection, 1)
203 203
204 204 def _remove_connection(self, host, connection, close=0):
205 205 if close:
206 206 connection.close()
207 207 self._cm.remove(connection)
208 208
209 209 #### Transaction Execution
210 210 def http_open(self, req):
211 211 return self.do_open(HTTPConnection, req)
212 212
213 213 def do_open(self, http_class, req):
214 214 host = urllibcompat.gethost(req)
215 215 if not host:
216 216 raise urlerr.urlerror('no host given')
217 217
218 218 try:
219 219 h = self._cm.get_ready_conn(host)
220 220 while h:
221 221 r = self._reuse_connection(h, req, host)
222 222
223 223 # if this response is non-None, then it worked and we're
224 224 # done. Break out, skipping the else block.
225 225 if r:
226 226 break
227 227
228 228 # connection is bad - possibly closed by server
229 229 # discard it and ask for the next free connection
230 230 h.close()
231 231 self._cm.remove(h)
232 232 h = self._cm.get_ready_conn(host)
233 233 else:
234 234 # no (working) free connections were found. Create a new one.
235 235 h = http_class(host)
236 236 if DEBUG:
237 237 DEBUG.info("creating new connection to %s (%d)",
238 238 host, id(h))
239 239 self._cm.add(host, h, 0)
240 240 self._start_transaction(h, req)
241 241 r = h.getresponse()
242 242 # The string form of BadStatusLine is the status line. Add some context
243 243 # to make the error message slightly more useful.
244 244 except httplib.BadStatusLine as err:
245 245 raise urlerr.urlerror(
246 246 _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
247 247 except (socket.error, httplib.HTTPException) as err:
248 248 raise urlerr.urlerror(err)
249 249
250 250 # If not a persistent connection, don't try to reuse it. Look
251 251 # for this using getattr() since vcr doesn't define this
252 252 # attribute, and in that case always close the connection.
253 253 if getattr(r, r'will_close', True):
254 254 self._cm.remove(h)
255 255
256 256 if DEBUG:
257 257 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
258 258 r._handler = self
259 259 r._host = host
260 260 r._url = req.get_full_url()
261 261 r._connection = h
262 262 r.code = r.status
263 263 r.headers = r.msg
264 264 r.msg = r.reason
265 265
266 266 return r
267 267
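From the caller's side the pooling in do_open() is observable through
the handler's bookkeeping; a sketch, assuming a reachable host (the URL
is illustrative):

    # The first open() creates a connection; close() marks it ready, so
    # the second open() reuses it and the pool still holds one entry.
    handler = HTTPHandler()
    opener = urlreq.buildopener(handler)
    for _ in range(2):
        fo = opener.open('http://example.com/')
        fo.read()
        fo.close()                     # connection returns to the pool
    print(handler.open_connections())  # e.g. [('example.com', 1)]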
268 268 def _reuse_connection(self, h, req, host):
269 269 """start the transaction with a re-used connection
270 270 return a response object (r) upon success or None on failure.
271 271 This DOES not close or remove bad connections in cases where
272 272 it returns. However, if an unexpected exception occurs, it
273 273 will close and remove the connection before re-raising.
274 274 """
275 275 try:
276 276 self._start_transaction(h, req)
277 277 r = h.getresponse()
278 278 # note: just because we got something back doesn't mean it
279 279 # worked. We'll check the version below, too.
280 280 except (socket.error, httplib.HTTPException):
281 281 r = None
282 282 except: # re-raises
283 283 # adding this block just in case we've missed
284 284 # something we will still raise the exception, but
285 285 # lets try and close the connection and remove it
286 286 # first. We previously got into a nasty loop
287 287 # where an exception was uncaught, and so the
288 288 # connection stayed open. On the next try, the
289 289 # same exception was raised, etc. The trade-off is
290 290 # that it's now possible this call will raise
291 291 # a DIFFERENT exception
292 292 if DEBUG:
293 293 DEBUG.error("unexpected exception - closing "
294 294 "connection to %s (%d)", host, id(h))
295 295 self._cm.remove(h)
296 296 h.close()
297 297 raise
298 298
299 299 if r is None or r.version == 9:
300 300 # httplib falls back to assuming HTTP 0.9 if it gets a
301 301 # bad header back. This is most likely to happen if
302 302 # the socket has been closed by the server since we
303 303 # last used the connection.
304 304 if DEBUG:
305 305 DEBUG.info("failed to re-use connection to %s (%d)",
306 306 host, id(h))
307 307 r = None
308 308 else:
309 309 if DEBUG:
310 310 DEBUG.info("re-using connection to %s (%d)", host, id(h))
311 311
312 312 return r
313 313
314 314 def _start_transaction(self, h, req):
315 315 # What follows mostly reimplements HTTPConnection.request()
316 316 # except it adds self.parent.addheaders in the mix and sends headers
317 317 # in a deterministic order (to make testing easier).
318 318 headers = util.sortdict(self.parent.addheaders)
319 319 headers.update(sorted(req.headers.items()))
320 320 headers.update(sorted(req.unredirected_hdrs.items()))
321 321 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
322 322 skipheaders = {}
323 323 for n in (r'host', r'accept-encoding'):
324 324 if n in headers:
325 325 skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
326 326 try:
327 327 if urllibcompat.hasdata(req):
328 328 data = urllibcompat.getdata(req)
329 329 h.putrequest(
330 330 req.get_method(), urllibcompat.getselector(req),
331 331 **skipheaders)
332 332 if r'content-type' not in headers:
333 333 h.putheader(r'Content-type',
334 334 r'application/x-www-form-urlencoded')
335 335 if r'content-length' not in headers:
336 336 h.putheader(r'Content-length', r'%d' % len(data))
337 337 else:
338 338 h.putrequest(
339 339 req.get_method(), urllibcompat.getselector(req),
340 340 **skipheaders)
341 341 except socket.error as err:
342 342 raise urlerr.urlerror(err)
343 343 for k, v in headers.items():
344 344 h.putheader(k, v)
345 345 h.endheaders()
346 346 if urllibcompat.hasdata(req):
347 347 h.send(data)
348 348
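The deterministic order comes from feeding sorted items into a sortdict,
which then iterates in insertion order; a rough stdlib equivalent of the
normalization above (OrderedDict standing in for util.sortdict, header
values illustrative):

    from collections import OrderedDict

    headers = OrderedDict([('User-Agent', 'mercurial')])  # addheaders
    headers.update(sorted({'Accept': '*/*'}.items()))     # req.headers
    headers.update(sorted({'Host': 'example.com'}.items()))
    headers = OrderedDict((n.lower(), v) for n, v in headers.items())
    skipped = {'skip_' + n.replace('-', '_'): 1
               for n in ('host', 'accept-encoding') if n in headers}
    print(list(headers))  # ['user-agent', 'accept', 'host']
    print(skipped)        # {'skip_host': 1}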
349 349 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
350 350 pass
351 351
352 352 class HTTPResponse(httplib.HTTPResponse):
353 353 # we need to subclass HTTPResponse in order to
354 354 # 1) add readline(), readlines(), and readinto() methods
355 355 # 2) add close_connection() methods
356 356 # 3) add info() and geturl() methods
357 357
358 358 # in order to add readline(), read must be modified to deal with a
359 359 # buffer. example: readline must read a buffer and then spit back
360 360 # one line at a time. The only real alternative is to read one
361 361 # BYTE at a time (ick). Once something has been read, it can't be
362 362 # put back (ok, maybe it can, but that's even uglier than this),
363 363 # so if you THEN do a normal read, you must first take stuff from
364 364 # the buffer.
365 365
366 366 # the read method wraps the original to accommodate buffering,
367 367 # although read() never adds to the buffer.
368 368 # Both readline and readlines have been stolen with almost no
369 369 # modification from socket.py
370 370
371 371
372 372 def __init__(self, sock, debuglevel=0, strict=0, method=None):
373 373 extrakw = {}
374 374 if not pycompat.ispy3:
375 375 extrakw[r'strict'] = True
376 376 extrakw[r'buffering'] = True
377 377 httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
378 378 method=method, **extrakw)
379 379 self.fileno = sock.fileno
380 380 self.code = None
381 381 self._rbuf = ''
382 382 self._rbufsize = 8096
383 383 self._handler = None # inserted by the handler later
384 384 self._host = None # (same)
385 385 self._url = None # (same)
386 386 self._connection = None # (same)
387 387
388 388 _raw_read = httplib.HTTPResponse.read
389 389 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
390 390
391 391 def close(self):
392 392 if self.fp:
393 393 self.fp.close()
394 394 self.fp = None
395 395 if self._handler:
396 396 self._handler._request_closed(self, self._host,
397 397 self._connection)
398 398
399 399 def close_connection(self):
400 400 self._handler._remove_connection(self._host, self._connection, close=1)
401 401 self.close()
402 402
403 403 def info(self):
404 404 return self.headers
405 405
406 406 def geturl(self):
407 407 return self._url
408 408
409 409 def read(self, amt=None):
410 410 # the _rbuf test is only in this first if for speed. It's not
411 411 # logically necessary
412 412 if self._rbuf and amt is not None:
413 413 L = len(self._rbuf)
414 414 if amt > L:
415 415 amt -= L
416 416 else:
417 417 s = self._rbuf[:amt]
418 418 self._rbuf = self._rbuf[amt:]
419 419 return s
420
421 s = self._rbuf + self._raw_read(amt)
420 # Careful! http.client.HTTPResponse.read() on Python 3 is
421 # implemented using readinto(), which can duplicate self._rbuf
422 # if it's not empty.
423 s = self._rbuf
422 424 self._rbuf = ''
425 s += self._raw_read(amt)
423 426 return s
424 427
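The reordering above is the point of this change: on Python 3 the
inherited read() is built on readinto(), and the readinto() defined
below drains self._rbuf itself, so the old form 'self._rbuf +
self._raw_read(amt)' would prepend the buffered bytes a second time.
A contrived, self-contained sketch of the hazard (nothing here is
httplib API, it only models the delegation):

    class Buffered(object):
        def __init__(self, payload):
            self._rbuf, self._payload = b'ab', payload
        def _raw_read(self, amt):
            # models a super implementation that also drains the buffer
            want = amt - len(self._rbuf)
            out = self._rbuf + self._payload[:want]
            self._payload = self._payload[want:]
            self._rbuf = b''
            return out
        def read_buggy(self, amt):
            # old form: _rbuf is counted again after _raw_read drains it
            return self._rbuf + self._raw_read(amt)
        def read_fixed(self, amt):
            # new form: take and clear _rbuf first, then read the rest
            s, self._rbuf = self._rbuf, b''
            return s + self._raw_read(amt - len(s))

    print(Buffered(b'cdef').read_buggy(4))  # b'ababcd', 'ab' duplicated
    print(Buffered(b'cdef').read_fixed(4))  # b'abcd'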
425 428 # stolen from Python SVN #68532 to fix issue1088
426 429 def _read_chunked(self, amt):
427 430 chunk_left = self.chunk_left
428 431 parts = []
429 432
430 433 while True:
431 434 if chunk_left is None:
432 435 line = self.fp.readline()
433 436 i = line.find(';')
434 437 if i >= 0:
435 438 line = line[:i] # strip chunk-extensions
436 439 try:
437 440 chunk_left = int(line, 16)
438 441 except ValueError:
439 442 # close the connection as protocol synchronization is
440 443 # probably lost
441 444 self.close()
442 445 raise httplib.IncompleteRead(''.join(parts))
443 446 if chunk_left == 0:
444 447 break
445 448 if amt is None:
446 449 parts.append(self._safe_read(chunk_left))
447 450 elif amt < chunk_left:
448 451 parts.append(self._safe_read(amt))
449 452 self.chunk_left = chunk_left - amt
450 453 return ''.join(parts)
451 454 elif amt == chunk_left:
452 455 parts.append(self._safe_read(amt))
453 456 self._safe_read(2) # toss the CRLF at the end of the chunk
454 457 self.chunk_left = None
455 458 return ''.join(parts)
456 459 else:
457 460 parts.append(self._safe_read(chunk_left))
458 461 amt -= chunk_left
459 462
460 463 # we read the whole chunk, get another
461 464 self._safe_read(2) # toss the CRLF at the end of the chunk
462 465 chunk_left = None
463 466
464 467 # read and discard trailer up to the CRLF terminator
465 468 ### note: we shouldn't have any trailers!
466 469 while True:
467 470 line = self.fp.readline()
468 471 if not line:
469 472 # a vanishingly small number of sites EOF without
470 473 # sending the trailer
471 474 break
472 475 if line == '\r\n':
473 476 break
474 477
475 478 # we read everything; close the "file"
476 479 self.close()
477 480
478 481 return ''.join(parts)
479 482
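For reference, the wire format _read_chunked() is parsing: each chunk is
a hexadecimal size line (optionally carrying ';name=value' extensions),
the chunk data, and a trailing CRLF, ended by a zero-size chunk. A tiny
sketch of the size-line handling above, with an illustrative line:

    line = '1a;comment=ignored\r\n'
    i = line.find(';')
    if i >= 0:
        line = line[:i]         # strip chunk-extensions
    chunk_left = int(line, 16)  # ValueError here means we lost sync
    print(chunk_left)           # 26 bytes of chunk data follow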
480 483 def readline(self):
481 484 # Fast path for a line is already available in read buffer.
482 485 i = self._rbuf.find('\n')
483 486 if i >= 0:
484 487 i += 1
485 488 line = self._rbuf[:i]
486 489 self._rbuf = self._rbuf[i:]
487 490 return line
488 491
489 492 # No newline in local buffer. Read until we find one.
490 493 chunks = [self._rbuf]
491 494 i = -1
492 495 readsize = self._rbufsize
493 496 while True:
494 497 new = self._raw_read(readsize)
495 498 if not new:
496 499 break
497 500
498 501 chunks.append(new)
499 502 i = new.find('\n')
500 503 if i >= 0:
501 504 break
502 505
503 506 # We either have exhausted the stream or have a newline in chunks[-1].
504 507
505 508 # EOF
506 509 if i == -1:
507 510 self._rbuf = ''
508 511 return ''.join(chunks)
509 512
510 513 i += 1
511 514 self._rbuf = chunks[-1][i:]
512 515 chunks[-1] = chunks[-1][:i]
513 516 return ''.join(chunks)
514 517
515 518 def readlines(self, sizehint=0):
516 519 total = 0
517 520 list = []
518 521 while True:
519 522 line = self.readline()
520 523 if not line:
521 524 break
522 525 list.append(line)
523 526 total += len(line)
524 527 if sizehint and total >= sizehint:
525 528 break
526 529 return list
527 530
528 531 def readinto(self, dest):
529 532 if self._raw_readinto is None:
530 533 res = self.read(len(dest))
531 534 if not res:
532 535 return 0
533 536 dest[0:len(res)] = res
534 537 return len(res)
535 538 total = len(dest)
536 539 have = len(self._rbuf)
537 540 if have >= total:
538 541 dest[0:total] = self._rbuf[:total]
539 542 self._rbuf = self._rbuf[total:]
540 543 return total
541 544 mv = memoryview(dest)
542 545 got = self._raw_readinto(mv[have:total])
543 546 dest[0:have] = self._rbuf
544 547 got += len(self._rbuf)
545 548 self._rbuf = ''
546 549 return got
547 550
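readinto() lands the raw bytes directly behind the buffered prefix by
writing through a memoryview slice of dest; a standalone sketch of that
slicing, with io.BytesIO standing in for the raw socket read:

    import io

    rbuf = b'xy'                  # pretend buffered prefix
    dest = bytearray(6)
    raw = io.BytesIO(b'123456')   # stands in for _raw_readinto
    mv = memoryview(dest)
    got = raw.readinto(mv[len(rbuf):])  # fills dest[2:6] with b'1234'
    dest[0:len(rbuf)] = rbuf            # prefix goes in front
    print(bytes(dest), got + len(rbuf)) # b'xy1234' 6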
548 551 def safesend(self, str):
549 552 """Send `str' to the server.
550 553
551 554 Shamelessly ripped off from httplib to patch a bad behavior.
552 555 """
553 556 # _broken_pipe_resp is an attribute we set in this function
554 557 # if the socket is closed while we're sending data but
555 558 # the server sent us a response before hanging up.
556 559 # In that case, we want to pretend to send the rest of the
557 560 # outgoing data, and then let the user use getresponse()
558 561 # (which we wrap) to get this last response before
559 562 # opening a new socket.
560 563 if getattr(self, '_broken_pipe_resp', None) is not None:
561 564 return
562 565
563 566 if self.sock is None:
564 567 if self.auto_open:
565 568 self.connect()
566 569 else:
567 570 raise httplib.NotConnected
568 571
569 572 # send the data to the server. if we get a broken pipe, then close
570 573 # the socket. we want to reconnect when somebody tries to send again.
571 574 #
572 575 # NOTE: we DO propagate the error, though, because we cannot simply
573 576 # ignore the error... the caller will know if they can retry.
574 577 if self.debuglevel > 0:
575 578 print("send:", repr(str))
576 579 try:
577 580 blocksize = 8192
578 581 read = getattr(str, 'read', None)
579 582 if read is not None:
580 583 if self.debuglevel > 0:
581 584 print("sending a read()able")
582 585 data = read(blocksize)
583 586 while data:
584 587 self.sock.sendall(data)
585 588 data = read(blocksize)
586 589 else:
587 590 self.sock.sendall(str)
588 591 except socket.error as v:
589 592 reraise = True
590 593 if v[0] == errno.EPIPE: # Broken pipe
591 594 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
592 595 self._broken_pipe_resp = None
593 596 self._broken_pipe_resp = self.getresponse()
594 597 reraise = False
595 598 self.close()
596 599 if reraise:
597 600 raise
598 601
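The scenario safesend() covers: on a large POST the server may respond
early (say 401 or 413) and close its end while we are still writing,
which surfaces as EPIPE; the early response is captured so the wrapped
getresponse() below can hand it back. A sketch of the caller-visible
behavior, with an illustrative host and path:

    conn = HTTPConnection('example.com')
    try:
        conn.request('POST', '/upload', body=b'x' * (1 << 24))
    except socket.error:
        raise  # EPIPE with no early response is a real error
    resp = conn.getresponse()  # replays _broken_pipe_resp if it was set
    print(resp.status)         # e.g. 413 if the body was rejected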
599 602 def wrapgetresponse(cls):
600 603 """Wraps getresponse in cls with a broken-pipe sane version.
601 604 """
602 605 def safegetresponse(self):
603 606 # In safesend() we might set the _broken_pipe_resp
604 607 # attribute, in which case the socket has already
605 608 # been closed and we just need to give them the response
606 609 # back. Otherwise, we use the normal response path.
607 610 r = getattr(self, '_broken_pipe_resp', None)
608 611 if r is not None:
609 612 return r
610 613 return cls.getresponse(self)
611 614 safegetresponse.__doc__ = cls.getresponse.__doc__
612 615 return safegetresponse
613 616
614 617 class HTTPConnection(httplib.HTTPConnection):
615 618 # use the modified response class
616 619 response_class = HTTPResponse
617 620 send = safesend
618 621 getresponse = wrapgetresponse(httplib.HTTPConnection)
619 622
620 623
621 624 #########################################################################
622 625 ##### TEST FUNCTIONS
623 626 #########################################################################
624 627
625 628
626 629 def continuity(url):
627 630 md5 = hashlib.md5
628 631 format = '%25s: %s'
629 632
630 633 # first fetch the file with the normal http handler
631 634 opener = urlreq.buildopener()
632 635 urlreq.installopener(opener)
633 636 fo = urlreq.urlopen(url)
634 637 foo = fo.read()
635 638 fo.close()
636 639 m = md5(foo)
637 640 print(format % ('normal urllib', node.hex(m.digest())))
638 641
639 642 # now install the keepalive handler and try again
640 643 opener = urlreq.buildopener(HTTPHandler())
641 644 urlreq.installopener(opener)
642 645
643 646 fo = urlreq.urlopen(url)
644 647 foo = fo.read()
645 648 fo.close()
646 649 m = md5(foo)
647 650 print(format % ('keepalive read', node.hex(m.digest())))
648 651
649 652 fo = urlreq.urlopen(url)
650 653 foo = ''
651 654 while True:
652 655 f = fo.readline()
653 656 if f:
654 657 foo = foo + f
655 658 else:
656 659 break
657 660 fo.close()
658 661 m = md5(foo)
659 662 print(format % ('keepalive readline', node.hex(m.digest())))
660 663
661 664 def comp(N, url):
662 665 print(' making %i connections to:\n %s' % (N, url))
663 666
664 667 procutil.stdout.write(' first using the normal urllib handlers')
665 668 # first use normal opener
666 669 opener = urlreq.buildopener()
667 670 urlreq.installopener(opener)
668 671 t1 = fetch(N, url)
669 672 print(' TIME: %.3f s' % t1)
670 673
671 674 procutil.stdout.write(' now using the keepalive handler ')
672 675 # now install the keepalive handler and try again
673 676 opener = urlreq.buildopener(HTTPHandler())
674 677 urlreq.installopener(opener)
675 678 t2 = fetch(N, url)
676 679 print(' TIME: %.3f s' % t2)
677 680 print(' improvement factor: %.2f' % (t1 / t2))
678 681
679 682 def fetch(N, url, delay=0):
680 683 import time
681 684 lens = []
682 685 starttime = time.time()
683 686 for i in range(N):
684 687 if delay and i > 0:
685 688 time.sleep(delay)
686 689 fo = urlreq.urlopen(url)
687 690 foo = fo.read()
688 691 fo.close()
689 692 lens.append(len(foo))
690 693 diff = time.time() - starttime
691 694
692 695 j = 0
693 696 for i in lens[1:]:
694 697 j = j + 1
695 698 if not i == lens[0]:
696 699 print("WARNING: inconsistent length on read %i: %i" % (j, i))
697 700
698 701 return diff
699 702
700 703 def test_timeout(url):
701 704 global DEBUG
702 705 dbbackup = DEBUG
703 706 class FakeLogger(object):
704 707 def debug(self, msg, *args):
705 708 print(msg % args)
706 709 info = warning = error = debug
707 710 DEBUG = FakeLogger()
708 711 print(" fetching the file to establish a connection")
709 712 fo = urlreq.urlopen(url)
710 713 data1 = fo.read()
711 714 fo.close()
712 715
713 716 i = 20
714 717 print(" waiting %i seconds for the server to close the connection" % i)
715 718 while i > 0:
716 719 procutil.stdout.write('\r %2i' % i)
717 720 procutil.stdout.flush()
718 721 time.sleep(1)
719 722 i -= 1
720 723 procutil.stderr.write('\r')
721 724
722 725 print(" fetching the file a second time")
723 726 fo = urlreq.urlopen(url)
724 727 data2 = fo.read()
725 728 fo.close()
726 729
727 730 if data1 == data2:
728 731 print(' data are identical')
729 732 else:
730 733 print(' ERROR: DATA DIFFER')
731 734
732 735 DEBUG = dbbackup
733 736
734 737
735 738 def test(url, N=10):
736 739 print("performing continuity test (making sure stuff isn't corrupted)")
737 740 continuity(url)
738 741 print('')
739 742 print("performing speed comparison")
740 743 comp(N, url)
741 744 print('')
742 745 print("performing dropped-connection check")
743 746 test_timeout(url)
744 747
745 748 if __name__ == '__main__':
746 749 import time
747 750 try:
748 751 N = int(sys.argv[1])
749 752 url = sys.argv[2]
750 753 except (IndexError, ValueError):
751 754 print("%s <integer> <url>" % sys.argv[0])
752 755 else:
753 756 test(url, N)