keepalive: fix how md5 is used...
Mike Hommey
r22505:232d437a stable
@@ -1,761 +1,761 @@
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Martin Geisler:
23 23 # - moved md5 function from local util module to this module
24 24 # Modified by Augie Fackler:
25 25 # - add safesend method and use it to prevent broken pipe errors
26 26 # on large POST requests
27 27
28 28 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
29 29
30 30 >>> import urllib2
31 31 >>> from keepalive import HTTPHandler
32 32 >>> keepalive_handler = HTTPHandler()
33 33 >>> opener = urllib2.build_opener(keepalive_handler)
34 34 >>> urllib2.install_opener(opener)
35 35 >>>
36 36 >>> fo = urllib2.urlopen('http://www.python.org')
37 37
38 38 If a connection to a given host is requested, and all of the existing
39 39 connections are still in use, another connection will be opened. If
40 40 the handler tries to use an existing connection but it fails in some
41 41 way, it will be closed and removed from the pool.
42 42
43 43 To remove the handler, simply re-run build_opener with no arguments, and
44 44 install that opener.
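For example (an illustrative one-liner; it simply installs a fresh
default opener in place of the keepalive one):

>>> urllib2.install_opener(urllib2.build_opener())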
45 45
46 46 You can explicitly close connections by using the close_connection()
47 47 method of the returned file-like object (described below) or you can
48 48 use the handler methods:
49 49
50 50 close_connection(host)
51 51 close_all()
52 52 open_connections()
53 53
54 54 NOTE: using the close_connection and close_all methods of the handler
55 55 should be done with care when using multiple threads.
56 56 * there is nothing that prevents another thread from creating new
57 57 connections immediately after connections are closed
58 58 * no checks are done to prevent in-use connections from being closed
59 59
60 60 >>> keepalive_handler.close_all()
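
The other handler methods follow the same pattern; for instance, before
the close_all() above one might have seen (illustrative host and output,
not from a real run):

>>> keepalive_handler.open_connections()
[('www.python.org:80', 1)]
>>> keepalive_handler.close_connection('www.python.org:80')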
61 61
62 62 EXTRA ATTRIBUTES AND METHODS
63 63
64 64 Upon a status of 200, the object returned has a few additional
65 65 attributes and methods, which should not be used if you want to
66 66 remain consistent with the normal urllib2-returned objects:
67 67
68 68 close_connection() - close the connection to the host
69 69 readlines() - you know, readlines()
70 70 status - the return status (e.g. 404)
71 71 reason - English translation of status (e.g. 'File not found')
72 72
73 73 If you want the best of both worlds, use this inside an
74 74 AttributeError-catching try:
75 75
76 76 >>> try: status = fo.status
77 77 ... except AttributeError: status = None
78 78
79 79 Unfortunately, these are ONLY there if status == 200, so it's not
80 80 easy to distinguish between non-200 responses. The reason is that
81 81 urllib2 tries to do clever things with error codes 301, 302, 401,
82 82 and 407, and it wraps the object upon return.
83 83
84 84 For python versions earlier than 2.4, you can avoid this fancy error
85 85 handling by setting the module-level global HANDLE_ERRORS to zero.
86 86 You see, prior to 2.4, it's the HTTP Handler's job to determine what
87 87 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
88 88 means "pass everything up". In python 2.4, however, this job no
89 89 longer belongs to the HTTP Handler and is now done by a NEW handler,
90 90 HTTPErrorProcessor. Here's the bottom line:
91 91
92 92 python version < 2.4
93 93 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
94 94 errors
95 95 HANDLE_ERRORS == 0 pass everything up, error processing is
96 96 left to the calling code
97 97 python version >= 2.4
98 98 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
99 99 HANDLE_ERRORS == 0 (default) pass everything up, let the
100 100 other handlers (specifically,
101 101 HTTPErrorProcessor) decide what to do
102 102
103 103 In practice, setting the variable either way makes little difference
104 104 in python 2.4, so for the most consistent behavior across versions,
105 105 you probably just want to use the defaults, which will give you
106 106 exceptions on errors.
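
For example, to force "pass everything up" regardless of Python version
(keepalive here names this module, as imported above):

>>> import keepalive
>>> keepalive.HANDLE_ERRORS = 0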
107 107
108 108 """
109 109
110 110 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
111 111
112 112 import errno
113 113 import httplib
114 114 import socket
115 115 import thread
116 116 import urllib2
117 117
118 118 DEBUG = None
119 119
120 120 import sys
121 121 if sys.version_info < (2, 4):
122 122 HANDLE_ERRORS = 1
123 123 else: HANDLE_ERRORS = 0
124 124
125 125 class ConnectionManager(object):
126 126 """
127 127 The connection manager must be able to:
128 128 * keep track of all existing connections
129 129 """
130 130 def __init__(self):
131 131 self._lock = thread.allocate_lock()
132 132 self._hostmap = {} # map hosts to a list of connections
133 133 self._connmap = {} # map connections to host
134 134 self._readymap = {} # map connection to ready state
135 135
136 136 def add(self, host, connection, ready):
137 137 self._lock.acquire()
138 138 try:
139 139 if host not in self._hostmap:
140 140 self._hostmap[host] = []
141 141 self._hostmap[host].append(connection)
142 142 self._connmap[connection] = host
143 143 self._readymap[connection] = ready
144 144 finally:
145 145 self._lock.release()
146 146
147 147 def remove(self, connection):
148 148 self._lock.acquire()
149 149 try:
150 150 try:
151 151 host = self._connmap[connection]
152 152 except KeyError:
153 153 pass
154 154 else:
155 155 del self._connmap[connection]
156 156 del self._readymap[connection]
157 157 self._hostmap[host].remove(connection)
158 158 if not self._hostmap[host]: del self._hostmap[host]
159 159 finally:
160 160 self._lock.release()
161 161
162 162 def set_ready(self, connection, ready):
163 163 try:
164 164 self._readymap[connection] = ready
165 165 except KeyError:
166 166 pass
167 167
168 168 def get_ready_conn(self, host):
169 169 conn = None
170 170 self._lock.acquire()
171 171 try:
172 172 if host in self._hostmap:
173 173 for c in self._hostmap[host]:
174 174 if self._readymap[c]:
175 175 self._readymap[c] = 0
176 176 conn = c
177 177 break
178 178 finally:
179 179 self._lock.release()
180 180 return conn
181 181
182 182 def get_all(self, host=None):
183 183 if host:
184 184 return list(self._hostmap.get(host, []))
185 185 else:
186 186 return dict(self._hostmap)
187 187
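# Illustrative ConnectionManager round-trip (a sketch, not part of the
# module's public API; the host string is arbitrary):
#
#   cm = ConnectionManager()
#   conn = httplib.HTTPConnection('example.com')
#   cm.add('example.com', conn, ready=1)
#   c = cm.get_ready_conn('example.com')  # returns conn, marks it busy
#   cm.set_ready(c, 1)                    # hand it back to the pool
#   cm.remove(c)                          # or drop it entirely
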
188 188 class KeepAliveHandler(object):
189 189 def __init__(self):
190 190 self._cm = ConnectionManager()
191 191
192 192 #### Connection Management
193 193 def open_connections(self):
194 194 """return a list of connected hosts and the number of connections
195 195 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
196 196 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
197 197
198 198 def close_connection(self, host):
199 199 """close connection(s) to <host>
200 200 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
201 201 no error occurs if there is no connection to that host."""
202 202 for h in self._cm.get_all(host):
203 203 self._cm.remove(h)
204 204 h.close()
205 205
206 206 def close_all(self):
207 207 """close all open connections"""
208 208 for host, conns in self._cm.get_all().iteritems():
209 209 for h in conns:
210 210 self._cm.remove(h)
211 211 h.close()
212 212
213 213 def _request_closed(self, request, host, connection):
214 214 """tells us that this request is now closed and that the
215 215 connection is ready for another request"""
216 216 self._cm.set_ready(connection, 1)
217 217
218 218 def _remove_connection(self, host, connection, close=0):
219 219 if close:
220 220 connection.close()
221 221 self._cm.remove(connection)
222 222
223 223 #### Transaction Execution
224 224 def http_open(self, req):
225 225 return self.do_open(HTTPConnection, req)
226 226
227 227 def do_open(self, http_class, req):
228 228 host = req.get_host()
229 229 if not host:
230 230 raise urllib2.URLError('no host given')
231 231
232 232 try:
233 233 h = self._cm.get_ready_conn(host)
234 234 while h:
235 235 r = self._reuse_connection(h, req, host)
236 236
237 237 # if this response is non-None, then it worked and we're
238 238 # done. Break out, skipping the else block.
239 239 if r:
240 240 break
241 241
242 242 # connection is bad - possibly closed by server
243 243 # discard it and ask for the next free connection
244 244 h.close()
245 245 self._cm.remove(h)
246 246 h = self._cm.get_ready_conn(host)
247 247 else:
248 248 # no (working) free connections were found. Create a new one.
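                # (note: this is the while loop's else clause; it runs
                # only when the loop above exited without break, i.e.
                # when no pooled connection could be reused)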
249 249 h = http_class(host)
250 250 if DEBUG:
251 251 DEBUG.info("creating new connection to %s (%d)",
252 252 host, id(h))
253 253 self._cm.add(host, h, 0)
254 254 self._start_transaction(h, req)
255 255 r = h.getresponse()
256 256 except (socket.error, httplib.HTTPException), err:
257 257 raise urllib2.URLError(err)
258 258
259 259 # if not a persistent connection, don't try to reuse it
260 260 if r.will_close:
261 261 self._cm.remove(h)
262 262
263 263 if DEBUG:
264 264 DEBUG.info("STATUS: %s, %s", r.status, r.reason)
265 265 r._handler = self
266 266 r._host = host
267 267 r._url = req.get_full_url()
268 268 r._connection = h
269 269 r.code = r.status
270 270 r.headers = r.msg
271 271 r.msg = r.reason
272 272
273 273 if r.status == 200 or not HANDLE_ERRORS:
274 274 return r
275 275 else:
276 276 return self.parent.error('http', req, r,
277 277 r.status, r.msg, r.headers)
278 278
279 279 def _reuse_connection(self, h, req, host):
280 280 """start the transaction with a re-used connection
281 281 return a response object (r) upon success or None on failure.
282 282 This DOES not close or remove bad connections in cases where
283 283 it returns. However, if an unexpected exception occurs, it
284 284 will close and remove the connection before re-raising.
285 285 """
286 286 try:
287 287 self._start_transaction(h, req)
288 288 r = h.getresponse()
289 289 # note: just because we got something back doesn't mean it
290 290 # worked. We'll check the version below, too.
291 291 except (socket.error, httplib.HTTPException):
292 292 r = None
293 293 except: # re-raises
294 294 # adding this block just in case we've missed
295 295 # something we will still raise the exception, but
296 296 # lets try and close the connection and remove it
297 297 # first. We previously got into a nasty loop
298 298 # where an exception was uncaught, and so the
299 299 # connection stayed open. On the next try, the
300 300 # same exception was raised, etc. The trade-off is
301 301 # that it's now possible this call will raise
302 302 # a DIFFERENT exception
303 303 if DEBUG:
304 304 DEBUG.error("unexpected exception - closing "
305 305 "connection to %s (%d)", host, id(h))
306 306 self._cm.remove(h)
307 307 h.close()
308 308 raise
309 309
310 310 if r is None or r.version == 9:
311 311 # httplib falls back to assuming HTTP 0.9 if it gets a
312 312 # bad header back. This is most likely to happen if
313 313 # the socket has been closed by the server since we
314 314 # last used the connection.
315 315 if DEBUG:
316 316 DEBUG.info("failed to re-use connection to %s (%d)",
317 317 host, id(h))
318 318 r = None
319 319 else:
320 320 if DEBUG:
321 321 DEBUG.info("re-using connection to %s (%d)", host, id(h))
322 322
323 323 return r
324 324
325 325 def _start_transaction(self, h, req):
326 326 # What follows mostly reimplements HTTPConnection.request()
327 327 # except it adds self.parent.addheaders in the mix.
328 328 headers = req.headers.copy()
329 329 if sys.version_info >= (2, 4):
330 330 headers.update(req.unredirected_hdrs)
331 331 headers.update(self.parent.addheaders)
332 332 headers = dict((n.lower(), v) for n, v in headers.items())
333 333 skipheaders = {}
334 334 for n in ('host', 'accept-encoding'):
335 335 if n in headers:
336 336 skipheaders['skip_' + n.replace('-', '_')] = 1
337 337 try:
338 338 if req.has_data():
339 339 data = req.get_data()
340 340 h.putrequest('POST', req.get_selector(), **skipheaders)
341 341 if 'content-type' not in headers:
342 342 h.putheader('Content-type',
343 343 'application/x-www-form-urlencoded')
344 344 if 'content-length' not in headers:
345 345 h.putheader('Content-length', '%d' % len(data))
346 346 else:
347 347 h.putrequest('GET', req.get_selector(), **skipheaders)
348 348 except (socket.error), err:
349 349 raise urllib2.URLError(err)
350 350 for k, v in headers.items():
351 351 h.putheader(k, v)
352 352 h.endheaders()
353 353 if req.has_data():
354 354 h.send(data)
355 355
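# The skip_* keywords built above map onto real httplib keyword
# arguments: putrequest(method, url, skip_host=1, skip_accept_encoding=1)
# suppresses the automatically generated Host: and Accept-Encoding:
# headers so the caller-supplied values win. A minimal sketch of the
# same trick in isolation:
#
#   conn = httplib.HTTPConnection('example.com')
#   conn.putrequest('GET', '/', skip_host=1, skip_accept_encoding=1)
#   conn.putheader('Host', 'example.com')
#   conn.endheaders()
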
356 356 class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
357 357 pass
358 358
359 359 class HTTPResponse(httplib.HTTPResponse):
360 360 # we need to subclass HTTPResponse in order to
361 361 # 1) add readline() and readlines() methods
362 362 # 2) add close_connection() methods
363 363 # 3) add info() and geturl() methods
364 364
365 365 # in order to add readline(), read must be modified to deal with a
366 366 # buffer. example: readline must read a buffer and then spit back
367 367 # one line at a time. The only real alternative is to read one
368 368 # BYTE at a time (ick). Once something has been read, it can't be
369 369 # put back (ok, maybe it can, but that's even uglier than this),
370 370 # so if you THEN do a normal read, you must first take stuff from
371 371 # the buffer.
372 372
373 373 # the read method wraps the original to accommodate buffering,
374 374 # although read() never adds to the buffer.
375 375 # Both readline and readlines have been stolen with almost no
376 376 # modification from socket.py
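    # Buffering invariant (summary): data flows fp -> _rbuf -> caller.
    # read(amt) serves from _rbuf first and only then calls _raw_read();
    # readline() keeps refilling _rbuf until it holds a '\n' (or EOF).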
377 377
378 378
379 379 def __init__(self, sock, debuglevel=0, strict=0, method=None):
380 380 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
381 381 self.fileno = sock.fileno
382 382 self.code = None
383 383 self._rbuf = ''
384 384 self._rbufsize = 8096
385 385 self._handler = None # inserted by the handler later
386 386 self._host = None # (same)
387 387 self._url = None # (same)
388 388 self._connection = None # (same)
389 389
390 390 _raw_read = httplib.HTTPResponse.read
391 391
392 392 def close(self):
393 393 if self.fp:
394 394 self.fp.close()
395 395 self.fp = None
396 396 if self._handler:
397 397 self._handler._request_closed(self, self._host,
398 398 self._connection)
399 399
400 400 def close_connection(self):
401 401 self._handler._remove_connection(self._host, self._connection, close=1)
402 402 self.close()
403 403
404 404 def info(self):
405 405 return self.headers
406 406
407 407 def geturl(self):
408 408 return self._url
409 409
410 410 def read(self, amt=None):
411 411 # the _rbuf test is only in this first if for speed. It's not
412 412 # logically necessary
413 413 if self._rbuf and amt is not None:
414 414 L = len(self._rbuf)
415 415 if amt > L:
416 416 amt -= L
417 417 else:
418 418 s = self._rbuf[:amt]
419 419 self._rbuf = self._rbuf[amt:]
420 420 return s
421 421
422 422 s = self._rbuf + self._raw_read(amt)
423 423 self._rbuf = ''
424 424 return s
425 425
426 426 # stolen from Python SVN #68532 to fix issue1088
427 427 def _read_chunked(self, amt):
428 428 chunk_left = self.chunk_left
429 429 value = ''
430 430
431 431 # XXX This accumulates chunks by repeated string concatenation,
432 432 # which is not efficient as the number or size of chunks gets big.
433 433 while True:
434 434 if chunk_left is None:
435 435 line = self.fp.readline()
436 436 i = line.find(';')
437 437 if i >= 0:
438 438 line = line[:i] # strip chunk-extensions
439 439 try:
440 440 chunk_left = int(line, 16)
441 441 except ValueError:
442 442 # close the connection as protocol synchronization is
443 443 # probably lost
444 444 self.close()
445 445 raise httplib.IncompleteRead(value)
446 446 if chunk_left == 0:
447 447 break
448 448 if amt is None:
449 449 value += self._safe_read(chunk_left)
450 450 elif amt < chunk_left:
451 451 value += self._safe_read(amt)
452 452 self.chunk_left = chunk_left - amt
453 453 return value
454 454 elif amt == chunk_left:
455 455 value += self._safe_read(amt)
456 456 self._safe_read(2) # toss the CRLF at the end of the chunk
457 457 self.chunk_left = None
458 458 return value
459 459 else:
460 460 value += self._safe_read(chunk_left)
461 461 amt -= chunk_left
462 462
463 463 # we read the whole chunk, get another
464 464 self._safe_read(2) # toss the CRLF at the end of the chunk
465 465 chunk_left = None
466 466
467 467 # read and discard trailer up to the CRLF terminator
468 468 ### note: we shouldn't have any trailers!
469 469 while True:
470 470 line = self.fp.readline()
471 471 if not line:
472 472 # a vanishingly small number of sites EOF without
473 473 # sending the trailer
474 474 break
475 475 if line == '\r\n':
476 476 break
477 477
478 478 # we read everything; close the "file"
479 479 self.close()
480 480
481 481 return value
482 482
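    # For reference, the chunked wire format decoded above looks like
    # (illustrative):
    #   4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n
    # i.e. a hex chunk size, CRLF, payload, CRLF, repeated; a zero-size
    # chunk followed by an (optionally trailered) blank line ends the body.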
483 483 def readline(self, limit=-1):
484 484 i = self._rbuf.find('\n')
485 485 while i < 0 and not (0 < limit <= len(self._rbuf)):
486 486 new = self._raw_read(self._rbufsize)
487 487 if not new:
488 488 break
489 489 i = new.find('\n')
490 490 if i >= 0:
491 491 i = i + len(self._rbuf)
492 492 self._rbuf = self._rbuf + new
493 493 if i < 0:
494 494 i = len(self._rbuf)
495 495 else:
496 496 i = i + 1
497 497 if 0 <= limit < len(self._rbuf):
498 498 i = limit
499 499 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
500 500 return data
501 501
502 502 def readlines(self, sizehint=0):
503 503 total = 0
504 504 list = []
505 505 while True:
506 506 line = self.readline()
507 507 if not line:
508 508 break
509 509 list.append(line)
510 510 total += len(line)
511 511 if sizehint and total >= sizehint:
512 512 break
513 513 return list
514 514
515 515 def safesend(self, str):
516 516 """Send `str' to the server.
517 517
518 518 Shamelessly ripped off from httplib to patch a bad behavior.
519 519 """
520 520 # _broken_pipe_resp is an attribute we set in this function
521 521 # if the socket is closed while we're sending data but
522 522 # the server sent us a response before hanging up.
523 523 # In that case, we want to pretend to send the rest of the
524 524 # outgoing data, and then let the user use getresponse()
525 525 # (which we wrap) to get this last response before
526 526 # opening a new socket.
527 527 if getattr(self, '_broken_pipe_resp', None) is not None:
528 528 return
529 529
530 530 if self.sock is None:
531 531 if self.auto_open:
532 532 self.connect()
533 533 else:
534 534 raise httplib.NotConnected
535 535
536 536 # send the data to the server. if we get a broken pipe, then close
537 537 # the socket. we want to reconnect when somebody tries to send again.
538 538 #
539 539 # NOTE: we DO propagate the error, though, because we cannot simply
540 540 # ignore the error... the caller will know if they can retry.
541 541 if self.debuglevel > 0:
542 542 print "send:", repr(str)
543 543 try:
544 544 blocksize = 8192
545 545 read = getattr(str, 'read', None)
546 546 if read is not None:
547 547 if self.debuglevel > 0:
548 548 print "sending a read()able"
549 549 data = read(blocksize)
550 550 while data:
551 551 self.sock.sendall(data)
552 552 data = read(blocksize)
553 553 else:
554 554 self.sock.sendall(str)
555 555 except socket.error, v:
556 556 reraise = True
557 557 if v[0] == errno.EPIPE: # Broken pipe
558 558 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
559 559 self._broken_pipe_resp = None
560 560 self._broken_pipe_resp = self.getresponse()
561 561 reraise = False
562 562 self.close()
563 563 if reraise:
564 564 raise
565 565
566 566 def wrapgetresponse(cls):
567 567 """Wraps getresponse in cls with a broken-pipe sane version.
568 568 """
569 569 def safegetresponse(self):
570 570 # In safesend() we might set the _broken_pipe_resp
571 571 # attribute, in which case the socket has already
572 572 # been closed and we just need to give them the response
573 573 # back. Otherwise, we use the normal response path.
574 574 r = getattr(self, '_broken_pipe_resp', None)
575 575 if r is not None:
576 576 return r
577 577 return cls.getresponse(self)
578 578 safegetresponse.__doc__ = cls.getresponse.__doc__
579 579 return safegetresponse
580 580
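# How safesend and safegetresponse cooperate (an illustrative trace, not
# real output): a large POST hits EPIPE mid-send after the request
# headers went out; safesend stashes the server's early response in
# _broken_pipe_resp and suppresses the error; the caller's next
# getresponse() then returns the stashed response instead of reading the
# already-closed socket.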
581 581 class HTTPConnection(httplib.HTTPConnection):
582 582 # use the modified response class
583 583 response_class = HTTPResponse
584 584 send = safesend
585 585 getresponse = wrapgetresponse(httplib.HTTPConnection)
586 586
587 587
588 588 #########################################################################
589 589 ##### TEST FUNCTIONS
590 590 #########################################################################
591 591
592 592 def error_handler(url):
593 593 global HANDLE_ERRORS
594 594 orig = HANDLE_ERRORS
595 595 keepalive_handler = HTTPHandler()
596 596 opener = urllib2.build_opener(keepalive_handler)
597 597 urllib2.install_opener(opener)
598 598 pos = {0: 'off', 1: 'on'}
599 599 for i in (0, 1):
600 600 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
601 601 HANDLE_ERRORS = i
602 602 try:
603 603 fo = urllib2.urlopen(url)
604 604 fo.read()
605 605 fo.close()
606 606 try:
607 607 status, reason = fo.status, fo.reason
608 608 except AttributeError:
609 609 status, reason = None, None
610 610 except IOError, e:
611 611 print " EXCEPTION: %s" % e
612 612 raise
613 613 else:
614 614 print " status = %s, reason = %s" % (status, reason)
615 615 HANDLE_ERRORS = orig
616 616 hosts = keepalive_handler.open_connections()
617 617 print "open connections:", hosts
618 618 keepalive_handler.close_all()
619 619
620 620 def md5(s):
621 621 try:
622 622 from hashlib import md5 as _md5
623 623 except ImportError:
624 624 from md5 import md5 as _md5
625 625 global md5
626 626 md5 = _md5
627 627 return _md5(s)
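# Note the self-replacing stub above: the first call picks the right
# implementation (hashlib.md5 where available, the legacy md5 module
# otherwise) and rebinds the module-level name, so later calls hit the
# real constructor directly. This is why the callers below say
# md5(foo) rather than the pre-hashlib md5.new(foo) spelling.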
628 628
629 629 def continuity(url):
630 630 format = '%25s: %s'
631 631
632 632 # first fetch the file with the normal http handler
633 633 opener = urllib2.build_opener()
634 634 urllib2.install_opener(opener)
635 635 fo = urllib2.urlopen(url)
636 636 foo = fo.read()
637 637 fo.close()
638 - m = md5.new(foo)
638 + m = md5(foo)
639 639 print format % ('normal urllib', m.hexdigest())
640 640
641 641 # now install the keepalive handler and try again
642 642 opener = urllib2.build_opener(HTTPHandler())
643 643 urllib2.install_opener(opener)
644 644
645 645 fo = urllib2.urlopen(url)
646 646 foo = fo.read()
647 647 fo.close()
648 - m = md5.new(foo)
648 + m = md5(foo)
649 649 print format % ('keepalive read', m.hexdigest())
650 650
651 651 fo = urllib2.urlopen(url)
652 652 foo = ''
653 653 while True:
654 654 f = fo.readline()
655 655 if f:
656 656 foo = foo + f
657 657 else: break
658 658 fo.close()
659 - m = md5.new(foo)
659 + m = md5(foo)
660 660 print format % ('keepalive readline', m.hexdigest())
661 661
662 662 def comp(N, url):
663 663 print ' making %i connections to:\n %s' % (N, url)
664 664
665 665 sys.stdout.write(' first using the normal urllib handlers')
666 666 # first use normal opener
667 667 opener = urllib2.build_opener()
668 668 urllib2.install_opener(opener)
669 669 t1 = fetch(N, url)
670 670 print ' TIME: %.3f s' % t1
671 671
672 672 sys.stdout.write(' now using the keepalive handler ')
673 673 # now install the keepalive handler and try again
674 674 opener = urllib2.build_opener(HTTPHandler())
675 675 urllib2.install_opener(opener)
676 676 t2 = fetch(N, url)
677 677 print ' TIME: %.3f s' % t2
678 678 print ' improvement factor: %.2f' % (t1 / t2)
679 679
680 680 def fetch(N, url, delay=0):
681 681 import time
682 682 lens = []
683 683 starttime = time.time()
684 684 for i in range(N):
685 685 if delay and i > 0:
686 686 time.sleep(delay)
687 687 fo = urllib2.urlopen(url)
688 688 foo = fo.read()
689 689 fo.close()
690 690 lens.append(len(foo))
691 691 diff = time.time() - starttime
692 692
693 693 j = 0
694 694 for i in lens[1:]:
695 695 j = j + 1
696 696 if not i == lens[0]:
697 697 print "WARNING: inconsistent length on read %i: %i" % (j, i)
698 698
699 699 return diff
700 700
701 701 def test_timeout(url):
702 702 global DEBUG
703 703 dbbackup = DEBUG
704 704 class FakeLogger(object):
705 705 def debug(self, msg, *args):
706 706 print msg % args
707 707 info = warning = error = debug
708 708 DEBUG = FakeLogger()
709 709 print " fetching the file to establish a connection"
710 710 fo = urllib2.urlopen(url)
711 711 data1 = fo.read()
712 712 fo.close()
713 713
714 714 i = 20
715 715 print " waiting %i seconds for the server to close the connection" % i
716 716 while i > 0:
717 717 sys.stdout.write('\r %2i' % i)
718 718 sys.stdout.flush()
719 719 time.sleep(1)
720 720 i -= 1
721 721 sys.stderr.write('\r')
722 722
723 723 print " fetching the file a second time"
724 724 fo = urllib2.urlopen(url)
725 725 data2 = fo.read()
726 726 fo.close()
727 727
728 728 if data1 == data2:
729 729 print ' data are identical'
730 730 else:
731 731 print ' ERROR: DATA DIFFER'
732 732
733 733 DEBUG = dbbackup
734 734
735 735
736 736 def test(url, N=10):
737 737 print "checking error handler (do this on a non-200)"
738 738 try: error_handler(url)
739 739 except IOError:
740 740 print "exiting - exception will prevent further tests"
741 741 sys.exit()
742 742 print
743 743 print "performing continuity test (making sure stuff isn't corrupted)"
744 744 continuity(url)
745 745 print
746 746 print "performing speed comparison"
747 747 comp(N, url)
748 748 print
749 749 print "performing dropped-connection check"
750 750 test_timeout(url)
751 751
752 752 if __name__ == '__main__':
753 753 import time
754 754 import sys
755 755 try:
756 756 N = int(sys.argv[1])
757 757 url = sys.argv[2]
758 758 except (IndexError, ValueError):
759 759 print "%s <integer> <url>" % sys.argv[0]
760 760 else:
761 761 test(url, N)
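# Example invocation (hypothetical URL):
#   python keepalive.py 10 http://www.example.com/
# runs the error-handler, continuity, speed-comparison and
# dropped-connection tests with 10 fetches against the given URL.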