##// END OF EJS Templates
keepalive: use safehasattr instead of hasattr
Augie Fackler -
r14958:fd246aef default
parent child Browse files
Show More
@@ -1,765 +1,766 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, write to the
13 13 # Free Software Foundation, Inc.,
14 14 # 59 Temple Place, Suite 330,
15 15 # Boston, MA 02111-1307 USA
16 16
17 17 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
18 18 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19 19
20 20 # Modified by Benoit Boissinot:
21 21 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
22 22 # Modified by Dirkjan Ochtman:
23 23 # - import md5 function from a local util module
24 24 # Modified by Martin Geisler:
25 25 # - moved md5 function from local util module to this module
26 26 # Modified by Augie Fackler:
27 27 # - add safesend method and use it to prevent broken pipe errors
28 28 # on large POST requests
29 29
30 30 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
31 31
32 32 >>> import urllib2
33 33 >>> from keepalive import HTTPHandler
34 34 >>> keepalive_handler = HTTPHandler()
35 35 >>> opener = urllib2.build_opener(keepalive_handler)
36 36 >>> urllib2.install_opener(opener)
37 37 >>>
38 38 >>> fo = urllib2.urlopen('http://www.python.org')
39 39
40 40 If a connection to a given host is requested, and all of the existing
41 41 connections are still in use, another connection will be opened. If
42 42 the handler tries to use an existing connection but it fails in some
43 43 way, it will be closed and removed from the pool.
44 44
45 45 To remove the handler, simply re-run build_opener with no arguments, and
46 46 install that opener.
47 47
48 48 You can explicitly close connections by using the close_connection()
49 49 method of the returned file-like object (described below) or you can
50 50 use the handler methods:
51 51
52 52 close_connection(host)
53 53 close_all()
54 54 open_connections()
55 55
56 56 NOTE: using the close_connection and close_all methods of the handler
57 57 should be done with care when using multiple threads.
58 58 * there is nothing that prevents another thread from creating new
59 59 connections immediately after connections are closed
60 60 * no checks are done to prevent in-use connections from being closed
61 61
62 62 >>> keepalive_handler.close_all()
63 63
64 64 EXTRA ATTRIBUTES AND METHODS
65 65
66 66 Upon a status of 200, the object returned has a few additional
67 67 attributes and methods, which should not be used if you want to
68 68 remain consistent with the normal urllib2-returned objects:
69 69
70 70 close_connection() - close the connection to the host
71 71 readlines() - you know, readlines()
72 72 status - the return status (ie 404)
73 73 reason - english translation of status (ie 'File not found')
74 74
75 75 If you want the best of both worlds, use this inside an
76 76 AttributeError-catching try:
77 77
78 78 >>> try: status = fo.status
79 79 >>> except AttributeError: status = None
80 80
81 81 Unfortunately, these are ONLY there if status == 200, so it's not
82 82 easy to distinguish between non-200 responses. The reason is that
83 83 urllib2 tries to do clever things with error codes 301, 302, 401,
84 84 and 407, and it wraps the object upon return.
85 85
86 86 For python versions earlier than 2.4, you can avoid this fancy error
87 87 handling by setting the module-level global HANDLE_ERRORS to zero.
88 88 You see, prior to 2.4, it's the HTTP Handler's job to determine what
89 89 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
90 90 means "pass everything up". In python 2.4, however, this job no
91 91 longer belongs to the HTTP Handler and is now done by a NEW handler,
92 92 HTTPErrorProcessor. Here's the bottom line:
93 93
94 94 python version < 2.4
95 95 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
96 96 errors
97 97 HANDLE_ERRORS == 0 pass everything up, error processing is
98 98 left to the calling code
99 99 python version >= 2.4
100 100 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
101 101 HANDLE_ERRORS == 0 (default) pass everything up, let the
102 102 other handlers (specifically,
103 103 HTTPErrorProcessor) decide what to do
104 104
105 105 In practice, setting the variable either way makes little difference
106 106 in python 2.4, so for the most consistent behavior across versions,
107 107 you probably just want to use the defaults, which will give you
108 108 exceptions on errors.
109 109
110 110 """
111 111
112 112 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
113 113
114 114 import errno
115 115 import httplib
116 116 import socket
117 117 import thread
118 118 import urllib2
119 119
120 120 DEBUG = None
121 121
122 122 import sys
123 123 if sys.version_info < (2, 4):
124 124 HANDLE_ERRORS = 1
125 125 else: HANDLE_ERRORS = 0
126 126
class ConnectionManager(object):
    """
    Thread-safe registry of keepalive connections.

    The connection manager must be able to:
      * keep track of all existing connections, grouped by host
      * hand out an idle ("ready") connection for a host on request
    """
    def __init__(self):
        # one lock guards all three maps so they stay mutually consistent
        self._lock = thread.allocate_lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        # Register a connection under its host, with an initial ready state.
        self._lock.acquire()
        try:
            if not host in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        # Forget a connection entirely; a no-op if it was never added
        # (or was already removed).
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]: del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        # Mark a connection idle (truthy) or busy (falsy).  No lock is
        # taken here.  NOTE(review): a plain dict store cannot raise
        # KeyError, so this guard appears to be vestigial.
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        # Return an idle connection to `host' (marking it busy before
        # releasing the lock), or None if none is available.
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        # With a host: a copy of that host's connection list (possibly
        # empty).  Without: a shallow copy of the host -> connections map.
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
189 189
class KeepAliveHandler(object):
    """urllib2-style handler that pools connections via ConnectionManager."""
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        # Drop a connection from the pool, optionally closing its socket.
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        # Core of the handler: try each pooled idle connection in turn,
        # falling back to a brand-new connection when none works.
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                # (while/else: this runs only when the loop exhausted the
                # pool without break-ing out with a good response)
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                # added as busy (ready=0); marked ready when the response
                # is closed (see _request_closed)
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException), err:
            raise urllib2.URLError(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # decorate the response so HTTPResponse.close() can hand the
        # connection back to the pool, and mimic urllib2's return object
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            # delegate non-200 handling to the opener's error machinery
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = req.headers.copy()
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        headers.update(self.parent.addheaders)
        # lower-case all header names so duplicate detection below works
        headers = dict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                # tell putrequest not to auto-emit headers we already have
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except (socket.error), err:
            raise urllib2.URLError(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    # keepalive-aware drop-in for urllib2.HTTPHandler: the MRO puts
    # KeepAliveHandler's http_open/do_open ahead of urllib2's.
    pass
360 360
class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method: # the httplib in python 2.3 uses the method arg
            httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''        # readline() lookahead buffer
        self._rbufsize = 8096  # chunk size used to fill the buffer
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # keep a handle on the unbuffered base-class read
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        # Close the response body and, if a handler was attached, tell it
        # this connection is ready for reuse.
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        # Unlike close(), this tears down the underlying connection too.
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        # urllib2-compatible accessor for the response headers
        return self.headers

    def geturl(self):
        # urllib2-compatible accessor for the final URL
        return self._url

    def read(self, amt=None):
        """Read up to `amt' bytes (all remaining data if amt is None),
        draining the readline() buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                # request satisfied entirely from the buffer
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        # Decode a Transfer-Encoding: chunked body; `amt' limits the
        # number of bytes returned (None means read everything).
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                # at a chunk boundary: parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    # last-chunk marker: fall through to trailer handling
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                # caller wants less than this chunk holds; remember the rest
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def readline(self, limit=-1):
        """Read one line (or at most `limit' bytes of it) using the
        internal buffer; adapted from socket.py."""
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            # no newline buffered yet: pull in another block
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find('\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            i = len(self._rbuf)
        else:
            i = i + 1
        if 0 <= limit < len(self._rbuf):
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        """Read lines until EOF (or until roughly `sizehint' bytes have
        been collected) and return them as a list."""
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list
519 519
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print "send:", repr(str)
    try:
        blocksize = 8192
        # duck-type on a read method rather than isinstance, so any
        # file-like payload can be streamed in blocks
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print "sendIng a read()able"
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error, v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # the request made it out before the pipe broke: capture
                # the server's early response so the wrapped getresponse()
                # can return it, and swallow the error
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
569 570
def wrapgetresponse(cls):
    """Build a broken-pipe-tolerant replacement for cls.getresponse.

    The returned function first hands back any response stashed on the
    instance under ``_broken_pipe_resp`` (set by safesend() when the
    socket broke after the server had already answered); otherwise it
    defers to the original ``cls.getresponse``.
    """
    def safegetresponse(self):
        stashed = getattr(self, '_broken_pipe_resp', None)
        if stashed is None:
            # normal path: no early response was captured
            return cls.getresponse(self)
        return stashed
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
584 585
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    # swap in the broken-pipe-tolerant send/getresponse pair defined above
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)
590 591
591 592
592 593 #########################################################################
593 594 ##### TEST FUNCTIONS
594 595 #########################################################################
595 596
def error_handler(url):
    """Test helper: fetch url once with HANDLE_ERRORS off and once with
    it on, printing the status/reason seen each time, then report and
    close any connections left open."""
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print "  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
        HANDLE_ERRORS = i
        try:
            fo = urllib2.urlopen(url)
            fo.read()
            fo.close()
            try:
                # status/reason only exist on keepalive responses
                status, reason = fo.status, fo.reason
            except AttributeError:
                status, reason = None, None
        except IOError, e:
            print "  EXCEPTION: %s" % e
            raise
        else:
            print "  status = %s, reason = %s" % (status, reason)
    # restore the caller's error-handling mode
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print "open connections:", hosts
    keepalive_handler.close_all()
623 624
def md5(s):
    """Hash ``s`` with MD5, resolving the implementation lazily.

    The first call locates an md5 constructor (hashlib on modern
    Pythons, the legacy md5 module as a fallback) and rebinds the
    module-level ``md5`` name to it, so subsequent calls go straight
    to the constructor.
    """
    global md5
    try:
        from hashlib import md5 as impl
    except ImportError:
        from md5 import md5 as impl
    md5 = impl
    return impl(s)
632 633
633 634 def continuity(url):
634 635 format = '%25s: %s'
635 636
636 637 # first fetch the file with the normal http handler
637 638 opener = urllib2.build_opener()
638 639 urllib2.install_opener(opener)
639 640 fo = urllib2.urlopen(url)
640 641 foo = fo.read()
641 642 fo.close()
642 643 m = md5.new(foo)
643 644 print format % ('normal urllib', m.hexdigest())
644 645
645 646 # now install the keepalive handler and try again
646 647 opener = urllib2.build_opener(HTTPHandler())
647 648 urllib2.install_opener(opener)
648 649
649 650 fo = urllib2.urlopen(url)
650 651 foo = fo.read()
651 652 fo.close()
652 653 m = md5.new(foo)
653 654 print format % ('keepalive read', m.hexdigest())
654 655
655 656 fo = urllib2.urlopen(url)
656 657 foo = ''
657 658 while True:
658 659 f = fo.readline()
659 660 if f:
660 661 foo = foo + f
661 662 else: break
662 663 fo.close()
663 664 m = md5.new(foo)
664 665 print format % ('keepalive readline', m.hexdigest())
665 666
def comp(N, url):
    """Test helper: time N fetches of url with the stock urllib2 handlers
    and again with the keepalive handler, then print the speedup."""
    print '  making %i connections to:\n  %s' % (N, url)

    sys.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    t1 = fetch(N, url)
    print '  TIME: %.3f s' % t1

    sys.stdout.write('  now using the keepalive handler       ')
    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)
    t2 = fetch(N, url)
    print '  TIME: %.3f s' % t2
    print '  improvement factor: %.2f' % (t1 / t2)
683 684
def fetch(N, url, delay=0):
    """Fetch url N times (sleeping `delay' seconds between requests when
    given), warn about inconsistent response lengths, and return the
    total elapsed time in seconds."""
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urllib2.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    # every response should be the same length as the first one
    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print "WARNING: inconsistent length on read %i: %i" % (j, i)

    return diff
704 705
def test_timeout(url):
    """Test helper: fetch url, wait long enough for the server to drop
    the idle connection, fetch again, and check both bodies match --
    exercising the dead-connection recovery path with DEBUG logging on."""
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        # minimal logger: print every message at every level
        def debug(self, msg, *args):
            print msg % args
        info = warning = error = debug
    DEBUG = FakeLogger()
    print "  fetching the file to establish a connection"
    fo = urllib2.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print "  waiting %i seconds for the server to close the connection" % i
    while i > 0:
        sys.stdout.write('\r  %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print "  fetching the file a second time"
    fo = urllib2.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print '  data are identical'
    else:
        print '  ERROR: DATA DIFFER'

    # restore whatever DEBUG the caller had configured
    DEBUG = dbbackup
738 739
739 740
def test(url, N=10):
    """Run the full manual test suite (error handling, continuity,
    speed comparison, dropped-connection recovery) against url."""
    print "checking error hander (do this on a non-200)"
    try: error_handler(url)
    except IOError:
        # nothing else can be trusted if the basic fetch fails
        print "exiting - exception will prevent further tests"
        sys.exit()
    print
    print "performing continuity test (making sure stuff isn't corrupted)"
    continuity(url)
    print
    print "performing speed comparison"
    comp(N, url)
    print
    print "performing dropped-connection check"
    test_timeout(url)
755 756
756 757 if __name__ == '__main__':
757 758 import time
758 759 import sys
759 760 try:
760 761 N = int(sys.argv[1])
761 762 url = sys.argv[2]
762 763 except:
763 764 print "%s <integer> <url>" % sys.argv[0]
764 765 else:
765 766 test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now