##// END OF EJS Templates
keepalive: drop python 2.2 legacy code
Bryan O'Sullivan -
r17700:5b1b0e4e default
parent child Browse files
Show More
@@ -1,764 +1,761 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Martin Geisler:
23 23 # - moved md5 function from local util module to this module
24 24 # Modified by Augie Fackler:
25 25 # - add safesend method and use it to prevent broken pipe errors
26 26 # on large POST requests
27 27
28 28 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
29 29
30 30 >>> import urllib2
31 31 >>> from keepalive import HTTPHandler
32 32 >>> keepalive_handler = HTTPHandler()
33 33 >>> opener = urllib2.build_opener(keepalive_handler)
34 34 >>> urllib2.install_opener(opener)
35 35 >>>
36 36 >>> fo = urllib2.urlopen('http://www.python.org')
37 37
38 38 If a connection to a given host is requested, and all of the existing
39 39 connections are still in use, another connection will be opened. If
40 40 the handler tries to use an existing connection but it fails in some
41 41 way, it will be closed and removed from the pool.
42 42
43 43 To remove the handler, simply re-run build_opener with no arguments, and
44 44 install that opener.
45 45
46 46 You can explicitly close connections by using the close_connection()
47 47 method of the returned file-like object (described below) or you can
48 48 use the handler methods:
49 49
50 50 close_connection(host)
51 51 close_all()
52 52 open_connections()
53 53
54 54 NOTE: using the close_connection and close_all methods of the handler
55 55 should be done with care when using multiple threads.
56 56 * there is nothing that prevents another thread from creating new
57 57 connections immediately after connections are closed
58 58 * no checks are done to prevent in-use connections from being closed
59 59
60 60 >>> keepalive_handler.close_all()
61 61
62 62 EXTRA ATTRIBUTES AND METHODS
63 63
64 64 Upon a status of 200, the object returned has a few additional
65 65 attributes and methods, which should not be used if you want to
66 66 remain consistent with the normal urllib2-returned objects:
67 67
68 68 close_connection() - close the connection to the host
69 69 readlines() - you know, readlines()
70 70 status - the return status (e.g. 404)
71 71 reason - English translation of status (e.g. 'File not found')
72 72
73 73 If you want the best of both worlds, use this inside an
74 74 AttributeError-catching try:
75 75
76 76 >>> try: status = fo.status
77 77 >>> except AttributeError: status = None
78 78
79 79 Unfortunately, these are ONLY there if status == 200, so it's not
80 80 easy to distinguish between non-200 responses. The reason is that
81 81 urllib2 tries to do clever things with error codes 301, 302, 401,
82 82 and 407, and it wraps the object upon return.
83 83
84 84 For python versions earlier than 2.4, you can avoid this fancy error
85 85 handling by setting the module-level global HANDLE_ERRORS to zero.
86 86 You see, prior to 2.4, it's the HTTP Handler's job to determine what
87 87 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
88 88 means "pass everything up". In python 2.4, however, this job no
89 89 longer belongs to the HTTP Handler and is now done by a NEW handler,
90 90 HTTPErrorProcessor. Here's the bottom line:
91 91
92 92 python version < 2.4
93 93 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
94 94 errors
95 95 HANDLE_ERRORS == 0 pass everything up, error processing is
96 96 left to the calling code
97 97 python version >= 2.4
98 98 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
99 99 HANDLE_ERRORS == 0 (default) pass everything up, let the
100 100 other handlers (specifically,
101 101 HTTPErrorProcessor) decide what to do
102 102
103 103 In practice, setting the variable either way makes little difference
104 104 in python 2.4, so for the most consistent behavior across versions,
105 105 you probably just want to use the defaults, which will give you
106 106 exceptions on errors.
107 107
108 108 """
109 109
110 110 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
111 111
112 112 import errno
113 113 import httplib
114 114 import socket
115 115 import thread
116 116 import urllib2
117 117
# Optional logger-like object.  When set to something truthy, the handler
# code below calls its info()/error() methods; None disables the output.
DEBUG = None

import sys
# Default for HANDLE_ERRORS depends on the interpreter version (see the
# module docstring): 1 makes do_open() route every non-200 response
# through self.parent.error(), 0 returns every response to the caller.
if sys.version_info < (2, 4):
    HANDLE_ERRORS = 1
else: HANDLE_ERRORS = 0
124 124
class ConnectionManager(object):
    """Registry of pooled connections, guarded by a lock.

    The connection manager must be able to:
      * keep track of all existing connections, grouped by host
      * remember whether each connection is ready (idle) or in use
      * hand out a ready connection for a host, marking it in use
    """
    def __init__(self):
        self._lock = thread.allocate_lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Start tracking connection under host with the given ready state."""
        self._lock.acquire()
        try:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Stop tracking connection; unknown connections are ignored."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                self._readymap.pop(connection, None)
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        """Record the ready state of a connection that is still tracked.

        A connection that was never added, or has already been removed,
        is ignored.  A plain assignment would silently re-insert it into
        the ready map, where nothing would ever delete it again.
        """
        # The check and the store must not interleave with remove(), so
        # take the same lock the other mutators use.
        self._lock.acquire()
        try:
            if connection in self._readymap:
                self._readymap[connection] = ready
        finally:
            self._lock.release()

    def get_ready_conn(self, host):
        """Return a ready connection for host, or None if there is none.

        The returned connection is marked as not ready before the lock
        is released, so it is not handed out twice.
        """
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        """Return a copy of the tracked connections.

        With a host, return a new list of that host's connections (empty
        if the host is unknown).  Without one, return a new dict mapping
        each host to its list of connections.
        """
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
187 187
class KeepAliveHandler(object):
    """Handler logic that sends requests over pooled, reusable connections.

    Connections are tracked in a ConnectionManager.  The methods use
    self.parent (addheaders, error), which this class never sets;
    presumably urllib2 supplies it when the handler is installed in an
    opener -- confirm against urllib2.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each. [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # get_all() returns a copy of the host map, so removing entries
        # from the manager while looping is safe
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        # request and host are accepted but not used here
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """Stop tracking connection, closing it first when close is true.

        host is accepted but not used here.
        """
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        """Open req using the keepalive-aware HTTPConnection class."""
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Send req and return its response object.

        Ready pooled connections to the request's host are tried first;
        each one that fails is closed and dropped.  If none works, a new
        http_class(host) connection is created and registered as in use.

        Raises urllib2.URLError when the request has no host, or when a
        socket.error / httplib.HTTPException escapes the attempt.
        """
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done. Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found. Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                # registered as not ready: it is about to carry this request
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException), err:
            raise urllib2.URLError(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # attach what the response needs for close(), close_connection(),
        # info() and geturl()
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        # order matters: keep the header object in r.headers before r.msg
        # is overwritten with the reason phrase
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns. However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked. We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first. We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open. On the next try, the
            # same exception was raised, etc. The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back. This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Write the request line, headers and any body of req to h.

        Raises urllib2.URLError if putrequest() fails with socket.error.
        """
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = req.headers.copy()
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        # later updates win, so addheaders override same-named request headers
        headers.update(self.parent.addheaders)
        # lower-case the names so the membership tests below are
        # case-insensitive
        headers = dict((n.lower(), v) for n, v in headers.items())
        # build skip_host / skip_accept_encoding keyword arguments for
        # putrequest() when the caller supplies those headers itself
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except (socket.error), err:
            raise urllib2.URLError(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
355 355
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    """urllib2 HTTP handler with KeepAliveHandler first in its bases."""
358 358
class HTTPResponse(httplib.HTTPResponse):
    """httplib response with line buffering and keepalive pool hooks."""
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer. example: readline must read a buffer and then spit back
    # one line at a time. The only real alternative is to read one
    # BYTE at a time (ick). Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # Pass strict and method by keyword.  httplib's signature is
        # (sock, debuglevel, strict, method), so a positional third
        # argument would land in the strict slot and the request method
        # would never reach the base class.
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      strict=strict, method=method)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None    # (same)
        self._url = None     # (same)
        self._connection = None # (same)

    # unbuffered read of the base class, used to refill/bypass _rbuf
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and report the request as closed.

        The handler is notified on every call, whether or not the file
        object was still open.
        """
        if self.fp:
            self.fp.close()
            self.fp = None
        if self._handler:
            self._handler._request_closed(self, self._host,
                                          self._connection)

    def close_connection(self):
        """Close the underlying connection and drop it from the pool."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        """Return the headers attribute (assigned by the handler)."""
        return self.headers

    def geturl(self):
        """Return the URL recorded by the handler for this response."""
        return self._url

    def read(self, amt=None):
        """Read amt bytes (everything if None), draining _rbuf first."""
        # the _rbuf test is only in this first if for speed. It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read amt bytes (all if None) of a chunked-encoded body."""
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2) # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2) # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def readline(self, limit=-1):
        """Return the next line, or at most limit bytes of it if limit >= 0.

        Data is pulled from the connection in _rbufsize pieces until a
        newline is buffered, the limit is covered, or the stream ends.
        """
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find('\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            i = len(self._rbuf)
        else:
            i = i + 1
        # only shorten the line; comparing against the whole buffer would
        # let a limit larger than the line return data past the newline
        if 0 <= limit < i:
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        """Return remaining lines, stopping once sizehint bytes are read."""
        total = 0
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
517 514
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    str is either a string or an object with a read() method, which is
    then sent in blocks.  Installed as HTTPConnection.send below, so
    self is a connection object.  socket.error is re-raised unless it
    is a broken pipe for which a response could still be fetched.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print "send:", repr(str)
    try:
        blocksize = 8192
        # file-like payloads are streamed in blocksize pieces
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print "sending a read()able"
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error, v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            # reaches into HTTPConnection's name-mangled private state
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # clear the attribute first so the wrapped getresponse()
                # takes the normal path instead of returning a saved
                # response
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
568 565
def wrapgetresponse(cls):
    """Build a getresponse replacement for cls that survives broken pipes.

    The returned function hands back the response stashed in the
    instance's _broken_pipe_resp attribute when there is one, and
    otherwise defers to cls.getresponse.  It carries the docstring of
    cls.getresponse.
    """
    def safegetresponse(self):
        # safesend() may have stored a response after a broken pipe; the
        # socket is already closed then, so just return what was saved.
        saved = getattr(self, '_broken_pipe_resp', None)
        if saved is None:
            return cls.getresponse(self)
        return saved
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
583 580
class HTTPConnection(httplib.HTTPConnection):
    """httplib connection wired to the keepalive response and send logic."""
    # responses are created from the modified, buffering response class
    response_class = HTTPResponse
    # broken-pipe tolerant replacements for the stock methods
    getresponse = wrapgetresponse(httplib.HTTPConnection)
    send = safesend
589 586
590 587
591 588 #########################################################################
592 589 ##### TEST FUNCTIONS
593 590 #########################################################################
594 591
595 592 def error_handler(url):
596 593 global HANDLE_ERRORS
597 594 orig = HANDLE_ERRORS
598 595 keepalive_handler = HTTPHandler()
599 596 opener = urllib2.build_opener(keepalive_handler)
600 597 urllib2.install_opener(opener)
601 598 pos = {0: 'off', 1: 'on'}
602 599 for i in (0, 1):
603 600 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
604 601 HANDLE_ERRORS = i
605 602 try:
606 603 fo = urllib2.urlopen(url)
607 604 fo.read()
608 605 fo.close()
609 606 try:
610 607 status, reason = fo.status, fo.reason
611 608 except AttributeError:
612 609 status, reason = None, None
613 610 except IOError, e:
614 611 print " EXCEPTION: %s" % e
615 612 raise
616 613 else:
617 614 print " status = %s, reason = %s" % (status, reason)
618 615 HANDLE_ERRORS = orig
619 616 hosts = keepalive_handler.open_connections()
620 617 print "open connections:", hosts
621 618 keepalive_handler.close_all()
622 619
def md5(s):
    """Return an md5 hash object for s.

    The first call looks up the real constructor (hashlib, falling back
    to the old md5 module) and rebinds the module-level name md5 to it,
    so later lookups of md5 in this module reach the constructor directly.
    """
    global md5
    try:
        from hashlib import md5 as hasher
    except ImportError:
        from md5 import md5 as hasher
    md5 = hasher
    return hasher(s)
631 628
def continuity(url):
    """Fetch url three ways and print an md5 digest of each result.

    The fetches are: the stock urllib2 opener, the keepalive handler
    using read(), and the keepalive handler using readline().  Equal
    digests mean the keepalive code did not corrupt the data.
    """
    fmt = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    # md5 is this module's md5() function, not the old md5 module, so
    # call it directly (it has no .new attribute)
    m = md5(foo)
    print(fmt % ('normal urllib', m.hexdigest()))

    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)

    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(fmt % ('keepalive read', m.hexdigest()))

    # same again, but assembled line by line through readline()
    fo = urllib2.urlopen(url)
    pieces = []
    while True:
        f = fo.readline()
        if not f:
            break
        pieces.append(f)
    fo.close()
    m = md5(''.join(pieces))
    print(fmt % ('keepalive readline', m.hexdigest()))
664 661
def comp(N, url):
    """Time N fetches of url with the stock opener and with keepalive.

    Prints both timings and the ratio of the first to the second.
    """
    print(' making %i connections to:\n %s' % (N, url))

    # baseline: the normal opener
    sys.stdout.write(' first using the normal urllib handlers')
    urllib2.install_opener(urllib2.build_opener())
    plain_time = fetch(N, url)
    print(' TIME: %.3f s' % plain_time)

    # same workload through the keepalive handler
    sys.stdout.write(' now using the keepalive handler ')
    urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    keepalive_time = fetch(N, url)
    print(' TIME: %.3f s' % keepalive_time)
    print(' improvement factor: %.2f' % (plain_time / keepalive_time))
682 679
def fetch(N, url, delay=0):
    """Fetch url N times and return the elapsed wall-clock seconds.

    When delay is non-zero, sleep that many seconds before every fetch
    except the first.  A warning is printed for each later read whose
    length differs from the first read.
    """
    import time
    sizes = []
    started = time.time()
    for attempt in range(N):
        if delay and attempt > 0:
            time.sleep(delay)
        response = urllib2.urlopen(url)
        body = response.read()
        response.close()
        sizes.append(len(body))
    elapsed = time.time() - started

    # compare every later read against the first one
    for index, size in enumerate(sizes[1:]):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i"
                  % (index + 1, size))

    return elapsed
703 700
def test_timeout(url):
    """Check that a connection dropped by the server is handled.

    Fetches url, waits 20 seconds so the server can close the idle
    connection, fetches it again and prints whether both bodies match.
    While it runs, DEBUG is replaced by a logger that prints every
    message; the previous value is restored afterwards, also on error.
    """
    global DEBUG
    # imported here because the module imports time only under __main__
    import time
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    try:
        print(" fetching the file to establish a connection")
        fo = urllib2.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(" waiting %i seconds for the server to close the connection" % i)
        while i > 0:
            sys.stdout.write('\r %2i' % i)
            sys.stdout.flush()
            time.sleep(1)
            i -= 1
        # NOTE(review): the countdown goes to stdout but this carriage
        # return goes to stderr -- kept as found, confirm the intent
        sys.stderr.write('\r')

        print(" fetching the file a second time")
        fo = urllib2.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(' data are identical')
        else:
            print(' ERROR: DATA DIFFER')
    finally:
        DEBUG = dbbackup
737 734
738 735
739 736 def test(url, N=10):
740 737 print "checking error handler (do this on a non-200)"
741 738 try: error_handler(url)
742 739 except IOError:
743 740 print "exiting - exception will prevent further tests"
744 741 sys.exit()
745 742 print
746 743 print "performing continuity test (making sure stuff isn't corrupted)"
747 744 continuity(url)
748 745 print
749 746 print "performing speed comparison"
750 747 comp(N, url)
751 748 print
752 749 print "performing dropped-connection check"
753 750 test_timeout(url)
754 751
# Script entry point: "<script> <integer> <url>" runs the self-tests.
if __name__ == '__main__':
    # This binds the module-level name time, which test_timeout() uses
    # without importing it itself.
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing argument or non-integer count: print usage
        print "%s <integer> <url>" % sys.argv[0]
    else:
        test(url, N)
General Comments 0
You need to be logged in to leave comments. Login now