##// END OF EJS Templates
keepalive: reorder header precedence...
Gregory Szorc -
r30463:bc0def54 default
parent child Browse files
Show More
@@ -1,759 +1,758 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68 status - the return status (i.e. 404)
69 69 reason - english translation of status (i.e. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81
82 82 For python versions earlier than 2.4, you can avoid this fancy error
83 83 handling by setting the module-level global HANDLE_ERRORS to zero.
84 84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
85 85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
86 86 means "pass everything up". In python 2.4, however, this job no
87 87 longer belongs to the HTTP Handler and is now done by a NEW handler,
88 88 HTTPErrorProcessor. Here's the bottom line:
89 89
90 90 python version < 2.4
91 91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
92 92 errors
93 93 HANDLE_ERRORS == 0 pass everything up, error processing is
94 94 left to the calling code
95 95 python version >= 2.4
96 96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
97 97 HANDLE_ERRORS == 0 (default) pass everything up, let the
98 98 other handlers (specifically,
99 99 HTTPErrorProcessor) decide what to do
100 100
101 101 In practice, setting the variable either way makes little difference
102 102 in python 2.4, so for the most consistent behavior across versions,
103 103 you probably just want to use the defaults, which will give you
104 104 exceptions on errors.
105 105
106 106 """
107 107
108 108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
109 109
110 110 from __future__ import absolute_import, print_function
111 111
112 112 import errno
113 113 import hashlib
114 114 import socket
115 115 import sys
116 116 import threading
117 117
118 118 from . import (
119 119 util,
120 120 )
121 121
122 122 httplib = util.httplib
123 123 urlerr = util.urlerr
124 124 urlreq = util.urlreq
125 125
# Module-level debug logger; the test harness installs a FakeLogger here.
DEBUG = None

# Prior to Python 2.4, the HTTP handler itself had to turn non-200
# responses into errors; from 2.4 on, HTTPErrorProcessor does that job
# (see the module docstring), so error handling defaults to off there.
if sys.version_info < (2, 4):
    HANDLE_ERRORS = 1
else:
    HANDLE_ERRORS = 0
class ConnectionManager(object):
    """Thread-safe registry of keep-alive connections.

    Tracks, per host, every open connection and whether each one is
    currently free ("ready") to carry another request.
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register `connection` for `host` with the given ready state."""
        # `with` releases the lock even on error, replacing the manual
        # acquire()/try/finally/release() dance.
        with self._lock:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget `connection` entirely; a no-op if it is unknown."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                # never registered (or already removed) - nothing to do
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Mark `connection` ready (truthy) or busy (falsy).

        Deliberately lock-free: a lost update only delays reuse, and the
        connection may be removed concurrently (hence the KeyError guard).
        """
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        """Return a free connection to `host`, marking it busy, or None."""
        conn = None
        with self._lock:
            for c in self._hostmap.get(host, []):
                if self._readymap[c]:
                    self._readymap[c] = 0
                    conn = c
                    break
        return conn

    def get_all(self, host=None):
        """Return a copy: `host`'s connection list, or a host->list dict."""
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
class KeepAliveHandler(object):
    """urllib2-style handler mixin implementing HTTP/1.1 keep-alive.

    Keeps a pool of open connections (via ConnectionManager) and reuses
    an idle connection to a host when one is available, falling back to
    opening a new one.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # get_all() returns a fresh plain dict, so items() is correct and
        # safe here; iteritems() does not exist on Python 3.
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        # optionally close the socket, then drop it from the pool
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Issue `req`, preferring an existing idle connection to the host.

        Returns the response object decorated with handler bookkeeping
        and urllib2-compatible attributes; raises urlerror on socket or
        HTTP-protocol failures.
        """
        host = req.get_host()
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # decorate the response so close() can notify us, and so it looks
        # like a normal urllib2 response (code/headers/msg), per the
        # module docstring.
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        # Header precedence (later update() calls win): opener-level
        # addheaders < request headers < unredirected request headers.
        headers = dict(self.parent.addheaders)
        headers.update(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    # Concrete urllib2 HTTP handler: layers keep-alive connection reuse
    # (KeepAliveHandler) on top of the standard http handler machinery.
    pass
365 364
class HTTPResponse(httplib.HTTPResponse):
    """httplib.HTTPResponse subclass with buffering and handler hooks.

    We need to subclass HTTPResponse in order to
    1) add readline() and readlines() methods
    2) add close_connection() methods
    3) add info() and geturl() methods
    """

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # NOTE(review): `strict` is accepted for signature compatibility
        # but is not forwarded to httplib.HTTPResponse here.
        httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        self.fileno = sock.fileno
        self.code = None
        # read-ahead buffer consumed by read()/readline()
        self._rbuf = ''
        # refill granularity for the read-ahead buffer
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    # keep a reference to the unbuffered base-class read()
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and tell the handler the connection is free."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                # marks the connection ready for reuse in the pool
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the response AND tear down the underlying connection."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        """Return the response headers (urllib2 compatibility)."""
        return self.headers

    def geturl(self):
        """Return the URL this response came from (urllib2 compatibility)."""
        return self._url

    def read(self, amt=None):
        """Read up to `amt` bytes (all remaining if None), draining the
        readline() buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                # buffer is only part of the answer; fall through to the
                # raw read for the remainder
                amt -= L
            else:
                # buffer alone satisfies the request
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read `amt` bytes (all if None) from a chunked-encoded body.

        Tracks the unread remainder of the current chunk in
        self.chunk_left between calls.
        """
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                # at a chunk boundary: parse the next chunk-size line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    # terminal zero-size chunk: body is complete
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                # request satisfied mid-chunk; remember the remainder
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def readline(self, limit=-1):
        """Return one line (up to `limit` bytes if limit >= 0)."""
        # search the buffer first; refill it until a newline arrives,
        # the limit is reachable from the buffer alone, or EOF
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                break
            i = new.find('\n')
            if i >= 0:
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            # no newline: return whatever we have (EOF case)
            i = len(self._rbuf)
        else:
            # include the newline itself
            i = i + 1
        if 0 <= limit < len(self._rbuf):
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint=0):
        """Return a list of lines; stop early once `sizehint` bytes read."""
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list
521 520
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior: if the
    peer hangs up mid-send but already produced a response, capture that
    response instead of losing it.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            # file-like payload: stream it out in blocks
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        # use v.args[0], not v[0]: exceptions are not subscriptable on
        # Python 3, and .args[0] behaves identically on Python 2.
        if v.args[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
572 571
def wrapgetresponse(cls):
    """Build a broken-pipe-aware replacement for cls.getresponse().

    The returned function first hands back any response that safesend()
    stashed in _broken_pipe_resp (the socket is already closed in that
    case); otherwise it defers to the normal cls.getresponse() path.
    """
    def safegetresponse(self):
        # safesend() may have captured the server's reply just before
        # the pipe broke; if so, that reply IS the response.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
587 586
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    # broken-pipe-tolerant send() defined above (module-level safesend)
    send = safesend
    # getresponse() that first returns any response safesend() captured
    getresponse = wrapgetresponse(httplib.HTTPConnection)
593 592
594 593
595 594 #########################################################################
596 595 ##### TEST FUNCTIONS
597 596 #########################################################################
598 597
def error_handler(url):
    """Fetch `url` with HANDLE_ERRORS both off and on, printing results."""
    global HANDLE_ERRORS
    saved = HANDLE_ERRORS
    handler = HTTPHandler()
    urlreq.installopener(urlreq.buildopener(handler))
    labels = ('off', 'on')
    for mode in (0, 1):
        print(" fancy error handling %s (HANDLE_ERRORS = %i)"
              % (labels[mode], mode))
        HANDLE_ERRORS = mode
        try:
            fo = urlreq.urlopen(url)
            fo.read()
            fo.close()
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                # only present when HANDLE_ERRORS passed the object through
                status, reason = None, None
        except IOError as e:
            print(" EXCEPTION: %s" % e)
            raise
        else:
            print(" status = %s, reason = %s" % (status, reason))
    HANDLE_ERRORS = saved
    print("open connections:", handler.open_connections())
    handler.close_all()
626 625
def continuity(url):
    """Fetch `url` three different ways and print md5 digests to compare."""
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    urlreq.installopener(urlreq.buildopener())
    fo = urlreq.urlopen(url)
    body = fo.read()
    fo.close()
    print(format % ('normal urllib', md5(body).hexdigest()))

    # now install the keepalive handler and try again
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))

    fo = urlreq.urlopen(url)
    body = fo.read()
    fo.close()
    print(format % ('keepalive read', md5(body).hexdigest()))

    # same again, but drained line by line through readline()
    fo = urlreq.urlopen(url)
    chunks = []
    while True:
        line = fo.readline()
        if not line:
            break
        chunks.append(line)
    fo.close()
    print(format % ('keepalive readline', md5(''.join(chunks)).hexdigest()))
660 659
def comp(N, url):
    """Time `N` fetches of `url` without and with keep-alive and report."""
    print(' making %i connections to:\n %s' % (N, url))

    sys.stdout.write(' first using the normal urllib handlers')
    # baseline: the default opener, one connection per request
    urlreq.installopener(urlreq.buildopener())
    t_plain = fetch(N, url)
    print(' TIME: %.3f s' % t_plain)

    sys.stdout.write(' now using the keepalive handler ')
    # same workload through the connection-reusing handler
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))
    t_keep = fetch(N, url)
    print(' TIME: %.3f s' % t_keep)
    print(' improvement factor: %.2f' % (t_plain / t_keep))
678 677
def fetch(N, url, delay=0):
    """Fetch `url` N times (sleeping `delay` seconds between fetches)
    and return the elapsed wall-clock time in seconds."""
    import time
    lengths = []
    start = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        body = fo.read()
        fo.close()
        lengths.append(len(body))
    elapsed = time.time() - start

    # every response should be the same size; flag any that differ
    for j, length in enumerate(lengths[1:], 1):
        if length != lengths[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, length))

    return elapsed
699 698
def test_timeout(url):
    """Verify a connection the server drops while idle is handled cleanly.

    Fetches `url`, waits 20 seconds for the server to close the idle
    connection, fetches again, and compares the two payloads.
    """
    # `time` was previously only imported under the __main__ guard, so
    # calling this via test() from an importing module raised NameError
    # at time.sleep() below; import it locally, like fetch() does.
    import time
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print(" waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        sys.stdout.write('\r %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print(" fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print(' data are identical')
    else:
        print(' ERROR: DATA DIFFER')

    DEBUG = dbbackup
733 732
734 733
def test(url, N=10):
    """Run the full self-test suite against `url`."""
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()
    # remaining steps, each preceded by a blank line and a banner
    steps = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison", lambda: comp(N, url)),
        ("performing dropped-connection check", lambda: test_timeout(url)),
    )
    for banner, step in steps:
        print('')
        print(banner)
        step()
750 749
if __name__ == '__main__':
    import time
    try:
        count = int(sys.argv[1])
        target = sys.argv[2]
    except (IndexError, ValueError):
        # missing arguments, or a non-integer count
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(target, count)
General Comments 0
You need to be logged in to leave comments. Login now