http: reuse Python's implementation of read/readline/readinto...
Joerg Sonnenberger
r52722:c1ed5ee2 default
@@ -1,844 +1,707 b''
1 1 # This library is free software; you can redistribute it and/or
2 2 # modify it under the terms of the GNU Lesser General Public
3 3 # License as published by the Free Software Foundation; either
4 4 # version 2.1 of the License, or (at your option) any later version.
5 5 #
6 6 # This library is distributed in the hope that it will be useful,
7 7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 9 # Lesser General Public License for more details.
10 10 #
11 11 # You should have received a copy of the GNU Lesser General Public
12 12 # License along with this library; if not, see
13 13 # <http://www.gnu.org/licenses/>.
14 14
15 15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17 17
18 18 # Modified by Benoit Boissinot:
19 19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 20 # Modified by Dirkjan Ochtman:
21 21 # - import md5 function from a local util module
22 22 # Modified by Augie Fackler:
23 23 # - add safesend method and use it to prevent broken pipe errors
24 24 # on large POST requests
25 25
26 26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27 27
28 28 >>> import urllib2
29 29 >>> from keepalive import HTTPHandler
30 30 >>> keepalive_handler = HTTPHandler()
31 31 >>> opener = urlreq.buildopener(keepalive_handler)
32 32 >>> urlreq.installopener(opener)
33 33 >>>
34 34 >>> fo = urlreq.urlopen('http://www.python.org')
35 35
36 36 If a connection to a given host is requested, and all of the existing
37 37 connections are still in use, another connection will be opened. If
38 38 the handler tries to use an existing connection but it fails in some
39 39 way, it will be closed and removed from the pool.
40 40
41 41 To remove the handler, simply re-run build_opener with no arguments, and
42 42 install that opener.
43 43
44 44 You can explicitly close connections by using the close_connection()
45 45 method of the returned file-like object (described below) or you can
46 46 use the handler methods:
47 47
48 48 close_connection(host)
49 49 close_all()
50 50 open_connections()
51 51
52 52 NOTE: using the close_connection and close_all methods of the handler
53 53 should be done with care when using multiple threads.
54 54 * there is nothing that prevents another thread from creating new
55 55 connections immediately after connections are closed
56 56 * no checks are done to prevent in-use connections from being closed
57 57
58 58 >>> keepalive_handler.close_all()
59 59
60 60 EXTRA ATTRIBUTES AND METHODS
61 61
62 62 Upon a status of 200, the object returned has a few additional
63 63 attributes and methods, which should not be used if you want to
64 64 remain consistent with the normal urllib2-returned objects:
65 65
66 66 close_connection() - close the connection to the host
67 67 readlines() - you know, readlines()
68 68   status              -  the return status (e.g. 404)
69 69   reason              -  English translation of status (e.g. 'File not found')
70 70
71 71 If you want the best of both worlds, use this inside an
72 72 AttributeError-catching try:
73 73
74 74 >>> try: status = fo.status
75 75 >>> except AttributeError: status = None
76 76
77 77 Unfortunately, these are ONLY there if status == 200, so it's not
78 78 easy to distinguish between non-200 responses. The reason is that
79 79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 80 and 407, and it wraps the object upon return.
81 81 """
82 82
83 83 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
84 84
85 85
86 86 import collections
87 87 import hashlib
88 88 import socket
89 89 import sys
90 90 import threading
91 91
92 92 from .i18n import _
93 93 from .node import hex
94 94 from . import (
95 95 pycompat,
96 96 urllibcompat,
97 97 util,
98 98 )
99 99 from .utils import procutil
100 100
101 101 httplib = util.httplib
102 102 urlerr = util.urlerr
103 103 urlreq = util.urlreq
104 104
105 105 DEBUG = None
106 106
107 107
108 108 class ConnectionManager:
109 109 """
110 110 The connection manager must be able to:
111 111     * keep track of all existing connections
112 112 """
113 113
114 114 def __init__(self):
115 115 self._lock = threading.Lock()
116 116 self._hostmap = collections.defaultdict(list) # host -> [connection]
117 117 self._connmap = {} # map connections to host
118 118 self._readymap = {} # map connection to ready state
119 119
120 120 def add(self, host, connection, ready):
121 121 self._lock.acquire()
122 122 try:
123 123 self._hostmap[host].append(connection)
124 124 self._connmap[connection] = host
125 125 self._readymap[connection] = ready
126 126 finally:
127 127 self._lock.release()
128 128
129 129 def remove(self, connection):
130 130 self._lock.acquire()
131 131 try:
132 132 try:
133 133 host = self._connmap[connection]
134 134 except KeyError:
135 135 pass
136 136 else:
137 137 del self._connmap[connection]
138 138 del self._readymap[connection]
139 139 self._hostmap[host].remove(connection)
140 140 if not self._hostmap[host]:
141 141 del self._hostmap[host]
142 142 finally:
143 143 self._lock.release()
144 144
145 145 def set_ready(self, connection, ready):
146 146 try:
147 147 self._readymap[connection] = ready
148 148 except KeyError:
149 149 pass
150 150
151 151 def get_ready_conn(self, host):
152 152 conn = None
153 153 self._lock.acquire()
154 154 try:
155 155 for c in self._hostmap[host]:
156 156 if self._readymap[c]:
157 157 self._readymap[c] = False
158 158 conn = c
159 159 break
160 160 finally:
161 161 self._lock.release()
162 162 return conn
163 163
164 164 def get_all(self, host=None):
165 165 if host:
166 166 return list(self._hostmap[host])
167 167 else:
168 168 return dict(
169 169 {h: list(conns) for (h, conns) in self._hostmap.items()}
170 170 )
171 171
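Reviewer note: the pool's contract is easiest to see end to end. A small sketch using only the methods defined above; the host string and the stand-in connection object are illustrative:

    cm = ConnectionManager()
    conn = object()                            # stand-in for an HTTPConnection
    cm.add(b'example.com:80', conn, False)     # tracked, but busy (in use)
    cm.set_ready(conn, True)                   # response consumed; reusable
    c = cm.get_ready_conn(b'example.com:80')   # hands conn back, marks it busy
    assert c is conn
    cm.remove(conn)                            # forget it (e.g. after close())
    assert cm.get_all(b'example.com:80') == []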
172 172
173 173 class KeepAliveHandler:
174 174 def __init__(self, timeout=None):
175 175 self._cm = ConnectionManager()
176 176 self._timeout = timeout
177 177 self.requestscount = 0
178 178 self.sentbytescount = 0
179 179
180 180 #### Connection Management
181 181 def open_connections(self):
182 182 """return a list of connected hosts and the number of connections
183 183 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
184 184 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
185 185
186 186 def close_connection(self, host):
187 187 """close connection(s) to <host>
188 188 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
189 189 no error occurs if there is no connection to that host."""
190 190 for h in self._cm.get_all(host):
191 191 self._cm.remove(h)
192 192 h.close()
193 193
194 194 def close_all(self):
195 195 """close all open connections"""
196 196 for host, conns in self._cm.get_all().items():
197 197 for h in conns:
198 198 self._cm.remove(h)
199 199 h.close()
200 200
201 201 def _request_closed(self, request, host, connection):
202 202 """tells us that this request is now closed and that the
203 203 connection is ready for another request"""
204 204 self._cm.set_ready(connection, True)
205 205
206 206 def _remove_connection(self, host, connection, close=0):
207 207 if close:
208 208 connection.close()
209 209 self._cm.remove(connection)
210 210
211 211 #### Transaction Execution
212 212 def http_open(self, req):
213 213 return self.do_open(HTTPConnection, req)
214 214
215 215 def do_open(self, http_class, req):
216 216 host = urllibcompat.gethost(req)
217 217 if not host:
218 218 raise urlerr.urlerror(b'no host given')
219 219
220 220 try:
221 221 h = self._cm.get_ready_conn(host)
222 222 while h:
223 223 r = self._reuse_connection(h, req, host)
224 224
225 225 # if this response is non-None, then it worked and we're
226 226 # done. Break out, skipping the else block.
227 227 if r:
228 228 break
229 229
230 230 # connection is bad - possibly closed by server
231 231 # discard it and ask for the next free connection
232 232 h.close()
233 233 self._cm.remove(h)
234 234 h = self._cm.get_ready_conn(host)
235 235 else:
236 236 # no (working) free connections were found. Create a new one.
237 237 h = http_class(host, timeout=self._timeout)
238 238 if DEBUG:
239 239 DEBUG.info(
240 240 b"creating new connection to %s (%d)", host, id(h)
241 241 )
242 242 self._cm.add(host, h, False)
243 243 self._start_transaction(h, req)
244 244 r = h.getresponse()
245 245 # The string form of BadStatusLine is the status line. Add some context
246 246 # to make the error message slightly more useful.
247 247 except httplib.BadStatusLine as err:
248 248 raise urlerr.urlerror(
249 249 _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
250 250 )
251 251 except (socket.error, httplib.HTTPException) as err:
252 252 raise urlerr.urlerror(err)
253 253
254 254 # If not a persistent connection, don't try to reuse it. Look
255 255 # for this using getattr() since vcr doesn't define this
256 256 # attribute, and in that case always close the connection.
257 257 if getattr(r, 'will_close', True):
258 258 self._cm.remove(h)
259 259
260 260 if DEBUG:
261 261 DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
262 262 r._handler = self
263 263 r._host = host
264 264 r._url = req.get_full_url()
265 265 r._connection = h
266 266 r.code = r.status
267 267 r.headers = r.msg
268 268 r.msg = r.reason
269 269
270 270 return r
271 271
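Reviewer note: the retry loop in do_open() leans on Python's while/else. The else suite runs only when the condition goes false without a break, i.e. only when no pooled connection could be reused and a fresh one must be created. In miniature:

    pool = []                          # no ready connections available
    conn = pool.pop() if pool else None
    while conn:
        break                          # a reused connection succeeded
    else:
        conn = 'fresh connection'      # reached only because no break ran
    print(conn)                        # -> fresh connection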
272 272 def _reuse_connection(self, h, req, host):
273 273 """start the transaction with a re-used connection
274 274 return a response object (r) upon success or None on failure.
275 275 This DOES not close or remove bad connections in cases where
276 276 it returns. However, if an unexpected exception occurs, it
277 277 will close and remove the connection before re-raising.
278 278 """
279 279 try:
280 280 self._start_transaction(h, req)
281 281 r = h.getresponse()
282 282 # note: just because we got something back doesn't mean it
283 283 # worked. We'll check the version below, too.
284 284 except (socket.error, httplib.HTTPException):
285 285 r = None
286 286 except: # re-raises
287 287 # adding this block just in case we've missed
288 288 # something we will still raise the exception, but
289 289 # lets try and close the connection and remove it
290 290 # first. We previously got into a nasty loop
291 291 # where an exception was uncaught, and so the
292 292 # connection stayed open. On the next try, the
293 293 # same exception was raised, etc. The trade-off is
294 294 # that it's now possible this call will raise
295 295 # a DIFFERENT exception
296 296 if DEBUG:
297 297 DEBUG.error(
298 298 b"unexpected exception - closing connection to %s (%d)",
299 299 host,
300 300 id(h),
301 301 )
302 302 self._cm.remove(h)
303 303 h.close()
304 304 raise
305 305
306 306 if r is None or r.version == 9:
307 307 # httplib falls back to assuming HTTP 0.9 if it gets a
308 308 # bad header back. This is most likely to happen if
309 309 # the socket has been closed by the server since we
310 310 # last used the connection.
311 311 if DEBUG:
312 312 DEBUG.info(
313 313 b"failed to re-use connection to %s (%d)", host, id(h)
314 314 )
315 315 r = None
316 316 else:
317 317 if DEBUG:
318 318 DEBUG.info(b"re-using connection to %s (%d)", host, id(h))
319 319
320 320 return r
321 321
322 322 def _start_transaction(self, h, req):
323 323 oldbytescount = getattr(h, 'sentbytescount', 0)
324 324
325 325 # What follows mostly reimplements HTTPConnection.request()
326 326 # except it adds self.parent.addheaders in the mix and sends headers
327 327 # in a deterministic order (to make testing easier).
328 328 headers = util.sortdict(self.parent.addheaders)
329 329 headers.update(sorted(req.headers.items()))
330 330 headers.update(sorted(req.unredirected_hdrs.items()))
331 331 headers = util.sortdict((n.lower(), v) for n, v in headers.items())
332 332 skipheaders = {}
333 333 for n in ('host', 'accept-encoding'):
334 334 if n in headers:
335 335 skipheaders['skip_' + n.replace('-', '_')] = 1
336 336 try:
337 337 if urllibcompat.hasdata(req):
338 338 data = urllibcompat.getdata(req)
339 339 h.putrequest(
340 340 req.get_method(),
341 341 urllibcompat.getselector(req),
342 342 **skipheaders
343 343 )
344 344 if 'content-type' not in headers:
345 345 h.putheader(
346 346 'Content-type', 'application/x-www-form-urlencoded'
347 347 )
348 348 if 'content-length' not in headers:
349 349 h.putheader('Content-length', '%d' % len(data))
350 350 else:
351 351 h.putrequest(
352 352 req.get_method(),
353 353 urllibcompat.getselector(req),
354 354 **skipheaders
355 355 )
356 356 except socket.error as err:
357 357 raise urlerr.urlerror(err)
358 358 for k, v in headers.items():
359 359 h.putheader(k, v)
360 360 h.endheaders()
361 361 if urllibcompat.hasdata(req):
362 362 h.send(data)
363 363
364 364 # This will fail to record events in case of I/O failure. That's OK.
365 365 self.requestscount += 1
366 366 self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount
367 367
368 368 try:
369 369 self.parent.requestscount += 1
370 370 self.parent.sentbytescount += (
371 371 getattr(h, 'sentbytescount', 0) - oldbytescount
372 372 )
373 373 except AttributeError:
374 374 pass
375 375
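Reviewer note on the skipheaders plumbing: putrequest() normally emits Host and Accept-Encoding on its own, and its skip_host/skip_accept_encoding keywords (real http.client parameters) suppress that so the sorted copies sent via putheader() are not duplicated. A sketch with an illustrative host and header values:

    import http.client

    h = http.client.HTTPConnection('example.com')
    h.putrequest('GET', '/', skip_host=True, skip_accept_encoding=True)
    h.putheader('host', 'example.com')          # our copy, sent in sorted order
    h.putheader('accept-encoding', 'identity')
    h.endheaders()                              # connects and flushes headers
    resp = h.getresponse()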
376 376
377 377 class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
378 378 pass
379 379
380 380
381 381 class HTTPResponse(httplib.HTTPResponse):
382 382 # we need to subclass HTTPResponse in order to
383 # 1) add readline(), readlines(), and readinto() methods
384 # 2) add close_connection() methods
385 # 3) add info() and geturl() methods
386
387 # in order to add readline(), read must be modified to deal with a
388 # buffer. example: readline must read a buffer and then spit back
389 # one line at a time. The only real alternative is to read one
390 # BYTE at a time (ick). Once something has been read, it can't be
391 # put back (ok, maybe it can, but that's even uglier than this),
392 # so if you THEN do a normal read, you must first take stuff from
393 # the buffer.
394
395 # the read method wraps the original to accommodate buffering,
396 # although read() never adds to the buffer.
397 # Both readline and readlines have been stolen with almost no
398 # modification from socket.py
383 # 1) add close_connection() method
384 # 2) add info() and geturl() methods
385 # 3) add accounting for read(), readlines() and readinto()
399 386
400 387 def __init__(self, sock, debuglevel=0, strict=0, method=None):
401 388 httplib.HTTPResponse.__init__(
402 389 self, sock, debuglevel=debuglevel, method=method
403 390 )
404 391 self.fileno = sock.fileno
405 392 self.code = None
406 393 self.receivedbytescount = 0
407 394 self._rbuf = b''
408 395 self._rbufsize = 8096
409 396 self._handler = None # inserted by the handler later
410 397 self._host = None # (same)
411 398 self._url = None # (same)
412 399 self._connection = None # (same)
413 400
414 _raw_read = httplib.HTTPResponse.read
415 _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)
416
417 401 # Python 2.7 has a single close() which closes the socket handle.
418 402 # This method was effectively renamed to _close_conn() in Python 3. But
419 403 # there is also a close(). _close_conn() is called by methods like
420 404 # read().
421 405
422 406 def close(self):
423 407 if self.fp:
424 408 self.fp.close()
425 409 self.fp = None
426 410 if self._handler:
427 411 self._handler._request_closed(
428 412 self, self._host, self._connection
429 413 )
430 414
431 415 def _close_conn(self):
432 416 self.close()
433 417
434 418 def close_connection(self):
435 419 self._handler._remove_connection(self._host, self._connection, close=1)
436 420 self.close()
437 421
438 422 def info(self):
439 423 return self.headers
440 424
441 425 def geturl(self):
442 426 return self._url
443 427
444 428 def read(self, amt=None):
445 # the _rbuf test is only in this first if for speed. It's not
446 # logically necessary
447 if self._rbuf and amt is not None:
448 L = len(self._rbuf)
449 if amt > L:
450 amt -= L
451 else:
452 s = self._rbuf[:amt]
453 self._rbuf = self._rbuf[amt:]
454 return s
455 # Careful! http.client.HTTPResponse.read() on Python 3 is
456 # implemented using readinto(), which can duplicate self._rbuf
457 # if it's not empty.
458 s = self._rbuf
459 self._rbuf = b''
460 data = self._raw_read(amt)
461
429 data = super().read(amt)
462 430 self.receivedbytescount += len(data)
463 431 try:
464 432 self._connection.receivedbytescount += len(data)
465 433 except AttributeError:
466 434 pass
467 435 try:
468 436 self._handler.parent.receivedbytescount += len(data)
469 437 except AttributeError:
470 438 pass
471
472 s += data
473 return s
474
475 # stolen from Python SVN #68532 to fix issue1088
476 def _read_chunked(self, amt):
477 chunk_left = self.chunk_left
478 parts = []
479
480 while True:
481 if chunk_left is None:
482 line = self.fp.readline()
483 i = line.find(b';')
484 if i >= 0:
485 line = line[:i] # strip chunk-extensions
486 try:
487 chunk_left = int(line, 16)
488 except ValueError:
489 # close the connection as protocol synchronization is
490 # probably lost
491 self.close()
492 raise httplib.IncompleteRead(b''.join(parts))
493 if chunk_left == 0:
494 break
495 if amt is None:
496 parts.append(self._safe_read(chunk_left))
497 elif amt < chunk_left:
498 parts.append(self._safe_read(amt))
499 self.chunk_left = chunk_left - amt
500 return b''.join(parts)
501 elif amt == chunk_left:
502 parts.append(self._safe_read(amt))
503 self._safe_read(2) # toss the CRLF at the end of the chunk
504 self.chunk_left = None
505 return b''.join(parts)
506 else:
507 parts.append(self._safe_read(chunk_left))
508 amt -= chunk_left
509
510 # we read the whole chunk, get another
511 self._safe_read(2) # toss the CRLF at the end of the chunk
512 chunk_left = None
513
514 # read and discard trailer up to the CRLF terminator
515 ### note: we shouldn't have any trailers!
516 while True:
517 line = self.fp.readline()
518 if not line:
519 # a vanishingly small number of sites EOF without
520 # sending the trailer
521 break
522 if line == b'\r\n':
523 break
524
525 # we read everything; close the "file"
526 self.close()
527
528 return b''.join(parts)
439 return data
529 440
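Reviewer note: the deleted _read_chunked() parsed HTTP/1.1 chunked framing by hand; after this change that work is delegated to http.client. For reference, the wire format it decoded (hex chunk size, CRLF, payload, CRLF, terminated by a zero-size chunk), shown with a standard example:

    b'4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n'    # decodes to b'Wikipedia'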
530 441 def readline(self):
531 # Fast path: a line is already available in the read buffer.
532 i = self._rbuf.find(b'\n')
533 if i >= 0:
534 i += 1
535 line = self._rbuf[:i]
536 self._rbuf = self._rbuf[i:]
537 return line
538
539 # No newline in local buffer. Read until we find one.
540 # a read routed through readinto will already return _rbuf
541 if self._raw_readinto is None:
542 chunks = [self._rbuf]
543 else:
544 chunks = []
545 i = -1
546 readsize = self._rbufsize
547 while True:
548 new = self._raw_read(readsize)
549 if not new:
550 break
551
552 self.receivedbytescount += len(new)
553 self._connection.receivedbytescount += len(new)
442 data = super().readline()
443 self.receivedbytescount += len(data)
554 444 try:
555 self._handler.parent.receivedbytescount += len(new)
445 self._connection.receivedbytescount += len(data)
446 except AttributeError:
447 pass
448 try:
449 self._handler.parent.receivedbytescount += len(data)
556 450 except AttributeError:
557 451 pass
558
559 chunks.append(new)
560 i = new.find(b'\n')
561 if i >= 0:
562 break
563
564 # We either have exhausted the stream or have a newline in chunks[-1].
565
566 # EOF
567 if i == -1:
568 self._rbuf = b''
569 return b''.join(chunks)
570
571 i += 1
572 self._rbuf = chunks[-1][i:]
573 chunks[-1] = chunks[-1][:i]
574 return b''.join(chunks)
452 return data
575 453
576 454 def readinto(self, dest):
577 if self._raw_readinto is None:
578 res = self.read(len(dest))
579 if not res:
580 return 0
581 dest[0 : len(res)] = res
582 return len(res)
583 total = len(dest)
584 have = len(self._rbuf)
585 if have >= total:
586 dest[0:total] = self._rbuf[:total]
587 self._rbuf = self._rbuf[total:]
588 return total
589 mv = memoryview(dest)
590 got = self._raw_readinto(mv[have:total])
591
455 got = super().readinto(dest)
592 456 self.receivedbytescount += got
457 try:
593 458 self._connection.receivedbytescount += got
594 try:
595 self._handler.receivedbytescount += got
596 459 except AttributeError:
597 460 pass
598
599 dest[0:have] = self._rbuf
600 got += len(self._rbuf)
601 self._rbuf = b''
461 try:
462 self._handler.parent.receivedbytescount += got
463 except AttributeError:
464 pass
602 465 return got
603 466
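Reviewer note: with the manual buffering gone, readinto() is just the base-class fill-a-buffer call plus byte accounting. The caller-side pattern, shown runnable against io.BytesIO (any object with a readinto() method behaves the same):

    import io

    resp = io.BytesIO(b'example payload')   # stands in for an HTTPResponse
    buf = bytearray(8192)                   # caller-owned buffer
    n = resp.readinto(buf)                  # bytes actually written into buf
    payload = bytes(buf[:n])                # only the first n bytes are valid
    assert payload == b'example payload'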
604 467
605 468 def safesend(self, str):
606 469 """Send `str' to the server.
607 470
608 471 Shamelessly ripped off from httplib to patch a bad behavior.
609 472 """
610 473 # _broken_pipe_resp is an attribute we set in this function
611 474 # if the socket is closed while we're sending data but
612 475 # the server sent us a response before hanging up.
613 476 # In that case, we want to pretend to send the rest of the
614 477 # outgoing data, and then let the user use getresponse()
615 478 # (which we wrap) to get this last response before
616 479 # opening a new socket.
617 480 if getattr(self, '_broken_pipe_resp', None) is not None:
618 481 return
619 482
620 483 if self.sock is None:
621 484 if self.auto_open:
622 485 self.connect()
623 486 else:
624 487 raise httplib.NotConnected
625 488
626 489 # send the data to the server. if we get a broken pipe, then close
627 490 # the socket. we want to reconnect when somebody tries to send again.
628 491 #
629 492 # NOTE: we DO propagate the error, though, because we cannot simply
630 493 # ignore the error... the caller will know if they can retry.
631 494 if self.debuglevel > 0:
632 495 print(b"send:", repr(str))
633 496 try:
634 497 blocksize = 8192
635 498 read = getattr(str, 'read', None)
636 499 if read is not None:
637 500 if self.debuglevel > 0:
638 501 print(b"sending a read()able")
639 502 data = read(blocksize)
640 503 while data:
641 504 self.sock.sendall(data)
642 505 self.sentbytescount += len(data)
643 506 data = read(blocksize)
644 507 else:
645 508 self.sock.sendall(str)
646 509 self.sentbytescount += len(str)
647 510 except BrokenPipeError:
648 511 if self._HTTPConnection__state == httplib._CS_REQ_SENT:
649 512 self._broken_pipe_resp = None
650 513 self._broken_pipe_resp = self.getresponse()
651 514 reraise = False
652 515 else:
653 516 reraise = True
654 517 self.close()
655 518 if reraise:
656 519 raise
657 520
658 521
659 522 def wrapgetresponse(cls):
660 523 """Wraps getresponse in cls with a broken-pipe sane version."""
661 524
662 525 def safegetresponse(self):
663 526 # In safesend() we might set the _broken_pipe_resp
664 527 # attribute, in which case the socket has already
665 528 # been closed and we just need to give them the response
666 529 # back. Otherwise, we use the normal response path.
667 530 r = getattr(self, '_broken_pipe_resp', None)
668 531 if r is not None:
669 532 return r
670 533 return cls.getresponse(self)
671 534
672 535 safegetresponse.__doc__ = cls.getresponse.__doc__
673 536 return safegetresponse
674 537
675 538
676 539 class HTTPConnection(httplib.HTTPConnection):
677 540 # url.httpsconnection inherits from this. So when adding/removing
678 541 # attributes, be sure to audit httpsconnection() for unintended
679 542 # consequences.
680 543
681 544 # use the modified response class
682 545 response_class = HTTPResponse
683 546 send = safesend
684 547 getresponse = wrapgetresponse(httplib.HTTPConnection)
685 548
686 549 def __init__(self, *args, **kwargs):
687 550 httplib.HTTPConnection.__init__(self, *args, **kwargs)
688 551 self.sentbytescount = 0
689 552 self.receivedbytescount = 0
690 553
691 554 def __repr__(self):
692 555 base = super(HTTPConnection, self).__repr__()
693 556 local = "(unconnected)"
694 557 s = self.sock
695 558 if s:
696 559 try:
697 560 local = "%s:%d" % s.getsockname()
698 561 except OSError:
699 562 pass # Likely not connected
700 563 return "<%s: %s <--> %s:%d>" % (base, local, self.host, self.port)
701 564
702 565
703 566 #########################################################################
704 567 ##### TEST FUNCTIONS
705 568 #########################################################################
706 569
707 570
708 571 def continuity(url):
709 572 md5 = hashlib.md5
710 573 format = b'%25s: %s'
711 574
712 575 # first fetch the file with the normal http handler
713 576 opener = urlreq.buildopener()
714 577 urlreq.installopener(opener)
715 578 fo = urlreq.urlopen(url)
716 579 foo = fo.read()
717 580 fo.close()
718 581 m = md5(foo)
719 582 print(format % (b'normal urllib', hex(m.digest())))
720 583
721 584 # now install the keepalive handler and try again
722 585 opener = urlreq.buildopener(HTTPHandler())
723 586 urlreq.installopener(opener)
724 587
725 588 fo = urlreq.urlopen(url)
726 589 foo = fo.read()
727 590 fo.close()
728 591 m = md5(foo)
729 592 print(format % (b'keepalive read', hex(m.digest())))
730 593
731 594 fo = urlreq.urlopen(url)
732 595 foo = b''
733 596 while True:
734 597 f = fo.readline()
735 598 if f:
736 599 foo = foo + f
737 600 else:
738 601 break
739 602 fo.close()
740 603 m = md5(foo)
741 604 print(format % (b'keepalive readline', hex(m.digest())))
742 605
743 606
744 607 def comp(N, url):
745 608 print(b' making %i connections to:\n %s' % (N, url))
746 609
747 610 procutil.stdout.write(b' first using the normal urllib handlers')
748 611 # first use normal opener
749 612 opener = urlreq.buildopener()
750 613 urlreq.installopener(opener)
751 614 t1 = fetch(N, url)
752 615 print(b' TIME: %.3f s' % t1)
753 616
754 617 procutil.stdout.write(b' now using the keepalive handler ')
755 618 # now install the keepalive handler and try again
756 619 opener = urlreq.buildopener(HTTPHandler())
757 620 urlreq.installopener(opener)
758 621 t2 = fetch(N, url)
759 622 print(b' TIME: %.3f s' % t2)
760 623 print(b' improvement factor: %.2f' % (t1 / t2))
761 624
762 625
763 626 def fetch(N, url, delay=0):
764 627 import time
765 628
766 629 lens = []
767 630 starttime = time.time()
768 631 for i in range(N):
769 632 if delay and i > 0:
770 633 time.sleep(delay)
771 634 fo = urlreq.urlopen(url)
772 635 foo = fo.read()
773 636 fo.close()
774 637 lens.append(len(foo))
775 638 diff = time.time() - starttime
776 639
777 640 j = 0
778 641 for i in lens[1:]:
779 642 j = j + 1
780 643 if not i == lens[0]:
781 644 print(b"WARNING: inconsistent length on read %i: %i" % (j, i))
782 645
783 646 return diff
784 647
785 648
786 649 def test_timeout(url):
787 650 global DEBUG
788 651 dbbackup = DEBUG
789 652
790 653 class FakeLogger:
791 654 def debug(self, msg, *args):
792 655 print(msg % args)
793 656
794 657 info = warning = error = debug
795 658
796 659 DEBUG = FakeLogger()
797 660 print(b" fetching the file to establish a connection")
798 661 fo = urlreq.urlopen(url)
799 662 data1 = fo.read()
800 663 fo.close()
801 664
802 665 i = 20
803 666 print(b" waiting %i seconds for the server to close the connection" % i)
804 667 while i > 0:
805 668 procutil.stdout.write(b'\r %2i' % i)
806 669 procutil.stdout.flush()
807 670 time.sleep(1)
808 671 i -= 1
809 672 procutil.stderr.write(b'\r')
810 673
811 674 print(b" fetching the file a second time")
812 675 fo = urlreq.urlopen(url)
813 676 data2 = fo.read()
814 677 fo.close()
815 678
816 679 if data1 == data2:
817 680 print(b' data are identical')
818 681 else:
819 682 print(b' ERROR: DATA DIFFER')
820 683
821 684 DEBUG = dbbackup
822 685
823 686
824 687 def test(url, N=10):
825 688 print(b"performing continuity test (making sure stuff isn't corrupted)")
826 689 continuity(url)
827 690 print(b'')
828 691 print(b"performing speed comparison")
829 692 comp(N, url)
830 693 print(b'')
831 694 print(b"performing dropped-connection check")
832 695 test_timeout(url)
833 696
834 697
835 698 if __name__ == '__main__':
836 699 import time
837 700
838 701 try:
839 702 N = int(sys.argv[1])
840 703 url = sys.argv[2]
841 704 except (IndexError, ValueError):
842 705 print(b"%s <integer> <url>" % sys.argv[0])
843 706 else:
844 707 test(url, N)