Show More
keepalive.py
851 lines
| 27.1 KiB
| text/x-python
|
PythonLexer
/ mercurial / keepalive.py
Vadim Gelfer
|
r2435 | # This library is free software; you can redistribute it and/or | ||
# modify it under the terms of the GNU Lesser General Public | ||||
# License as published by the Free Software Foundation; either | ||||
# version 2.1 of the License, or (at your option) any later version. | ||||
# | ||||
# This library is distributed in the hope that it will be useful, | ||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||||
# Lesser General Public License for more details. | ||||
# | ||||
# You should have received a copy of the GNU Lesser General Public | ||||
Martin Geisler
|
r15782 | # License along with this library; if not, see | ||
# <http://www.gnu.org/licenses/>. | ||||
Vadim Gelfer
|
r2435 | |||
# This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||||
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||||
Benoit Boissinot
|
r4026 | # Modified by Benoit Boissinot: | ||
# - fix for digest auth (inspired from urllib2.py @ Python v2.4) | ||||
Dirkjan Ochtman
|
r6470 | # Modified by Dirkjan Ochtman: | ||
# - import md5 function from a local util module | ||||
Augie Fackler
|
r9726 | # Modified by Augie Fackler: | ||
# - add safesend method and use it to prevent broken pipe errors | ||||
# on large POST requests | ||||
Benoit Boissinot
|
r4026 | |||
Vadim Gelfer
|
r2435 | """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. | ||
>>> import urllib2 | ||||
>>> from keepalive import HTTPHandler | ||||
>>> keepalive_handler = HTTPHandler() | ||||
timeless
|
r28883 | >>> opener = urlreq.buildopener(keepalive_handler) | ||
>>> urlreq.installopener(opener) | ||||
Vadim Gelfer
|
r2600 | >>> | ||
timeless
|
r28883 | >>> fo = urlreq.urlopen('http://www.python.org') | ||
Vadim Gelfer
|
r2435 | |||
If a connection to a given host is requested, and all of the existing | ||||
connections are still in use, another connection will be opened. If | ||||
the handler tries to use an existing connection but it fails in some | ||||
way, it will be closed and removed from the pool. | ||||
To remove the handler, simply re-run build_opener with no arguments, and | ||||
install that opener. | ||||
You can explicitly close connections by using the close_connection() | ||||
method of the returned file-like object (described below) or you can | ||||
use the handler methods: | ||||
close_connection(host) | ||||
close_all() | ||||
open_connections() | ||||
NOTE: using the close_connection and close_all methods of the handler | ||||
should be done with care when using multiple threads. | ||||
* there is nothing that prevents another thread from creating new | ||||
connections immediately after connections are closed | ||||
* no checks are done to prevent in-use connections from being closed | ||||
>>> keepalive_handler.close_all() | ||||
EXTRA ATTRIBUTES AND METHODS | ||||
Upon a status of 200, the object returned has a few additional | ||||
attributes and methods, which should not be used if you want to | ||||
remain consistent with the normal urllib2-returned objects: | ||||
close_connection() - close the connection to the host | ||||
readlines() - you know, readlines() | ||||
Mads Kiilerich
|
r17424 | status - the return status (e.g. 404) | ||
reason - english translation of status (e.g. 'File not found') | ||||
Vadim Gelfer
|
r2435 | |||
If you want the best of both worlds, use this inside an | ||||
AttributeError-catching try: | ||||
>>> try: status = fo.status | ||||
>>> except AttributeError: status = None | ||||
Unfortunately, these are ONLY there if status == 200, so it's not | ||||
easy to distinguish between non-200 responses. The reason is that | ||||
urllib2 tries to do clever things with error codes 301, 302, 401, | ||||
and 407, and it wraps the object upon return. | ||||
""" | ||||
Alexis S. L. Carvalho
|
r2444 | # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $ | ||
Vadim Gelfer
|
r2435 | |||
Gregory Szorc
|
r27616 | from __future__ import absolute_import, print_function | ||
Gregory Szorc
|
r27507 | |||
Gregory Szorc
|
r41453 | import collections | ||
Augie Fackler
|
r9726 | import errno | ||
Augie Fackler
|
r29341 | import hashlib | ||
Vadim Gelfer
|
r2435 | import socket | ||
Gregory Szorc
|
r27507 | import sys | ||
Pulkit Goyal
|
r29456 | import threading | ||
timeless
|
r28883 | |||
Gregory Szorc
|
r34309 | from .i18n import _ | ||
Gregory Szorc
|
r43359 | from .pycompat import getattr | ||
Joerg Sonnenberger
|
r46729 | from .node import hex | ||
timeless
|
r28883 | from . import ( | ||
Augie Fackler
|
r34428 | pycompat, | ||
Augie Fackler
|
r34467 | urllibcompat, | ||
timeless
|
r28883 | util, | ||
) | ||||
Augie Fackler
|
r43346 | from .utils import procutil | ||
timeless
|
r28883 | |||
Pulkit Goyal
|
r29455 | httplib = util.httplib | ||
timeless
|
r28883 | urlerr = util.urlerr | ||
urlreq = util.urlreq | ||||
Vadim Gelfer
|
r2435 | |||
DEBUG = None | ||||
Augie Fackler
|
r43346 | |||
Thomas Arendsen Hein
|
class ConnectionManager(object):
    """Lock-protected registry of the connections opened by a handler.

    The connection manager must be able to:
      * keep track of all existing connections, grouped by host
      * remember, for every tracked connection, whether it is ready to be
        handed out for another request
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = collections.defaultdict(list)  # host -> [connection]
        self._connmap = {}  # map connections to host
        self._readymap = {}  # map connection to ready state

    def add(self, host, connection, ready):
        """start tracking ``connection`` for ``host`` with the given ready
        state"""
        with self._lock:
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """stop tracking ``connection``; unknown connections are ignored"""
        with self._lock:
            try:
                host = self._connmap.pop(connection)
            except KeyError:
                return
            self._readymap.pop(connection, None)
            conns = self._hostmap[host]
            conns.remove(connection)
            if not conns:
                del self._hostmap[host]

    def set_ready(self, connection, ready):
        """update the ready state of a tracked connection

        Connections that are not (or no longer) tracked are ignored.  A
        plain assignment here would silently re-insert a connection that
        remove() already dropped and keep it referenced forever.
        """
        with self._lock:
            if connection in self._readymap:
                self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """return a ready connection to ``host`` (marking it busy), or None"""
        with self._lock:
            # .get() rather than indexing: indexing the defaultdict would
            # create an empty entry for every host that is merely queried.
            for c in self._hostmap.get(host, ()):
                if self._readymap[c]:
                    self._readymap[c] = False
                    return c
        return None

    def get_all(self, host=None):
        """return the tracked connections

        With ``host``, a list of the connections to that host (empty if
        there are none).  Without it, a dict mapping each host to a list of
        its connections.  The returned containers are copies, so callers may
        call remove() while iterating over them.
        """
        with self._lock:
            if host:
                return list(self._hostmap.get(host, ()))
            return {h: list(conns) for h, conns in self._hostmap.items()}
Augie Fackler
|
r43346 | |||
Thomas Arendsen Hein
|
class KeepAliveHandler(object):
    """urllib handler mixin that keeps HTTP connections open for reuse.

    Connections are pooled per host in a ConnectionManager.  The handler
    also counts the requests it started and the bytes it sent.
    """

    def __init__(self, timeout=None):
        self._cm = ConnectionManager()
        self._timeout = timeout
        self.requestscount = 0
        self.sentbytescount = 0

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in list(self._cm.get_all().items()):
            # Iterate over a copy: remove() edits the manager's per-host
            # list, and walking that same list while it shrinks would skip
            # every other connection.
            for h in list(conns):
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, True)

    def _remove_connection(self, host, connection, close=0):
        """forget ``connection``; close it first when ``close`` is true"""
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """send ``req`` and return the response

        A ready pooled connection to the host is reused when one works;
        otherwise a new ``http_class`` connection is created.  Raises
        urlerr.urlerror when no host is given, on a bad status line, and on
        socket/HTTP errors.
        """
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror(b'no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host, timeout=self._timeout)
                if DEBUG:
                    DEBUG.info(
                        b"creating new connection to %s (%d)", host, id(h)
                    )
                self._cm.add(host, h, False)
                try:
                    self._start_transaction(h, req)
                    r = h.getresponse()
                except:  # re-raises
                    # Mirror _reuse_connection(): a connection whose
                    # transaction failed must not stay in the pool, where it
                    # would sit as "not ready" forever.
                    self._cm.remove(h)
                    h.close()
                    raise
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _(b'bad HTTP status line: %s') % pycompat.sysbytes(err.line)
            )
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, 'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info(b"STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:  # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error(
                    b"unexpected exception - closing connection to %s (%d)",
                    host,
                    id(h),
                )
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info(
                    b"failed to re-use connection to %s (%d)", host, id(h)
                )
            r = None
        else:
            if DEBUG:
                DEBUG.info(b"re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """send the request line, headers and body of ``req`` over ``h``

        Also updates the request/sent-bytes counters on this handler and,
        when it has them, on ``self.parent``.  A socket.error raised while
        sending the request line is re-raised as urlerr.urlerror.
        """
        oldbytescount = getattr(h, 'sentbytescount', 0)

        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(),
                    urllibcompat.getselector(req),
                    **skipheaders
                )
                if 'content-type' not in headers:
                    h.putheader(
                        'Content-type', 'application/x-www-form-urlencoded'
                    )
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(),
                    urllibcompat.getselector(req),
                    **skipheaders
                )
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

        # This will fail to record events in case of I/O failure. That's OK.
        self.requestscount += 1
        self.sentbytescount += getattr(h, 'sentbytescount', 0) - oldbytescount

        try:
            self.parent.requestscount += 1
            self.parent.sentbytescount += (
                getattr(h, 'sentbytescount', 0) - oldbytescount
            )
        except AttributeError:
            pass
Augie Fackler
|
r43346 | |||
timeless
|
# Concrete handler: combines KeepAliveHandler with urlreq.httphandler and
# adds no behavior of its own.
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    pass
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
class HTTPResponse(httplib.HTTPResponse):
    """Buffered HTTP response that reports back to its keepalive handler."""

    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            extrakw['strict'] = True
            extrakw['buffering'] = True
        httplib.HTTPResponse.__init__(
            self, sock, debuglevel=debuglevel, method=method, **extrakw
        )
        self.fileno = sock.fileno
        self.code = None
        self.receivedbytescount = 0
        self._rbuf = b''
        self._rbufsize = 8096
        self._handler = None  # inserted by the handler later
        self._host = None  # (same)
        self._url = None  # (same)
        self._connection = None  # (same)

    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    # Python 2.7 has a single close() which closes the socket handle.
    # This method was effectively renamed to _close_conn() in Python 3. But
    # there is also a close(). _close_conn() is called by methods like
    # read().

    def close(self):
        """close the response file and tell the handler the connection is
        free for another request"""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(
                    self, self._host, self._connection
                )

    def _close_conn(self):
        self.close()

    def close_connection(self):
        """close the underlying connection and remove it from the pool"""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """read up to ``amt`` bytes (everything when None), serving data
        left in the line buffer first"""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            buffered = len(self._rbuf)
            if amt > buffered:
                amt -= buffered
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s
        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = b''
        data = self._raw_read(amt)

        self.receivedbytescount += len(data)
        try:
            self._connection.receivedbytescount += len(data)
        except AttributeError:
            pass
        try:
            self._handler.parent.receivedbytescount += len(data)
        except AttributeError:
            pass

        s += data
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(b';')
                if i >= 0:
                    line = line[:i]  # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(b''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return b''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return b''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == b'\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return b''.join(parts)

    def readline(self):
        """return the next line (including its newline), or whatever is
        left when the stream ends without one"""
        # Fast path for a line is already available in read buffer.
        i = self._rbuf.find(b'\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        # When the raw read is implemented via readinto(), our readinto()
        # already returns the contents of _rbuf, so don't add them twice.
        if self._raw_readinto is None:
            chunks = [self._rbuf]
        else:
            chunks = []
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            self.receivedbytescount += len(new)
            # Guarded like in read(): the connection may not carry the
            # counter (or may not have been attached yet).
            try:
                self._connection.receivedbytescount += len(new)
            except AttributeError:
                pass
            try:
                self._handler.parent.receivedbytescount += len(new)
            except AttributeError:
                pass

            chunks.append(new)
            i = new.find(b'\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = b''
            return b''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return b''.join(chunks)

    def readlines(self, sizehint=0):
        """return a list of lines, stopping early once ``sizehint`` bytes
        (when non-zero) have been collected"""
        total = 0
        lines = []
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines

    def readinto(self, dest):
        """fill ``dest`` with buffered data followed by freshly read data
        and return the number of bytes stored"""
        if self._raw_readinto is None:
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0 : len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])

        self.receivedbytescount += got
        # Guarded like in read(): the connection may not carry the counter.
        try:
            self._connection.receivedbytescount += got
        except AttributeError:
            pass
        # NOTE(review): this updates the handler itself, whereas read() and
        # readline() update handler.parent.  Left unchanged on purpose: when
        # the raw read() is implemented via readinto(), switching to
        # .parent here would count those bytes twice - confirm intent.
        try:
            self._handler.receivedbytescount += got
        except AttributeError:
            pass

        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = b''
        return got
Gregory Szorc
|
r37313 | |||
Augie Fackler
|
r43346 | |||
Augie Fackler
|
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.

    `str' is either a bytes-like payload or an object with a read()
    method, which is then streamed in blocks.  Bytes handed to the socket
    are added to self.sentbytescount.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if not self.auto_open:
            raise httplib.NotConnected
        self.connect()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    verbose = self.debuglevel > 0
    if verbose:
        print(b"send:", repr(str))
    try:
        reader = getattr(str, 'read', None)
        if reader is None:
            self.sock.sendall(str)
            self.sentbytescount += len(str)
        else:
            if verbose:
                print(b"sending a read()able")
            chunksize = 8192
            while True:
                chunk = reader(chunksize)
                if not chunk:
                    break
                self.sock.sendall(chunk)
                self.sentbytescount += len(chunk)
    except socket.error as err:
        salvaged = False
        if (
            err.args[0] == errno.EPIPE  # Broken pipe
            and self._HTTPConnection__state == httplib._CS_REQ_SENT
        ):
            # Reset the stash first so the wrapped getresponse() performs
            # a real read instead of returning an older stashed response.
            self._broken_pipe_resp = None
            self._broken_pipe_resp = self.getresponse()
            salvaged = True
        self.close()
        if not salvaged:
            raise
Augie Fackler
|
r43346 | |||
Augie Fackler
|
def wrapgetresponse(cls):
    """Return a getresponse() for ``cls`` that honours a stashed response.

    The returned function hands back the object stored in the instance's
    ``_broken_pipe_resp`` attribute when that is set, and otherwise
    delegates to ``cls.getresponse``.  It carries the docstring of
    ``cls.getresponse``.
    """

    def safegetresponse(self):
        # safesend() may have stored a response in _broken_pipe_resp, in
        # which case the socket has already been closed and the stored
        # response is all there is to return.  Otherwise take the normal
        # response path.
        stashed = getattr(self, '_broken_pipe_resp', None)
        if stashed is None:
            return cls.getresponse(self)
        return stashed

    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
Vadim Gelfer
|
r2435 | |||
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
class HTTPConnection(httplib.HTTPConnection):
    """httplib connection wired to the keepalive response/send helpers."""

    # url.httpsconnection inherits from this. So when adding/removing
    # attributes, be sure to audit httpsconnection() for unintended
    # consequences.

    # use the modified response class
    response_class = HTTPResponse
    # send() that survives a broken pipe when a response is already waiting
    send = safesend
    # getresponse() that returns the response stashed by safesend(), if any
    getresponse = wrapgetresponse(httplib.HTTPConnection)

    def __init__(self, *args, **kwargs):
        httplib.HTTPConnection.__init__(self, *args, **kwargs)
        # per-connection traffic counters; safesend() updates the first and
        # HTTPResponse updates the second
        self.sentbytescount = 0
        self.receivedbytescount = 0
Vadim Gelfer
|
r2600 | |||
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
r2435 | ######################################################################### | ||
##### TEST FUNCTIONS | ||||
######################################################################### | ||||
def continuity(url):
    """Fetch ``url`` three ways and print the MD5 of each result.

    The three fetches are: a plain opener with read(), the keepalive
    handler with read(), and the keepalive handler with readline().  The
    printed digests should be identical.
    """
    fmt = b'%25s: %s'

    def report(label, payload):
        # one output line per fetch: label plus hex digest of the body
        print(fmt % (label, hex(hashlib.md5(payload).digest())))

    def slurp():
        # whole-body read through whichever opener is currently installed
        handle = urlreq.urlopen(url)
        body = handle.read()
        handle.close()
        return body

    # first fetch the file with the normal http handler
    urlreq.installopener(urlreq.buildopener())
    report(b'normal urllib', slurp())

    # now install the keepalive handler and try again
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))
    report(b'keepalive read', slurp())

    # finally read the same resource line by line
    handle = urlreq.urlopen(url)
    pieces = []
    while True:
        piece = handle.readline()
        if not piece:
            break
        pieces.append(piece)
    handle.close()
    report(b'keepalive readline', b''.join(pieces))
Vadim Gelfer
|
r2435 | |||
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
def comp(N, url):
    """Time ``N`` fetches of ``url`` with the normal opener and with the
    keepalive handler, printing both timings and their ratio."""
    print(b' making %i connections to:\n %s' % (N, url))

    runs = (
        # first use normal opener
        (b' first using the normal urllib handlers', lambda: urlreq.buildopener()),
        # now install the keepalive handler and try again
        (
            b' now using the keepalive handler ',
            lambda: urlreq.buildopener(HTTPHandler()),
        ),
    )

    timings = []
    for banner, makeopener in runs:
        procutil.stdout.write(banner)
        urlreq.installopener(makeopener())
        elapsed = fetch(N, url)
        print(b' TIME: %.3f s' % elapsed)
        timings.append(elapsed)

    normal, keepalive = timings
    print(b' improvement factor: %.2f' % (normal / keepalive))
Vadim Gelfer
|
r2600 | |||
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
def fetch(N, url, delay=0):
    """Fetch ``url`` ``N`` times and return the elapsed seconds.

    When ``delay`` is non-zero, sleep that long before every fetch except
    the first.  A warning is printed for each fetch whose body length
    differs from the first one.
    """
    import time

    sizes = []
    began = time.time()
    for attempt in range(N):
        if attempt > 0 and delay:
            time.sleep(delay)
        handle = urlreq.urlopen(url)
        body = handle.read()
        handle.close()
        sizes.append(len(body))
    elapsed = time.time() - began

    for position, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print(
                b"WARNING: inconsistent length on read %i: %i"
                % (position, size)
            )

    return elapsed
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
def test_timeout(url):
    """Check that a connection dropped by the server is handled.

    Fetches ``url``, waits 20 seconds so the server can close the idle
    connection, fetches again and prints whether both bodies match.  While
    running, the module-level DEBUG logger is replaced by one that prints
    every message; the previous value is restored afterwards, even if a
    fetch raises.
    """
    # Imported here, as fetch() does: the module only imports ``time``
    # under ``if __name__ == '__main__'``, so calling this function from an
    # importing module would otherwise fail with a NameError.
    import time

    global DEBUG
    dbbackup = DEBUG

    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)

        info = warning = error = debug

    DEBUG = FakeLogger()
    try:
        print(b" fetching the file to establish a connection")
        fo = urlreq.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(b" waiting %i seconds for the server to close the connection" % i)
        while i > 0:
            procutil.stdout.write(b'\r %2i' % i)
            procutil.stdout.flush()
            time.sleep(1)
            i -= 1
        procutil.stderr.write(b'\r')

        print(b" fetching the file a second time")
        fo = urlreq.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(b' data are identical')
        else:
            print(b' ERROR: DATA DIFFER')
    finally:
        # never leave the fake logger installed
        DEBUG = dbbackup
Vadim Gelfer
|
r2600 | |||
Vadim Gelfer
|
def test(url, N=10):
    """Run the continuity, speed and dropped-connection checks against
    ``url``; ``N`` is the number of fetches used by the speed comparison."""
    stages = (
        (
            b"performing continuity test (making sure stuff isn't corrupted)",
            lambda: continuity(url),
        ),
        (b"performing speed comparison", lambda: comp(N, url)),
        (b"performing dropped-connection check", lambda: test_timeout(url)),
    )
    for index, (banner, run) in enumerate(stages):
        if index:
            # blank separator between consecutive stages
            print(b'')
        print(banner)
        run()
Vadim Gelfer
|
r2600 | |||
Augie Fackler
|
r43346 | |||
Vadim Gelfer
|
if __name__ == '__main__':
    # Script entry point: ``keepalive.py <integer> <url>`` runs the test
    # functions above against <url>, using <integer> fetches for the speed
    # comparison.

    # test_timeout() looks up ``time`` as a module global.
    import time

    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # sys.argv[0] is a native str; on Python 3 it must be converted
        # before %-formatting into a bytes template, otherwise the usage
        # message itself raises TypeError.
        print(b"%s <integer> <url>" % pycompat.sysbytes(sys.argv[0]))
    else:
        test(url, N)