# HG changeset patch # User Joerg Sonnenberger # Date 2024-06-30 11:22:23 # Node ID c1ed5ee2ad82bf976560acd936e743b38d40a077 # Parent b08de326bee41301ab599c35b9a5ee3febc6c211 http: reuse Python's implementation of read/readline/readinto Since Python 3 already provides a working implementation of readline, there is no need for our own buffering implementation. Reduce the code to transfer accounting only. diff --git a/mercurial/keepalive.py b/mercurial/keepalive.py --- a/mercurial/keepalive.py +++ b/mercurial/keepalive.py @@ -380,22 +380,9 @@ class HTTPHandler(KeepAliveHandler, urlr class HTTPResponse(httplib.HTTPResponse): # we need to subclass HTTPResponse in order to - # 1) add readline(), readlines(), and readinto() methods - # 2) add close_connection() methods - # 3) add info() and geturl() methods - - # in order to add readline(), read must be modified to deal with a - # buffer. example: readline must read a buffer and then spit back - # one line at a time. The only real alternative is to read one - # BYTE at a time (ick). Once something has been read, it can't be - # put back (ok, maybe it can, but that's even uglier than this), - # so if you THEN do a normal read, you must first take stuff from - # the buffer. - - # the read method wraps the original to accommodate buffering, - # although read() never adds to the buffer. - # Both readline and readlines have been stolen with almost no - # modification from socket.py + # 1) add close_connection() methods + # 2) add info() and geturl() methods + # 3) add accounting for read(), readline() and readinto() def __init__(self, sock, debuglevel=0, strict=0, method=None): httplib.HTTPResponse.__init__( @@ -411,9 +398,6 @@ class HTTPResponse(httplib.HTTPResponse) self._url = None # (same) self._connection = None # (same) - _raw_read = httplib.HTTPResponse.read - _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None) - # Python 2.7 has a single close() which closes the socket handle. 
# This method was effectively renamed to _close_conn() in Python 3. But # there is also a close(). _close_conn() is called by methods like @@ -442,23 +426,7 @@ class HTTPResponse(httplib.HTTPResponse) return self._url def read(self, amt=None): - # the _rbuf test is only in this first if for speed. It's not - # logically necessary - if self._rbuf and amt is not None: - L = len(self._rbuf) - if amt > L: - amt -= L - else: - s = self._rbuf[:amt] - self._rbuf = self._rbuf[amt:] - return s - # Careful! http.client.HTTPResponse.read() on Python 3 is - # implemented using readinto(), which can duplicate self._rbuf - # if it's not empty. - s = self._rbuf - self._rbuf = b'' - data = self._raw_read(amt) - + data = super().read(amt) self.receivedbytescount += len(data) try: self._connection.receivedbytescount += len(data) @@ -468,137 +436,32 @@ class HTTPResponse(httplib.HTTPResponse) self._handler.parent.receivedbytescount += len(data) except AttributeError: pass - - s += data - return s - - # stolen from Python SVN #68532 to fix issue1088 - def _read_chunked(self, amt): - chunk_left = self.chunk_left - parts = [] - - while True: - if chunk_left is None: - line = self.fp.readline() - i = line.find(b';') - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - chunk_left = int(line, 16) - except ValueError: - # close the connection as protocol synchronization is - # probably lost - self.close() - raise httplib.IncompleteRead(b''.join(parts)) - if chunk_left == 0: - break - if amt is None: - parts.append(self._safe_read(chunk_left)) - elif amt < chunk_left: - parts.append(self._safe_read(amt)) - self.chunk_left = chunk_left - amt - return b''.join(parts) - elif amt == chunk_left: - parts.append(self._safe_read(amt)) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return b''.join(parts) - else: - parts.append(self._safe_read(chunk_left)) - amt -= chunk_left - - # we read the whole chunk, get another - self._safe_read(2) # toss the 
CRLF at the end of the chunk - chunk_left = None - - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline() - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line == b'\r\n': - break - - # we read everything; close the "file" - self.close() - - return b''.join(parts) + return data def readline(self): - # Fast path for a line is already available in read buffer. - i = self._rbuf.find(b'\n') - if i >= 0: - i += 1 - line = self._rbuf[:i] - self._rbuf = self._rbuf[i:] - return line - - # No newline in local buffer. Read until we find one. - # readinto read via readinto will already return _rbuf - if self._raw_readinto is None: - chunks = [self._rbuf] - else: - chunks = [] - i = -1 - readsize = self._rbufsize - while True: - new = self._raw_read(readsize) - if not new: - break - - self.receivedbytescount += len(new) - self._connection.receivedbytescount += len(new) - try: - self._handler.parent.receivedbytescount += len(new) - except AttributeError: - pass - - chunks.append(new) - i = new.find(b'\n') - if i >= 0: - break - - # We either have exhausted the stream or have a newline in chunks[-1]. 
- - # EOF - if i == -1: - self._rbuf = b'' - return b''.join(chunks) - - i += 1 - self._rbuf = chunks[-1][i:] - chunks[-1] = chunks[-1][:i] - return b''.join(chunks) + data = super().readline() + self.receivedbytescount += len(data) + try: + self._connection.receivedbytescount += len(data) + except AttributeError: + pass + try: + self._handler.parent.receivedbytescount += len(data) + except AttributeError: + pass + return data def readinto(self, dest): - if self._raw_readinto is None: - res = self.read(len(dest)) - if not res: - return 0 - dest[0 : len(res)] = res - return len(res) - total = len(dest) - have = len(self._rbuf) - if have >= total: - dest[0:total] = self._rbuf[:total] - self._rbuf = self._rbuf[total:] - return total - mv = memoryview(dest) - got = self._raw_readinto(mv[have:total]) - + got = super().readinto(dest) self.receivedbytescount += got - self._connection.receivedbytescount += got try: - self._handler.receivedbytescount += got + self._connection.receivedbytescount += got except AttributeError: pass - - dest[0:have] = self._rbuf - got += len(self._rbuf) - self._rbuf = b'' + try: + self._handler.parent.receivedbytescount += got + except AttributeError: + pass return got