##// END OF EJS Templates
changegroup: store old heads as a set...
changegroup: store old heads as a set Previously, the "oldheads" variable was a list. On a repository at Mozilla with 46,492 heads, profiling revealed that list membership testing was dominating execution time of applying small changegroups. This patch converts the list of old heads to a set. This makes membership testing significantly faster. On the aforementioned repository with 46,492 heads: $ hg unbundle <file with 1 changeset> before: 18.535s wall after: 1.303s Consumers of this variable only check for truthiness (`if oldheads`), length (`len(oldheads)`), and (most importantly) item membership (`h not in oldheads` - which occurs twice). So, the change to a set should be safe and suitable for stable. The practical effect of this change is that changegroup application and related operations (like `hg push`) no longer exhibit an O(n^2) CPU explosion as the number of heads grows.

File last commit:

r29442:456609cb default
r31587:ed5b2587 4.1.2 stable
Show More
_readers.py
239 lines | 8.5 KiB | text/x-python | PythonLexer
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 # Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Reader objects to abstract out different body response types.
This module is package-private. It is not expected that these will
have any clients outside of httpplus.
"""
Augie Fackler
httpclient: update to 938f2107d6e2 of httpplus...
r27601 from __future__ import absolute_import
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643
Augie Fackler
httpclient: update to upstream revision 2995635573d2...
r29131 try:
import httplib
httplib.HTTPException
except ImportError:
import http.client as httplib
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 import logging
logger = logging.getLogger(__name__)
class ReadNotReady(Exception):
"""Raised when read() is attempted but not enough data is loaded."""
class HTTPRemoteClosedError(httplib.HTTPException):
"""The server closed the remote socket in the middle of a response."""
class AbstractReader(object):
"""Abstract base class for response readers.
Subclasses must implement _load, and should implement _close if
it's not an error for the server to close their socket without
some termination condition being detected during _load.
"""
def __init__(self):
self._finished = False
self._done_chunks = []
Brendan Cully
http2: track available data as it changes instead of recomputing it...
r19036 self.available_data = 0
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 def _addchunk(self, data):
Brendan Cully
http2: track available data as it changes instead of recomputing it...
r19036 self._done_chunks.append(data)
self.available_data += len(data)
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 def _pushchunk(self, data):
Brendan Cully
http2: track available data as it changes instead of recomputing it...
r19036 self._done_chunks.insert(0, data)
self.available_data += len(data)
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 def _popchunk(self):
Brendan Cully
http2: track available data as it changes instead of recomputing it...
r19036 b = self._done_chunks.pop(0)
self.available_data -= len(b)
return b
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643
def done(self):
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 """Returns true if the response body is entirely read."""
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 return self._finished
def read(self, amt):
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 """Read amt bytes from the response body."""
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 if self.available_data < amt and not self._finished:
raise ReadNotReady()
Brendan Cully
http2: make read use pushchunk/popchunk, eschew itertools...
r19037 blocks = []
need = amt
while self._done_chunks:
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 b = self._popchunk()
Brendan Cully
http2: make read use pushchunk/popchunk, eschew itertools...
r19037 if len(b) > need:
nb = b[:need]
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 self._pushchunk(b[need:])
Brendan Cully
http2: make read use pushchunk/popchunk, eschew itertools...
r19037 b = nb
blocks.append(b)
need -= len(b)
if need == 0:
break
Augie Fackler
httpclient: update to 54868ef054d2 of httpplus...
r29442 result = b''.join(blocks)
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 assert len(result) == amt or (self._finished and len(result) < amt)
Brendan Cully
http2: make read use pushchunk/popchunk, eschew itertools...
r19037
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 return result
Brendan Cully
http2: sane readline...
r19038 def readto(self, delimstr, blocks = None):
Augie Fackler
httpclient: update to 938f2107d6e2 of httpplus...
r27601 """return available data chunks up to the first one in which
delimstr occurs. No data will be returned after delimstr --
the chunk in which it occurs will be split and the remainder
pushed back onto the available data queue. If blocks is
supplied chunks will be added to blocks, otherwise a new list
will be allocated.
Brendan Cully
http2: sane readline...
r19038 """
if blocks is None:
blocks = []
while self._done_chunks:
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 b = self._popchunk()
Brendan Cully
http2: sane readline...
r19038 i = b.find(delimstr) + len(delimstr)
if i:
if i < len(b):
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 self._pushchunk(b[i:])
Brendan Cully
http2: sane readline...
r19038 blocks.append(b[:i])
break
else:
blocks.append(b)
return blocks
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 def _load(self, data): # pragma: no cover
"""Subclasses must implement this.
As data is available to be read out of this object, it should
be placed into the _done_chunks list. Subclasses should not
rely on data remaining in _done_chunks forever, as it may be
reaped if the client is parsing data as it comes in.
"""
raise NotImplementedError
def _close(self):
"""Default implementation of close.
The default implementation assumes that the reader will mark
the response as finished on the _finished attribute once the
entire response body has been read. In the event that this is
not true, the subclass should override the implementation of
close (for example, close-is-end responses have to set
self._finished in the close handler.)
"""
if not self._finished:
raise HTTPRemoteClosedError(
'server appears to have closed the socket mid-response')
class AbstractSimpleReader(AbstractReader):
"""Abstract base class for simple readers that require no response decoding.
Examples of such responses are Connection: Close (close-is-end)
and responses that specify a content length.
"""
def _load(self, data):
if data:
assert not self._finished, (
'tried to add data (%r) to a closed reader!' % data)
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 logger.debug('%s read an additional %d data',
self.name, len(data)) # pylint: disable=E1101
self._addchunk(data)
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643
class CloseIsEndReader(AbstractSimpleReader):
"""Reader for responses that specify Connection: Close for length."""
name = 'close-is-end'
def _close(self):
logger.info('Marking close-is-end reader as closed.')
self._finished = True
class ContentLengthReader(AbstractSimpleReader):
"""Reader for responses that specify an exact content length."""
name = 'content-length'
def __init__(self, amount):
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 AbstractSimpleReader.__init__(self)
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 self._amount = amount
if amount == 0:
self._finished = True
self._amount_seen = 0
def _load(self, data):
AbstractSimpleReader._load(self, data)
self._amount_seen += len(data)
if self._amount_seen >= self._amount:
self._finished = True
logger.debug('content-length read complete')
class ChunkedReader(AbstractReader):
"""Reader for chunked transfer encoding responses."""
def __init__(self, eol):
AbstractReader.__init__(self)
self._eol = eol
self._leftover_skip_amt = 0
self._leftover_data = ''
def _load(self, data):
assert not self._finished, 'tried to add data to a closed reader!'
Mads Kiilerich
fix trivial spelling errors
r17424 logger.debug('chunked read an additional %d data', len(data))
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 position = 0
if self._leftover_data:
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 logger.debug(
'chunked reader trying to finish block from leftover data')
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 # TODO: avoid this string concatenation if possible
data = self._leftover_data + data
position = self._leftover_skip_amt
self._leftover_data = ''
self._leftover_skip_amt = 0
datalen = len(data)
while position < datalen:
split = data.find(self._eol, position)
if split == -1:
self._leftover_data = data
self._leftover_skip_amt = position
return
amt = int(data[position:split], base=16)
block_start = split + len(self._eol)
# If the whole data chunk plus the eol trailer hasn't
# loaded, we'll wait for the next load.
if block_start + amt + len(self._eol) > len(data):
self._leftover_data = data
self._leftover_skip_amt = position
return
if amt == 0:
self._finished = True
Mads Kiilerich
fix trivial spelling errors
r17424 logger.debug('closing chunked reader due to chunk of length 0')
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 return
Augie Fackler
httpclient: upgrade to fe8c09e4db64 of httpplus
r19182 self._addchunk(data[block_start:block_start + amt])
Augie Fackler
httpclient: update to revision 892730fe7f46 of httpplus
r16643 position = block_start + amt + len(self._eol)
# no-check-code