# HG changeset patch # User Gregory Szorc # Date 2017-11-14 06:20:12 # Node ID 699b2a759319966cc496da16d3e39ad539175988 # Parent 764e3ad1cf54e9d6c7a74c70051820d422adadfd bundle2: avoid unbound read when seeking Currently, seekableunbundlepart.seek() will perform a read() during seek operations. This will allocate a buffer to hold the raw data over the seek distance. This can lead to very large allocations and cause performance to suffer. We change the code to perform read(32768) in a loop to avoid potentially large allocations. `hg perfbundleread` on an uncompressed Firefox bundle reveals a performance impact: ! bundle2 iterparts() ! wall 2.992605 comb 2.990000 user 2.260000 sys 0.730000 (best of 4) ! bundle2 iterparts() seekable ! wall 3.863810 comb 3.860000 user 3.000000 sys 0.860000 (best of 3) ! bundle2 part seek() ! wall 6.213387 comb 6.200000 user 3.350000 sys 2.850000 (best of 3) ! wall 3.820347 comb 3.810000 user 2.980000 sys 0.830000 (best of 3) Since seekable bundle parts are (only) used by bundlerepo, this /may/ speed up initial loading of bundle-based repos. But any improvement will likely only be noticed on very large bundles. Differential Revision: https://phab.mercurial-scm.org/D1394 diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py --- a/mercurial/bundle2.py +++ b/mercurial/bundle2.py @@ -1415,13 +1415,20 @@ class seekableunbundlepart(unbundlepart) newpos = self._pos + offset elif whence == os.SEEK_END: if not self.consumed: - self.read() + # Can't use self.consume() here because it advances self._pos. + chunk = self.read(32768) + while chunk: + chunk = self.read(32768) newpos = self._chunkindex[-1][0] - offset else: raise ValueError('Unknown whence value: %r' % (whence,)) if newpos > self._chunkindex[-1][0] and not self.consumed: - self.read() + # Can't use self.consume() here because it advances self._pos. 
+ chunk = self.read(32768) + while chunk: + chunk = self.read(32768) + if not 0 <= newpos <= self._chunkindex[-1][0]: raise ValueError('Offset out of range')