# HG changeset patch # User Boris Feld # Date 2018-07-10 10:20:57 # Node ID 43d0619cec90d8c7a28d08880a345da98041606b # Parent 967fee55e8d9d8674100849d94383c72581d49f3 revlog: enforce chunk slicing down to a certain size Limit maximum chunk size to 4x final size when reading a revision from a revlog. We only apply this logic when the target size is known from the revlog. Ideally, revlog's delta chain would be written in a way that does not trigger this extra slicing often. However, having this second guarantee that we won't read unexpectedly large amounts of memory in all cases is important for the future. Future delta chain building algorithms might have good reason to create delta chains with such characteristics. Including this code in core as soon as possible will make Mercurial 4.7 forward-compatible with such improvements. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -1949,7 +1949,7 @@ class revlog(object): """ return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) - def _chunks(self, revs, df=None): + def _chunks(self, revs, df=None, targetsize=None): """Obtain decompressed chunks for the specified revisions. Accepts an iterable of numeric revisions that are assumed to be in @@ -1976,7 +1976,7 @@ class revlog(object): if not self._withsparseread: slicedchunks = (revs,) else: - slicedchunks = _slicechunk(self, revs) + slicedchunks = _slicechunk(self, revs, targetsize) for revschunk in slicedchunks: firstrev = revschunk[0] @@ -2079,7 +2079,12 @@ class revlog(object): # drop cache to save memory self._cache = None - bins = self._chunks(chain, df=_df) + targetsize = None + rawsize = self.index[rev][2] + if 0 <= rawsize: + targetsize = 4 * rawsize + + bins = self._chunks(chain, df=_df, targetsize=targetsize) if rawtext is None: rawtext = bytes(bins[0]) bins = bins[1:]