# HG changeset patch # User Boris Feld # Date 2018-07-10 09:57:33 # Node ID 967fee55e8d9d8674100849d94383c72581d49f3 # Parent e59e27e52297134e38bd35e0109630b9fed72b59 revlog: postprocess chunk to slice them down to a certain size After the density slicing is done, we enforce a maximum chunk size to avoid memory consumption issues. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -293,7 +293,7 @@ def _segmentspan(revlog, revs): return 0 return revlog.end(revs[-1]) - revlog.start(revs[0]) -def _slicechunk(revlog, revs): +def _slicechunk(revlog, revs, targetsize=None): """slice revs to reduce the amount of unrelated data to be read from disk. ``revs`` is sliced into groups that should be read in one time. @@ -303,6 +303,13 @@ def _slicechunk(revlog, revs): ratio) is above `revlog._srdensitythreshold`. No gap smaller than `revlog._srmingapsize` is skipped. + If `targetsize` is set, no chunk larger than `targetsize` will be yielded. + For consistency with other slicing choices, this limit won't go lower than + `revlog._srmingapsize`. + + If individual revision chunks are larger than this limit, they will still + be yielded individually. + >>> revlog = _testrevlog([ ... 5, #00 (5) ... 10, #01 (5) @@ -332,11 +339,20 @@ def _slicechunk(revlog, revs): [[0], [11, 13, 15]] >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) [[1, 2], [5, 8, 10, 11], [14]] + + Slicing with a maximum chunk size + >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15)) + [[0], [11], [13], [15]] + >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20)) + [[0], [11], [13, 15]] """ + if targetsize is not None: + targetsize = max(targetsize, revlog._srmingapsize) for chunk in _slicechunktodensity(revlog, revs, revlog._srdensitythreshold, revlog._srmingapsize): - yield chunk + for subchunk in _slicechunktosize(revlog, chunk, targetsize): + yield subchunk def _slicechunktosize(revlog, revs, targetsize): """slice revs to match the target size