diff --git a/mercurial/configitems.py b/mercurial/configitems.py
--- a/mercurial/configitems.py
+++ b/mercurial/configitems.py
@@ -2042,6 +2042,11 @@ coreconfigitem(
 )
 coreconfigitem(
     b'storage',
+    b'revlog.delta-parent-search.candidate-group-chunk-size',
+    default=0,
+)
+coreconfigitem(
+    b'storage',
     b'revlog.issue6528.fix-incoming',
     default=True,
 )
diff --git a/mercurial/helptext/config.txt b/mercurial/helptext/config.txt
--- a/mercurial/helptext/config.txt
+++ b/mercurial/helptext/config.txt
@@ -2281,6 +2281,21 @@ category impact performance and reposito
     To fix affected revisions that already exist within the repository, one can
     use :hg:`debug-repair-issue-6528`.
 
+.. container:: verbose
+
+    ``revlog.delta-parent-search.candidate-group-chunk-size``
+        Tune the number of delta bases the storage will consider in the
+        same "round" of search. In some very rare cases, using a smaller value
+        might result in faster processing at the possible expense of storage
+        space, while using larger values might result in slower processing at the
+        possible benefit of storage space. A value of "0" means no limitation.
+
+        default: no limitation
+
+        It is unlikely that you'll have to tune this configuration. If you think
+        you do, consider talking with the Mercurial developer community about your
+        repositories.
+
 ``revlog.optimize-delta-parent-choice``
     When storing a merge revision, both parents will be equally considered as
     a possible delta base. This results in better delta selection and improved
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1081,6 +1081,11 @@ def resolverevlogstorevfsoptions(ui, req
             b'storage', b'revlog.optimize-delta-parent-choice'
         )
         options[b'deltabothparents'] = deltabothparents
+        dps_cgds = ui.configint(
+            b'storage',
+            b'revlog.delta-parent-search.candidate-group-chunk-size',
+        )
+        options[b'delta-parent-search.candidate-group-chunk-size'] = dps_cgds
         options[b'debug-delta'] = ui.configbool(b'debug', b'revlog.debug-delta')
 
         issue6528 = ui.configbool(b'storage', b'revlog.issue6528.fix-incoming')
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -348,6 +348,7 @@ class revlog:
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
+        self._candidate_group_chunk_size = 0
         self._debug_delta = False
         self.index = None
         self._docket = None
@@ -422,6 +423,9 @@ class revlog:
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
+        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
+        if dps_cgds:
+            self._candidate_group_chunk_size = dps_cgds
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -680,6 +680,7 @@ def _candidategroups(
     good = None
 
     deltas_limit = textlen * LIMIT_DELTA2TEXT
+    group_chunk_size = revlog._candidate_group_chunk_size
 
     tested = {nullrev}
     candidates = _refinedgroups(
@@ -770,11 +771,30 @@
                 group.append(rev)
             if group:
-                # XXX: in the sparse revlog case, group can become large,
-                #      impacting performances. Some bounding or slicing mecanism
-                #      would help to reduce this impact.
-                tested.update(group)
-                good = yield tuple(group)
+                # When the size of the candidate group is big, it can result in a
+                # quite significant performance impact. To reduce this, we can send
+                # them in smaller batches until the new batch does not provide any
+                # improvements.
+                #
+                # This might reduce the overall efficiency of the compression in
+                # some corner cases, but that should also prevent very pathological
+                # cases from being an issue. (eg. 20 000 candidates).
+                #
+                # XXX note that the ordering of the group becomes important as it
+                # now impacts the final result. The current order is unprocessed
+                # and can be improved.
+                if group_chunk_size == 0:
+                    tested.update(group)
+                    good = yield tuple(group)
+                else:
+                    prev_good = good
+                    for start in range(0, len(group), group_chunk_size):
+                        sub_group = group[start : start + group_chunk_size]
+                        tested.update(sub_group)
+                        good = yield tuple(sub_group)
+                        if prev_good == good:
+                            break
     yield None
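
To make the new control flow at the end of ``_candidategroups`` easier to follow, here is a small, self-contained sketch of the chunked generator protocol. This is not Mercurial code: ``chunked_groups`` only mirrors the shape of the patched logic, including the ``send()``-based exchange with its caller, while ``find_best`` and ``score`` are made-up stand-ins for the delta evaluation performed by the real delta computer::

    def chunked_groups(group, chunk_size, good=None):
        """Yield `group` in batches of `chunk_size` (0 means "all at once").

        After each batch the caller send()s back its best pick so far; if a
        batch leaves that pick unchanged from when the group was entered,
        the remaining batches are skipped.
        """
        if chunk_size == 0:
            good = yield tuple(group)
        else:
            prev_good = good
            for start in range(0, len(group), chunk_size):
                sub_group = group[start : start + chunk_size]
                good = yield tuple(sub_group)
                if prev_good == good:
                    break
        yield None

    def find_best(group, chunk_size, score, best=None):
        """Drive the generator through its send() protocol, as the caller does."""
        gen = chunked_groups(group, chunk_size, good=best)
        candidates = next(gen)
        while candidates is not None:
            for rev in candidates:
                if best is None or score(rev) > score(best):
                    best = rev
            candidates = gen.send(best)
        return best

    if __name__ == '__main__':
        revs = list(range(20))
        # chunk_size=0: the whole group is handed over in a single round.
        print(find_best(revs, chunk_size=0, score=lambda r: r))
        # chunk_size=5 with an already-good starting pick: the first batch
        # brings no improvement, so the later batches are never evaluated.
        print(find_best(revs, chunk_size=5, score=lambda r: -r, best=0))

Note that, as in the patch, ``prev_good`` is captured once when the group is entered, so the early ``break`` triggers only when a batch leaves the caller's pick equal to what it was at that point.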
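
Since the option defaults to ``0`` (no limitation), the chunked behaviour is opt-in: the knob is registered under the ``storage`` section and read with ``ui.configint``, so a repository could enable it from an hgrc along these lines (the value ``4`` is purely illustrative, not a recommendation)::

    [storage]
    revlog.delta-parent-search.candidate-group-chunk-size = 4

Leaving it at ``0`` keeps the previous behaviour of testing each candidate group in a single round.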