diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -9,6 +9,7 @@ from __future__ import absolute_import +import collections import heapq import struct @@ -607,8 +608,18 @@ def _candidategroups(revlog, textlen, p1 continue group.append(rev) if group: + # XXX: in the sparse revlog case, group can become large, + # impacting performance. Some bounding or slicing mechanism + # would help to reduce this impact. yield tuple(group) +def _findsnapshots(revlog, cache, start_rev): + """find snapshot from start_rev to tip""" + deltaparent = revlog.deltaparent + for rev in revlog.revs(start_rev): + if deltaparent(rev) == nullrev: + cache[nullrev].append(rev) + def _rawgroups(revlog, p1, p2, cachedelta): """Provides group of revision to be tested as delta base @@ -656,6 +667,18 @@ def _rawgroups(revlog, p1, p2, cachedelt for p in parents: bases.append(deltachain(p)[0]) yield tuple(sorted(bases)) + # No suitable base found in the parent chain, search if any full + # snapshots emitted since parent's base would be a suitable base for an + # intermediate snapshot. + # + # It gives a chance to reuse a delta chain unrelated to the current + # revisions instead of starting our own. Without such re-use, + # topological branches would keep reopening new full chains, creating + # more and more snapshots as the repository grows. + snapfloor = min(bases) + 1 + snapshots = collections.defaultdict(list) + _findsnapshots(revlog, snapshots, snapfloor) + yield tuple(snapshots[nullrev]) # other approach failed try against prev to hopefully save us a # fulltext. 
diff --git a/tests/test-sparse-revlog.t b/tests/test-sparse-revlog.t --- a/tests/test-sparse-revlog.t +++ b/tests/test-sparse-revlog.t @@ -77,7 +77,7 @@ repeatedly while some of it changes rare $ f -s .hg/store/data/*.d - .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=72315280 + .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=67810463 $ hg debugrevlog * format : 1 flags : generaldelta @@ -89,36 +89,39 @@ repeatedly while some of it changes rare empty : 0 ( 0.00%) text : 0 (100.00%) delta : 0 (100.00%) - snapshot : 145 ( 2.90%) - lvl-0 : 15 ( 0.30%) - lvl-1 : 130 ( 2.60%) - deltas : 4856 (97.10%) - revision size : 72315280 - snapshot : 18481085 (25.56%) - lvl-0 : 3016019 ( 4.17%) - lvl-1 : 15465066 (21.39%) - deltas : 53834195 (74.44%) + snapshot : 126 ( 2.52%) + lvl-0 : 4 ( 0.08%) + lvl-1 : 120 ( 2.40%) + lvl-2 : 2 ( 0.04%) + deltas : 4875 (97.48%) + revision size : 67810463 + snapshot : 14373347 (21.20%) + lvl-0 : 804235 ( 1.19%) + lvl-1 : 13535903 (19.96%) + lvl-2 : 33209 ( 0.05%) + deltas : 53437116 (78.80%) chunks : 5001 0x78 (x) : 5001 (100.00%) - chunks size : 72315280 - 0x78 (x) : 72315280 (100.00%) + chunks size : 67810463 + 0x78 (x) : 67810463 (100.00%) avg chain length : 18 max chain length : 45 - max chain reach : 32095083 - compression ratio : 23 + max chain reach : 25808240 + compression ratio : 25 uncompressed data size (min/max/avg) : 346468 / 346472 / 346471 - full revision size (min/max/avg) : 200990 / 201151 / 201067 - inter-snapshot size (min/max/avg) : 37202 / 173034 / 118962 - level-1 (min/max/avg) : 37202 / 173034 / 118962 - delta size (min/max/avg) : 10649 / 104791 / 11086 + full revision size (min/max/avg) : 201014 / 201116 / 201058 + inter-snapshot size (min/max/avg) : 11623 / 173150 / 111222 + level-1 (min/max/avg) : 11623 / 173150 / 112799 + level-2 (min/max/avg) : 14151 / 19058 / 16604 + delta size (min/max/avg) : 10649 / 101790 / 10961 - deltas against prev : 4185 (86.18%) - where prev = p1 : 4139 
(98.90%) + deltas against prev : 4207 (86.30%) + where prev = p1 : 4164 (98.98%) where prev = p2 : 0 ( 0.00%) - other : 46 ( 1.10%) - deltas against p1 : 647 (13.32%) - deltas against p2 : 24 ( 0.49%) + other : 43 ( 1.02%) + deltas against p1 : 653 (13.39%) + deltas against p2 : 15 ( 0.31%) deltas against other : 0 ( 0.00%)