# HG changeset patch # User Boris Feld # Date 2018-12-17 09:42:19 # Node ID 42f59d3f714dd283d6edcc4d72083ce38857f9cb # Parent 21a9cace4bbf95e75fa4a463f3575098dea32d3b delta: exclude base candidate much smaller than the target If a revision's full text is that much bigger than a base candidate's full text, we no longer consider that candidate. This solves a pathological case we encountered on a very specific repository. It contains a long series of changesets with a very small manifest (one file) co-existing with other changesets using a very large manifest. Without this filtering, we ended up considering a large number of tiny full snapshots as a potential base. It resulted in very large deltas (the size of the full text) and mercurial spending 99% of its time compressing these deltas. The timing of a commit moved from about 400s to about 10s (still slow, but not ridiculously slow). diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -601,6 +601,11 @@ def isgooddeltainfo(revlog, deltainfo, r return True +# If a revision's full text is that much bigger than a base candidate full +# text's, it is very unlikely that it will produce a valid delta. We no longer +# consider these candidates. 
+LIMIT_BASE2TEXT = 50 + def _candidategroups(revlog, textlen, p1, p2, cachedelta): """Provides group of revision to be tested as delta base @@ -614,6 +619,7 @@ def _candidategroups(revlog, textlen, p1 deltalength = revlog.length deltaparent = revlog.deltaparent + sparse = revlog._sparserevlog good = None deltas_limit = textlen * LIMIT_DELTA2TEXT @@ -644,6 +650,8 @@ def _candidategroups(revlog, textlen, p1 # filter out delta base that will never produce good delta if deltas_limit < revlog.length(rev): continue + if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT): + continue # no delta for rawtext-changing revs (see "candelta" for why) if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: continue diff --git a/tests/test-clone-uncompressed.t b/tests/test-clone-uncompressed.t --- a/tests/test-clone-uncompressed.t +++ b/tests/test-clone-uncompressed.t @@ -184,8 +184,8 @@ Basic clone #if stream-bundle2 $ hg clone --stream -U http://localhost:$HGPORT clone1 streaming all changes - 1030 files to transfer, 96.4 KB of data - transferred 96.4 KB in * seconds (* */sec) (glob) + 1030 files to transfer, 96.5 KB of data + transferred 96.5 KB in * seconds (* */sec) (glob) $ ls -1 clone1/.hg/cache branch2-served @@ -201,11 +201,11 @@ getbundle requests with stream=1 are unc $ f --size --hex --bytes 256 body - body: size=112245 + body: size=112262 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......| 0010: 7f 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 |..STREAM2.......| 0020: 05 09 04 0c 44 62 79 74 65 63 6f 75 6e 74 39 38 |....Dbytecount98| - 0030: 37 35 38 66 69 6c 65 63 6f 75 6e 74 31 30 33 30 |758filecount1030| + 0030: 37 37 35 66 69 6c 65 63 6f 75 6e 74 31 30 33 30 |775filecount1030| 0040: 72 65 71 75 69 72 65 6d 65 6e 74 73 64 6f 74 65 |requirementsdote| 0050: 6e 63 6f 64 65 25 32 43 66 6e 63 61 63 68 65 25 |ncode%2Cfncache%| 0060: 32 43 67 65 6e 65 72 61 6c 64 65 6c 74 61 25 32 |2Cgeneraldelta%2| @@ -232,8 +232,8 @@ getbundle requests with 
stream=1 are unc #if stream-bundle2 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed streaming all changes - 1030 files to transfer, 96.4 KB of data - transferred 96.4 KB in * seconds (* */sec) (glob) + 1030 files to transfer, 96.5 KB of data + transferred 96.5 KB in * seconds (* */sec) (glob) #endif Clone with background file closing enabled @@ -274,12 +274,12 @@ Clone with background file closing enabl bundle2-input-bundle: with-transaction bundle2-input-part: "stream2" (params: 3 mandatory) supported applying stream bundle - 1030 files to transfer, 96.4 KB of data + 1030 files to transfer, 96.5 KB of data starting 4 threads for background file closing starting 4 threads for background file closing updating the branch cache - transferred 96.4 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 112077 + transferred 96.5 KB in * seconds (* */sec) (glob) + bundle2-input-part: total payload size 112094 bundle2-input-part: "listkeys" (params: 1 mandatory) supported bundle2-input-bundle: 1 parts total checking for updated bookmarks @@ -318,8 +318,8 @@ Streaming of secrets can be overridden b #if stream-bundle2 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed streaming all changes - 1030 files to transfer, 96.4 KB of data - transferred 96.4 KB in * seconds (* */sec) (glob) + 1030 files to transfer, 96.5 KB of data + transferred 96.5 KB in * seconds (* */sec) (glob) #endif $ killdaemons.py