# HG changeset patch # User Mateusz Kwapich # Date 2014-11-06 22:20:05 # Node ID 76effa770ff9b2044ff28d92a7bf249395e99c1e # Parent d23834b871ac65dda10a3f2bd4dcb107c9667f02 revlog: add config variable for limiting delta-chain length The current heuristic for deciding between storing delta and full texts is based on the ratio of (sizeofdeltas)/(sizeoffulltext). In some cases (for example a manifest for a huge repo) this approach can result in extremely long delta chains (~30,000) which are very slow to read. (In the case of a manifest ~500ms are added to every hg command because of that). This commit introduces the "revlog.maxchainlen" configuration variable that will limit delta chain length. diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -316,6 +316,9 @@ class localrepository(object): chunkcachesize = self.ui.configint('format', 'chunkcachesize') if chunkcachesize is not None: self.sopener.options['chunkcachesize'] = chunkcachesize + maxchainlen = self.ui.configint('revlog', 'maxchainlen') + if maxchainlen is not None: + self.sopener.options['maxchainlen'] = maxchainlen def _writerequirements(self): reqfile = self.opener("requires", "w") diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -204,6 +204,7 @@ class revlog(object): self._basecache = None self._chunkcache = (0, '') self._chunkcachesize = 65536 + self._maxchainlen = None self.index = [] self._pcache = {} self._nodecache = {nullid: nullrev} @@ -219,6 +220,8 @@ class revlog(object): v = 0 if 'chunkcachesize' in opts: self._chunkcachesize = opts['chunkcachesize'] + if 'maxchainlen' in opts: + self._maxchainlen = opts['maxchainlen'] if self._chunkcachesize <= 0: raise RevlogError(_('revlog chunk cache size %r is not greater ' @@ -1216,11 +1219,13 @@ class revlog(object): base = rev else: base = chainbase - return dist, l, data, base, chainbase + chainlen = self.chainlen(rev) + 1 + 
return dist, l, data, base, chainbase, chainlen curr = len(self) prev = curr - 1 base = chainbase = curr + chainlen = None offset = self.end(prev) flags = 0 d = None @@ -1240,7 +1245,7 @@ class revlog(object): d = builddelta(prev) else: d = builddelta(prev) - dist, l, data, base, chainbase = d + dist, l, data, base, chainbase, chainlen = d # full versions are inserted when the needed deltas # become comparable to the uncompressed text @@ -1249,7 +1254,8 @@ class revlog(object): cachedelta[1]) else: textlen = len(text) - if d is None or dist > textlen * 2: + if (d is None or dist > textlen * 2 or + self._maxchainlen and chainlen > self._maxchainlen): text = buildtext() data = self.compress(text) l = len(data[1]) + len(data[0]) diff --git a/tests/test-debugcommands.t b/tests/test-debugcommands.t --- a/tests/test-debugcommands.t +++ b/tests/test-debugcommands.t @@ -24,6 +24,40 @@ full revision size (min/max/avg) : 44 / 44 / 44 delta size (min/max/avg) : 0 / 0 / 0 +Test max chain len + $ cat >> $HGRCPATH << EOF + > [revlog] + > maxchainlen=4 + > EOF + + $ echo "This test checks if maxchainlen config value is respected also it can serve as basic test for debugrevlog -d .\n" >> a + $ hg ci -m a + $ echo "b\n" >> a + $ hg ci -m a + $ echo "c\n" >> a + $ hg ci -m a + $ echo "d\n" >> a + $ hg ci -m a + $ echo "e\n" >> a + $ hg ci -m a + $ echo "f\n" >> a + $ hg ci -m a + $ echo 'g\n' >> a + $ hg ci -m a + $ echo 'h\n' >> a + $ hg ci -m a + $ hg debugrevlog -d a + # rev p1rev p2rev start end deltastart base p1 p2 rawsize totalsize compression heads chainlen + 0 -1 -1 0 ??? 0 0 0 0 ??? ???? ? 1 0 (glob) + 1 0 -1 ??? ??? 0 0 0 0 ??? ???? ? 1 1 (glob) + 2 1 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob) + 3 2 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob) + 4 3 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 4 (glob) + 5 4 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 0 (glob) + 6 5 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 1 (glob) + 7 6 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob) + 8 7 -1 ??? 
??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob) + $ cd .. Test internal debugstacktrace command