# HG changeset patch # User Pierre-Yves David # Date 2022-05-20 12:27:46 # Node ID b909dd35d9abb16f292aae0f0dda21153c699c82 # Parent 2bcf5e14bb7ee09a088ab8f9662a1a5921d638d3 deltas: add a debug-delta-find command to analyse delta search See command documentation for details. For some reason, pytype is confused by our usage of None/deltainfo variable, so I had to quiet it. diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -73,6 +73,7 @@ from . import ( repoview, requirements, revlog, + revlogutils, revset, revsetlang, scmutil, @@ -989,6 +990,65 @@ def debugdeltachain(ui, repo, file_=None @command( + b'debug-delta-find', + cmdutil.debugrevlogopts + cmdutil.formatteropts, + _(b'-c|-m|FILE REV'), + optionalrepo=True, +) +def debugdeltafind(ui, repo, arg_1, arg_2=None, **opts): + """display the computation to get to a valid delta for storing REV + + This command will replay the process used to find the "best" delta to store + a revision and display information about all the steps used to get to that + result. + + The revision use the revision number of the target storage (not changelog + revision number). + + note: the process is initiated from a full text of the revision to store. + """ + opts = pycompat.byteskwargs(opts) + if arg_2 is None: + file_ = None + rev = arg_1 + else: + file_ = arg_1 + rev = arg_2 + + rev = int(rev) + + revlog = cmdutil.openrevlog(repo, b'debugdeltachain', file_, opts) + + deltacomputer = deltautil.deltacomputer( + revlog, + write_debug=ui.write, + debug_search=True, + ) + + node = revlog.node(rev) + p1r, p2r = revlog.parentrevs(rev) + p1 = revlog.node(p1r) + p2 = revlog.node(p2r) + btext = [revlog.revision(rev)] + textlen = len(btext[0]) + cachedelta = None + flags = revlog.flags(rev) + + revinfo = revlogutils.revisioninfo( + node, + p1, + p2, + btext, + textlen, + cachedelta, + flags, + ) + + fh = revlog._datafp() + deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev) + + +@command( b'debugdirstate|debugstate', [ ( diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -931,9 +931,10 @@ def _rawgroups(revlog, p1, p2, cachedelt class deltacomputer: - def __init__(self, revlog, write_debug=None): + def __init__(self, revlog, write_debug=None, debug_search=False): self.revlog = revlog self._write_debug = write_debug + self._debug_search = debug_search def buildtext(self, revinfo, fh): """Builds a fulltext version of a revision @@ -980,6 +981,7 @@ class deltacomputer: def _builddeltainfo(self, revinfo, base, fh): # can we use the cached delta? revlog = self.revlog + debug_search = self._write_debug is not None and self._debug_search chainbase = revlog.chainbase(base) if revlog._generaldelta: deltabase = base @@ -1009,13 +1011,27 @@ class deltacomputer: delta = revinfo.cachedelta[1] if delta is None: delta = self._builddeltadiff(base, revinfo, fh) + if debug_search: + msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n" + msg %= len(delta) + self._write_debug(msg) # snapshotdept need to be neither None nor 0 level snapshot if revlog.upperboundcomp is not None and snapshotdepth: lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp snapshotlimit = revinfo.textlen >> snapshotdepth + if debug_search: + msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n" + msg %= lowestrealisticdeltalen + self._write_debug(msg) if snapshotlimit < lowestrealisticdeltalen: + if debug_search: + msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n" + self._write_debug(msg) return None if revlog.length(base) < lowestrealisticdeltalen: + if debug_search: + msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n" + self._write_debug(msg) return None header, data = revlog.compress(delta) deltalen = len(header) + len(data) @@ -1090,6 +1106,8 @@ class deltacomputer: if self._write_debug is not None: start = util.timer() + debug_search = self._write_debug is not None and self._debug_search + # count the number of different delta we tried (for debug purpose) dbg_try_count = 0 # count the number of "search round" we did. (for debug purpose) @@ -1113,6 +1131,10 @@ class deltacomputer: p2_chain_len = revlog._chaininfo(p2r)[0] else: p2_chain_len = -1 + if debug_search: + msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n" + msg %= target_rev + self._write_debug(msg) groups = _candidategroups( self.revlog, revinfo.textlen, p1r, p2r, cachedelta @@ -1120,21 +1142,93 @@ class deltacomputer: candidaterevs = next(groups) while candidaterevs is not None: dbg_try_rounds += 1 + if debug_search: + prev = None + if deltainfo is not None: + prev = deltainfo.base + + if p1 in candidaterevs or p2 in candidaterevs: + round_type = b"parents" + elif prev is not None and all(c < prev for c in candidaterevs): + round_type = b"refine-down" + elif prev is not None and all(c > prev for c in candidaterevs): + round_type = b"refine-up" + else: + round_type = b"search-down" + msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n" + msg %= (dbg_try_rounds, len(candidaterevs), round_type) + self._write_debug(msg) nominateddeltas = [] if deltainfo is not None: + if debug_search: + msg = ( + b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n" + ) + msg %= (deltainfo.base, deltainfo.deltalen) + self._write_debug(msg) # if we already found a good delta, # challenge it against refined candidates nominateddeltas.append(deltainfo) for candidaterev in candidaterevs: + if debug_search: + msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n" + msg %= candidaterev + self._write_debug(msg) + candidate_type = None + if candidaterev == p1: + candidate_type = b"p1" + elif candidaterev == p2: + candidate_type = b"p2" + elif self.revlog.issnapshot(candidaterev): + candidate_type = b"snapshot-%d" + candidate_type %= self.revlog.snapshotdepth( + candidaterev + ) + + if candidate_type is not None: + msg = b"DBG-DELTAS-SEARCH: type=%s\n" + msg %= candidate_type + self._write_debug(msg) + msg = b"DBG-DELTAS-SEARCH: size=%d\n" + msg %= self.revlog.length(candidaterev) + self._write_debug(msg) + msg = b"DBG-DELTAS-SEARCH: base=%d\n" + msg %= self.revlog.deltaparent(candidaterev) + self._write_debug(msg) if candidaterev in excluded_bases: + if debug_search: + msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n" + self._write_debug(msg) continue if candidaterev >= target_rev: + if debug_search: + msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n" + self._write_debug(msg) continue dbg_try_count += 1 + + if debug_search: + delta_start = util.timer() candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) + if debug_search: + delta_end = util.timer() + msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n" + msg %= delta_end - delta_start + self._write_debug(msg) if candidatedelta is not None: if isgooddeltainfo(self.revlog, candidatedelta, revinfo): + if debug_search: + msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n" + msg %= candidatedelta.deltalen + self._write_debug(msg) nominateddeltas.append(candidatedelta) + elif debug_search: + msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n" + msg %= candidatedelta.deltalen + self._write_debug(msg) + elif debug_search: + msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n" + self._write_debug(msg) if nominateddeltas: deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) if deltainfo is not None: @@ -1145,7 +1239,7 @@ class deltacomputer: if deltainfo is None: dbg_type = b"full" deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) - elif deltainfo.snapshotdepth: + elif deltainfo.snapshotdepth: # pytype: disable=attribute-error dbg_type = b"snapshot" else: dbg_type = b"delta" @@ -1161,8 +1255,13 @@ class deltacomputer: 'p1-chain-len': p1_chain_len, 'p2-chain-len': p2_chain_len, } - if deltainfo.snapshotdepth is not None: - dbg['snapshot-depth'] = deltainfo.snapshotdepth + if ( + deltainfo.snapshotdepth # pytype: disable=attribute-error + is not None + ): + dbg[ + 'snapshot-depth' + ] = deltainfo.snapshotdepth # pytype: disable=attribute-error else: dbg['snapshot-depth'] = 0 target_revlog = b"UNKNOWN" diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -74,6 +74,7 @@ Do not show debug commands if there are Show debug commands if there are no other candidates $ hg debugcomplete debug + debug-delta-find debug-repair-issue6528 debugancestor debugantivirusrunning @@ -267,6 +268,7 @@ Show all commands + options config: untrusted, exp-all-known, edit, local, source, shared, non-shared, global, template continue: dry-run copy: forget, after, at-rev, force, include, exclude, dry-run + debug-delta-find: changelog, manifest, dir, template debug-repair-issue6528: to-report, from-report, paranoid, dry-run debugancestor: debugantivirusrunning: diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -978,6 +978,8 @@ Test list of internal help commands $ hg help debug debug commands (internal and unsupported): + debug-delta-find + display the computation to get to a valid delta for storing REV debug-repair-issue6528 find affected revisions and repair them. See issue6528 for more details. diff --git a/tests/test-sparse-revlog.t b/tests/test-sparse-revlog.t --- a/tests/test-sparse-revlog.t +++ b/tests/test-sparse-revlog.t @@ -149,4 +149,46 @@ repeatedly while some of it changes rare deltas against p2 : 63 ( 1.36%) deltas against other : 0 ( 0.00%) + +Test `debug-delta-find` +----------------------- + + $ ls -1 + SPARSE-REVLOG-TEST-FILE + $ hg debugdeltachain SPARSE-REVLOG-TEST-FILE | grep other | tail -1 + 4971 3 5 4930 other 19179 346472 427596 1.23414 15994877 15567281 36.40652 427596 179288 1.00000 5 + $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 + DBG-DELTAS-SEARCH: SEARCH rev=4971 + DBG-DELTAS-SEARCH: ROUND #1 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=18296 + DBG-DELTAS-SEARCH: base=4930 + DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4971 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=19179 + DBG-DELTAS-SEARCH: base=4930 + DBG-DELTAS-SEARCH: TOO-HIGH + DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=39228 + DBG-DELTAS-SEARCH: base=4799 + DBG-DELTAS-SEARCH: uncompressed-delta-size=33050 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=19179 (GOOD) + DBG-DELTAS-SEARCH: ROUND #3 - 1 candidates - refine-down + DBG-DELTAS-SEARCH: CONTENDER: rev=4930 - length=19179 + DBG-DELTAS-SEARCH: CANDIDATE: rev=4799 + DBG-DELTAS-SEARCH: type=snapshot-2 + DBG-DELTAS-SEARCH: size=50213 + DBG-DELTAS-SEARCH: base=4623 + DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + $ cd ..