# HG changeset patch # User Pierre-Yves David # Date 2022-10-17 20:19:43 # Node ID 35d4c2124073f97e13f247ddf7d1a3958109924d # Parent 9cac281eb9c001e4c8a21404ed9fba07651ab2af debug: add an option to display statistic about a unbundling operation This will helps a lot to understand how the bundling decision have actually impacted pull/unbundle on the other side. diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -105,6 +105,164 @@ def writechunks(ui, chunks, filename, vf os.unlink(cleanup) +def _dbg_ubdl_line( + ui, + indent, + key, + base_value=None, + percentage_base=None, + percentage_key=None, +): + """Print one line of debug_unbundle_debug_info""" + line = b"DEBUG-UNBUNDLING: " + line += b' ' * (2 * indent) + key += b":" + padding = b'' + if base_value is not None: + assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH + line += key.ljust(_KEY_PART_WIDTH - (2 * indent)) + if isinstance(base_value, float): + line += b"%14.3f seconds" % base_value + else: + line += b"%10d" % base_value + padding = b' ' + else: + line += key + + if percentage_base is not None: + line += padding + padding = b'' + assert base_value is not None + percentage = base_value * 100 // percentage_base + if percentage_key is not None: + line += b" (%3d%% of %s)" % ( + percentage, + percentage_key, + ) + else: + line += b" (%3d%%)" % percentage + + line += b'\n' + ui.write_err(line) + + +def _sumf(items): + # python < 3.8 does not support a `start=0.0` argument to sum + # So we have to cheat a bit until we drop support for those version + if not items: + return 0.0 + return sum(items) + + +def display_unbundle_debug_info(ui, debug_info): + """display an unbundling report from debug information""" + cl_info = [] + mn_info = [] + fl_info = [] + _dispatch = [ + (b'CHANGELOG:', cl_info), + (b'MANIFESTLOG:', mn_info), + (b'FILELOG:', fl_info), + ] + for e in debug_info: + for prefix, info in _dispatch: + if e["target-revlog"].startswith(prefix): + info.append(e) + break + else: + assert False, 'unreachable' + each_info = [ + (b'changelog', cl_info), + (b'manifests', mn_info), + (b'files', fl_info), + ] + + # General Revision Countss + _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info)) + for key, info in each_info: + if not info: + continue + _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info)) + + # General Time spent + all_durations = [e['duration'] for e in debug_info] + all_durations.sort() + total_duration = _sumf(all_durations) + _dbg_ubdl_line(ui, 0, b'total-time', total_duration) + + for key, info in each_info: + if not info: + continue + durations = [e['duration'] for e in info] + durations.sort() + _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration) + + # Count and cache reuse per delta types + each_types = {} + for key, info in each_info: + each_types[key] = types = { + b'full': 0, + b'full-cached': 0, + b'snapshot': 0, + b'snapshot-cached': 0, + b'delta': 0, + b'delta-cached': 0, + b'unknown': 0, + b'unknown-cached': 0, + } + for e in info: + types[e['type']] += 1 + if e['using-cached-base']: + types[e['type'] + b'-cached'] += 1 + + EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown') + if debug_info: + _dbg_ubdl_line(ui, 0, b'type-count') + for key, info in each_info: + if info: + _dbg_ubdl_line(ui, 1, key) + t = each_types[key] + for tn in EXPECTED_TYPES: + if t[tn]: + tc = tn + b'-cached' + _dbg_ubdl_line(ui, 2, tn, t[tn]) + _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn]) + + # time perf delta types and reuse + each_type_time = {} + for key, info in each_info: + each_type_time[key] = t = { + b'full': [], + b'full-cached': [], + b'snapshot': [], + b'snapshot-cached': [], + b'delta': [], + b'delta-cached': [], + b'unknown': [], + b'unknown-cached': [], + } + for e in info: + t[e['type']].append(e['duration']) + if e['using-cached-base']: + t[e['type'] + b'-cached'].append(e['duration']) + for t_key, value in list(t.items()): + value.sort() + t[t_key] = _sumf(value) + + if debug_info: + _dbg_ubdl_line(ui, 0, b'type-time') + for key, info in each_info: + if info: + _dbg_ubdl_line(ui, 1, key) + t = each_type_time[key] + td = total_duration # to same space on next lines + for tn in EXPECTED_TYPES: + if t[tn]: + tc = tn + b'-cached' + _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total") + _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total") + + class cg1unpacker: """Unpacker for cg1 changegroup streams. @@ -254,7 +412,15 @@ class cg1unpacker: pos = next yield closechunk() - def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None): + def _unpackmanifests( + self, + repo, + revmap, + trp, + prog, + addrevisioncb=None, + debug_info=None, + ): self.callback = prog.increment # no need to check for empty manifest group here: # if the result of the merge of 1 and 2 is the same in 3 and 4, @@ -263,7 +429,13 @@ class cg1unpacker: self.manifestheader() deltas = self.deltaiter() storage = repo.manifestlog.getstorage(b'') - storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb) + storage.addgroup( + deltas, + revmap, + trp, + addrevisioncb=addrevisioncb, + debug_info=debug_info, + ) prog.complete() self.callback = None @@ -292,6 +464,10 @@ class cg1unpacker: """ repo = repo.unfiltered() + debug_info = None + if repo.ui.configbool(b'debug', b'unbundling-stats'): + debug_info = [] + # Only useful if we're adding sidedata categories. If both peers have # the same categories, then we simply don't do anything. adding_sidedata = ( @@ -366,6 +542,7 @@ class cg1unpacker: alwayscache=True, addrevisioncb=onchangelog, duplicaterevisioncb=ondupchangelog, + debug_info=debug_info, ): repo.ui.develwarn( b'applied empty changelog from changegroup', @@ -413,6 +590,7 @@ class cg1unpacker: trp, progress, addrevisioncb=on_manifest_rev, + debug_info=debug_info, ) needfiles = {} @@ -449,6 +627,7 @@ class cg1unpacker: efiles, needfiles, addrevisioncb=on_filelog_rev, + debug_info=debug_info, ) if sidedata_helpers: @@ -567,6 +746,8 @@ class cg1unpacker: b'changegroup-runhooks-%020i' % clstart, lambda tr: repo._afterlock(runhooks), ) + if debug_info is not None: + display_unbundle_debug_info(repo.ui, debug_info) finally: repo.ui.flush() # never return 0 here: @@ -626,9 +807,22 @@ class cg3unpacker(cg2unpacker): protocol_flags = 0 return node, p1, p2, deltabase, cs, flags, protocol_flags - def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None): + def _unpackmanifests( + self, + repo, + revmap, + trp, + prog, + addrevisioncb=None, + debug_info=None, + ): super(cg3unpacker, self)._unpackmanifests( - repo, revmap, trp, prog, addrevisioncb=addrevisioncb + repo, + revmap, + trp, + prog, + addrevisioncb=addrevisioncb, + debug_info=debug_info, ) for chunkdata in iter(self.filelogheader, {}): # If we get here, there are directory manifests in the changegroup @@ -636,7 +830,11 @@ class cg3unpacker(cg2unpacker): repo.ui.debug(b"adding %s revisions\n" % d) deltas = self.deltaiter() if not repo.manifestlog.getstorage(d).addgroup( - deltas, revmap, trp, addrevisioncb=addrevisioncb + deltas, + revmap, + trp, + addrevisioncb=addrevisioncb, + debug_info=debug_info, ): raise error.Abort(_(b"received dir revlog group is empty")) @@ -2173,6 +2371,7 @@ def _addchangegroupfiles( expectedfiles, needfiles, addrevisioncb=None, + debug_info=None, ): revisions = 0 files = 0 @@ -2193,6 +2392,7 @@ def _addchangegroupfiles( revmap, trp, addrevisioncb=addrevisioncb, + debug_info=debug_info, ) if not added: raise error.Abort(_(b"received file revlog group is empty")) diff --git a/mercurial/configitems.py b/mercurial/configitems.py --- a/mercurial/configitems.py +++ b/mercurial/configitems.py @@ -594,6 +594,12 @@ coreconfigitem( b'bundling-stats', default=False, ) +# display extra information about the unbundling process +coreconfigitem( + b'debug', + b'unbundling-stats', + default=False, +) coreconfigitem( b'defaults', b'.*', diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -153,6 +153,7 @@ class filelog: addrevisioncb=None, duplicaterevisioncb=None, maybemissingparents=False, + debug_info=None, ): if maybemissingparents: raise error.Abort( @@ -173,6 +174,7 @@ class filelog: transaction, addrevisioncb=addrevisioncb, duplicaterevisioncb=duplicaterevisioncb, + debug_info=debug_info, ) def getstrippoint(self, minlink): diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1856,6 +1856,7 @@ class manifestrevlog: alwayscache=False, addrevisioncb=None, duplicaterevisioncb=None, + debug_info=None, ): return self._revlog.addgroup( deltas, @@ -1864,6 +1865,7 @@ class manifestrevlog: alwayscache=alwayscache, addrevisioncb=addrevisioncb, duplicaterevisioncb=duplicaterevisioncb, + debug_info=debug_info, ) def rawsize(self, rev): diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -2640,6 +2640,7 @@ class revlog: alwayscache=False, addrevisioncb=None, duplicaterevisioncb=None, + debug_info=None, ): """ add a delta group @@ -2665,6 +2666,7 @@ class revlog: deltacomputer = deltautil.deltacomputer( self, write_debug=write_debug, + debug_info=debug_info, ) # loop through our set of deltas for data in deltas: diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -674,7 +674,12 @@ def _candidategroups(revlog, textlen, p1 deltas_limit = textlen * LIMIT_DELTA2TEXT tested = {nullrev} - candidates = _refinedgroups(revlog, p1, p2, cachedelta) + candidates = _refinedgroups( + revlog, + p1, + p2, + cachedelta, + ) while True: temptative = candidates.send(good) if temptative is None: @@ -703,6 +708,7 @@ def _candidategroups(revlog, textlen, p1 # no delta for rawtext-changing revs (see "candelta" for why) if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: continue + # If we reach here, we are about to build and test a delta. # The delta building process will compute the chaininfo in all # case, since that computation is cached, it is fine to access it @@ -768,15 +774,28 @@ def _refinedgroups(revlog, p1, p2, cache # This logic only applies to general delta repositories and can be disabled # through configuration. Disabling reuse source delta is useful when # we want to make sure we recomputed "optimal" deltas. + debug_info = None if cachedelta and revlog._generaldelta and revlog._lazydeltabase: # Assume what we received from the server is a good choice # build delta will reuse the cache + if debug_info is not None: + debug_info['cached-delta.tested'] += 1 good = yield (cachedelta[0],) if good is not None: + if debug_info is not None: + debug_info['cached-delta.accepted'] += 1 yield None return + # XXX cache me higher snapshots = collections.defaultdict(list) - for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots): + groups = _rawgroups( + revlog, + p1, + p2, + cachedelta, + snapshots, + ) + for candidates in groups: good = yield candidates if good is not None: break @@ -805,7 +824,10 @@ def _refinedgroups(revlog, p1, p2, cache children = tuple(sorted(c for c in snapshots[good])) good = yield children - # we have found nothing + if debug_info is not None: + if good is None: + debug_info['no-solution'] += 1 + yield None @@ -931,10 +953,17 @@ def _rawgroups(revlog, p1, p2, cachedelt class deltacomputer: - def __init__(self, revlog, write_debug=None, debug_search=False): + def __init__( + self, + revlog, + write_debug=None, + debug_search=False, + debug_info=None, + ): self.revlog = revlog self._write_debug = write_debug self._debug_search = debug_search + self._debug_info = debug_info def buildtext(self, revinfo, fh): """Builds a fulltext version of a revision @@ -1103,11 +1132,14 @@ class deltacomputer: if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS: return self._fullsnapshotinfo(fh, revinfo, target_rev) - if self._write_debug is not None: + gather_debug = ( + self._write_debug is not None or self._debug_info is not None + ) + debug_search = self._write_debug is not None and self._debug_search + + if gather_debug: start = util.timer() - debug_search = self._write_debug is not None and self._debug_search - # count the number of different delta we tried (for debug purpose) dbg_try_count = 0 # count the number of "search round" we did. (for debug purpose) @@ -1122,7 +1154,7 @@ class deltacomputer: deltainfo = None p1r, p2r = revlog.rev(p1), revlog.rev(p2) - if self._write_debug is not None: + if gather_debug: if p1r != nullrev: p1_chain_len = revlog._chaininfo(p1r)[0] else: @@ -1250,9 +1282,8 @@ class deltacomputer: else: dbg_type = b"delta" - if self._write_debug is not None: + if gather_debug: end = util.timer() - assert deltainfo is not None # please pytype used_cached = ( cachedelta is not None and dbg_try_rounds == 1 @@ -1262,7 +1293,7 @@ class deltacomputer: dbg = { 'duration': end - start, 'revision': target_rev, - 'delta-base': deltainfo.base, + 'delta-base': deltainfo.base, # pytype: disable=attribute-error 'search_round_count': dbg_try_rounds, 'using-cached-base': used_cached, 'delta_try_count': dbg_try_count, @@ -1294,35 +1325,39 @@ class deltacomputer: target_revlog += b'%s:' % target_key dbg['target-revlog'] = target_revlog - msg = ( - b"DBG-DELTAS:" - b" %-12s" - b" rev=%d:" - b" delta-base=%d" - b" is-cached=%d" - b" - search-rounds=%d" - b" try-count=%d" - b" - delta-type=%-6s" - b" snap-depth=%d" - b" - p1-chain-length=%d" - b" p2-chain-length=%d" - b" - duration=%f" - b"\n" - ) - msg %= ( - dbg["target-revlog"], - dbg["revision"], - dbg["delta-base"], - dbg["using-cached-base"], - dbg["search_round_count"], - dbg["delta_try_count"], - dbg["type"], - dbg["snapshot-depth"], - dbg["p1-chain-len"], - dbg["p2-chain-len"], - dbg["duration"], - ) - self._write_debug(msg) + if self._debug_info is not None: + self._debug_info.append(dbg) + + if self._write_debug is not None: + msg = ( + b"DBG-DELTAS:" + b" %-12s" + b" rev=%d:" + b" delta-base=%d" + b" is-cached=%d" + b" - search-rounds=%d" + b" try-count=%d" + b" - delta-type=%-6s" + b" snap-depth=%d" + b" - p1-chain-length=%d" + b" p2-chain-length=%d" + b" - duration=%f" + b"\n" + ) + msg %= ( + dbg["target-revlog"], + dbg["revision"], + dbg["delta-base"], + dbg["using-cached-base"], + dbg["search_round_count"], + dbg["delta_try_count"], + dbg["type"], + dbg["snapshot-depth"], + dbg["p1-chain-len"], + dbg["p2-chain-len"], + dbg["duration"], + ) + self._write_debug(msg) return deltainfo diff --git a/tests/test-bundle.t b/tests/test-bundle.t --- a/tests/test-bundle.t +++ b/tests/test-bundle.t @@ -1081,3 +1081,47 @@ Test the debug output when applying delt new changesets 4fe08cd4693e:4652c276ac4f (3 drafts) (run 'hg update' to get a working copy) + +Test the debug statistic when applying a bundle +----------------------------------------------- + + $ hg init bar + $ hg -R bar unbundle ./default.hg --config debug.unbundling-stats=yes + adding changesets + adding manifests + adding file changes + DEBUG-UNBUNDLING: revisions: 9 + DEBUG-UNBUNDLING: changelog: 3 ( 33%) + DEBUG-UNBUNDLING: manifests: 3 ( 33%) + DEBUG-UNBUNDLING: files: 3 ( 33%) + DEBUG-UNBUNDLING: total-time: ?????????????? seconds (glob) + DEBUG-UNBUNDLING: changelog: ?????????????? seconds (???%) (glob) + DEBUG-UNBUNDLING: manifests: ?????????????? seconds (???%) (glob) + DEBUG-UNBUNDLING: files: ?????????????? seconds (???%) (glob) + DEBUG-UNBUNDLING: type-count: + DEBUG-UNBUNDLING: changelog: + DEBUG-UNBUNDLING: full: 3 + DEBUG-UNBUNDLING: cached: 0 ( 0%) + DEBUG-UNBUNDLING: manifests: + DEBUG-UNBUNDLING: full: 1 + DEBUG-UNBUNDLING: cached: 0 ( 0%) + DEBUG-UNBUNDLING: delta: 2 + DEBUG-UNBUNDLING: cached: 2 (100%) + DEBUG-UNBUNDLING: files: + DEBUG-UNBUNDLING: full: 3 + DEBUG-UNBUNDLING: cached: 0 ( 0%) + DEBUG-UNBUNDLING: type-time: + DEBUG-UNBUNDLING: changelog: + DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: manifests: + DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: delta: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: files: + DEBUG-UNBUNDLING: full: ?????????????? seconds (???% of total) (glob) + DEBUG-UNBUNDLING: cached: ?????????????? seconds (???% of total) (glob) + added 3 changesets with 3 changes to 3 files + new changesets 4fe08cd4693e:4652c276ac4f (3 drafts) + (run 'hg update' to get a working copy)