# HG changeset patch # User Franck Bret # Date 2022-12-19 15:22:01 # Node ID b1e4c74beb6f8b916817da754aa2dcf2ed45c331 # Parent 3fd5824f1177e2d8ebb77cc2dde628c83a47e94c debug: add debug-revlog-stats command Display statistics about revlogs in the store. Useful to get an approximate size of a repository, etc. More statistics will be added in the future. diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -3809,6 +3809,33 @@ def debugshell(ui, repo): @command( + b'debug-revlog-stats', + [ + (b'c', b'changelog', None, _(b'Display changelog statistics')), + (b'm', b'manifest', None, _(b'Display manifest statistics')), + (b'f', b'filelogs', None, _(b'Display filelogs statistics')), + ] + + cmdutil.formatteropts, +) +def debug_revlog_stats(ui, repo, **opts): + """display statistics about revlogs in the store""" + opts = pycompat.byteskwargs(opts) + changelog = opts[b"changelog"] + manifest = opts[b"manifest"] + filelogs = opts[b"filelogs"] + + if changelog is None and manifest is None and filelogs is None: + changelog = True + manifest = True + filelogs = True + + repo = repo.unfiltered() + fm = ui.formatter(b'debug-revlog-stats', opts) + revlog_debug.debug_revlog_stats(repo, fm, changelog, manifest, filelogs) + fm.end() + + +@command( b'debugsuccessorssets', [(b'', b'closest', False, _(b'return closest successors sets only'))], _(b'[REV]'), diff --git a/mercurial/revlogutils/debug.py b/mercurial/revlogutils/debug.py --- a/mercurial/revlogutils/debug.py +++ b/mercurial/revlogutils/debug.py @@ -661,3 +661,61 @@ def debug_delta_find(ui, revlog, rev, ba fh = revlog._datafp() deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev) + + +def _get_revlogs(repo, changelog: bool, manifest: bool, filelogs: bool): + """yield revlogs from this repository""" + if changelog: + yield repo.changelog + + if manifest: + # XXX: Handle tree manifest + root_mf = repo.manifestlog.getstorage(b'') + assert not 
root_mf._treeondisk + yield root_mf._revlog + + if filelogs: + files = set() + for rev in repo: + ctx = repo[rev] + files |= set(ctx.files()) + + for f in sorted(files): + yield repo.file(f)._revlog + + +def debug_revlog_stats( + repo, fm, changelog: bool, manifest: bool, filelogs: bool +): + """Format revlog statistics for debugging purposes + + fm: the output formatter. + """ + fm.plain(b'rev-count data-size inl type target \n') + + for rlog in _get_revlogs(repo, changelog, manifest, filelogs): + fm.startitem() + nb_rev = len(rlog) + inline = rlog._inline + data_size = rlog._get_data_offset(nb_rev - 1) + + target = rlog.target + revlog_type = b'unknown' + revlog_target = b'' + if target[0] == constants.KIND_CHANGELOG: + revlog_type = b'changelog' + elif target[0] == constants.KIND_MANIFESTLOG: + revlog_type = b'manifest' + revlog_target = target[1] + elif target[0] == constants.KIND_FILELOG: + revlog_type = b'file' + revlog_target = target[1] + + fm.write(b'revlog.rev-count', b'%9d', nb_rev) + fm.write(b'revlog.data-size', b'%12d', data_size) + + fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no') + fm.write(b'revlog.type', b' %-9s', revlog_type) + fm.write(b'revlog.target', b' %s', revlog_target) + + fm.plain(b'\n') diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -77,6 +77,7 @@ Show debug commands if there are no othe debug-delta-find debug-repair-issue6528 debug-revlog-index + debug-revlog-stats debugancestor debugantivirusrunning debugapplystreamclonebundle @@ -271,6 +272,7 @@ Show all commands + options debug-delta-find: changelog, manifest, dir, template, source debug-repair-issue6528: to-report, from-report, paranoid, dry-run debug-revlog-index: changelog, manifest, dir, template + debug-revlog-stats: changelog, manifest, filelogs, template debugancestor: debugantivirusrunning: debugapplystreamclonebundle: diff --git a/tests/test-debug-revlog-stats.t 
b/tests/test-debug-revlog-stats.t new file mode 100644 --- /dev/null +++ b/tests/test-debug-revlog-stats.t @@ -0,0 +1,77 @@ +Force revlog max inline value to be smaller than default + + $ mkdir $TESTTMP/ext + $ cat << EOF > $TESTTMP/ext/small_inline.py + > from mercurial import revlog + > revlog._maxinline = 8 + > EOF + + $ cat << EOF >> $HGRCPATH + > [extensions] + > small_inline=$TESTTMP/ext/small_inline.py + > EOF + + $ hg init repo + $ cd repo + +Try on an empty repository + + $ hg debug-revlog-stats + rev-count data-size inl type target + 0 0 yes changelog + 0 0 yes manifest + + $ mkdir folder + $ touch a b folder/c folder/d + $ hg commit -Aqm 0 + $ echo "text" > a + $ hg rm b + $ echo "longer string" > folder/d + $ hg commit -Aqm 1 + +Differences in data size observed with pure are due to different compression +algorithms + + $ hg debug-revlog-stats + rev-count data-size inl type target + 2 138 no changelog (no-pure !) + 2 137 no changelog (pure !) + 2 177 no manifest (no-pure !) + 2 168 no manifest (pure !) + 2 6 yes file a + 1 0 yes file b + 1 0 yes file folder/c + 2 15 no file folder/d + +Test 'changelog' command argument + + $ hg debug-revlog-stats -c + rev-count data-size inl type target + 2 138 no changelog (no-pure !) + 2 137 no changelog (pure !) + +Test 'manifest' command argument + + $ hg debug-revlog-stats -m + rev-count data-size inl type target + 2 177 no manifest (no-pure !) + 2 168 no manifest (pure !) + +Test 'filelogs' command argument + + $ hg debug-revlog-stats -f + rev-count data-size inl type target + 2 6 yes file a + 1 0 yes file b + 1 0 yes file folder/c + 2 15 no file folder/d + +Test multiple command arguments + + $ hg debug-revlog-stats -cm + rev-count data-size inl type target + 2 138 no changelog (no-pure !) + 2 137 no changelog (pure !) + 2 177 no manifest (no-pure !) + 2 168 no manifest (pure !) 
+ diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -985,6 +985,8 @@ Test list of internal help commands details. debug-revlog-index dump index data for a revlog + debug-revlog-stats + display statistics about revlogs in the store debugancestor find the ancestor revision of two revisions in a given index debugantivirusrunning