# revlogutils/debug.py - utility used for revlog debugging
#
# Copyright 2005-2007 Olivia Mackall
# Copyright 2022 Octobus
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import string

from .. import (
    mdiff,
    node as nodemod,
    revlogutils,
    util,
)

from . import (
    constants,
    deltas as deltautil,
)

# Registry of the columns displayed by `debug_index`, in registration order.
INDEX_ENTRY_DEBUG_COLUMN = []

# Sentinel meaning "this column is as wide as a displayed node id".
NODE_SIZE = object()


class _column_base:
    """contains the definition of a revlog column

    name:         the column header,
    value_func:   the function called to get a value,
    size:         the width of the column (or NODE_SIZE),
    verbose_only: only include the column in verbose mode.
    """

    def __init__(self, name, value_func, size=None, verbose=False):
        self.name = name
        self.value_func = value_func
        if size is not NODE_SIZE:
            if size is None:
                size = 8  # arbitrary default
            # a column is never narrower than its own header
            size = max(len(name), size)
        self._size = size
        self.verbose_only = verbose

    def get_size(self, node_size):
        """return the column width, given the width of a displayed node id"""
        if self._size is NODE_SIZE:
            return node_size
        else:
            return self._size


def debug_column(name, size=None, verbose=False):
    """decorated function is registered as a column

    name:    the name of the column,
    size:    the expected size of the column (or NODE_SIZE),
    verbose: only display the column in verbose mode.

    Note that the decorator returns the registered `_column_base` entry, so
    the decorated name ends up bound to that entry, not to the function.
    """

    def register(func):
        entry = _column_base(
            name=name,
            value_func=func,
            size=size,
            verbose=verbose,
        )
        INDEX_ENTRY_DEBUG_COLUMN.append(entry)
        return entry

    return register


@debug_column(b"rev", size=6)
def _rev(index, rev, entry, hexfn):
    """the revision number itself"""
    return b"%d" % rev


@debug_column(b"rank", size=6, verbose=True)
def rank(index, rev, entry, hexfn):
    """the ENTRY_RANK field of the index entry"""
    return b"%d" % entry[constants.ENTRY_RANK]


@debug_column(b"linkrev", size=6)
def _linkrev(index, rev, entry, hexfn):
    """the ENTRY_LINK_REV field of the index entry"""
    return b"%d" % entry[constants.ENTRY_LINK_REV]


@debug_column(b"nodeid", size=NODE_SIZE)
def _nodeid(index, rev, entry, hexfn):
    """the node id of the revision, rendered with `hexfn`"""
    return hexfn(entry[constants.ENTRY_NODE_ID])


@debug_column(b"p1-rev", size=6, verbose=True)
def _p1_rev(index, rev, entry, hexfn):
    """the ENTRY_PARENT_1 field of the index entry"""
    return b"%d" % entry[constants.ENTRY_PARENT_1]


@debug_column(b"p1-nodeid", size=NODE_SIZE)
def _p1_node(index, rev, entry, hexfn):
    """the node id of the first parent, looked up through the index"""
    parent = entry[constants.ENTRY_PARENT_1]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"p2-rev", size=6, verbose=True)
def _p2_rev(index, rev, entry, hexfn):
    """the ENTRY_PARENT_2 field of the index entry"""
    return b"%d" % entry[constants.ENTRY_PARENT_2]


@debug_column(b"p2-nodeid", size=NODE_SIZE)
def _p2_node(index, rev, entry, hexfn):
    """the node id of the second parent, looked up through the index"""
    parent = entry[constants.ENTRY_PARENT_2]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"full-size", size=20, verbose=True)
def full_size(index, rev, entry, hexfn):
    """the ENTRY_DATA_UNCOMPRESSED_LENGTH field of the index entry"""
    return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]


@debug_column(b"delta-base", size=6, verbose=True)
def delta_base(index, rev, entry, hexfn):
    """the ENTRY_DELTA_BASE field of the index entry"""
    return b"%d" % entry[constants.ENTRY_DELTA_BASE]


@debug_column(b"flags", size=2, verbose=True)
def flags(index, rev, entry, hexfn):
    """the low 16 bits of the ENTRY_DATA_OFFSET field"""
    field = entry[constants.ENTRY_DATA_OFFSET]
    field &= 0xFFFF
    return b"%d" % field


@debug_column(b"comp-mode", size=4, verbose=True)
def compression_mode(index, rev, entry, hexfn):
    """the ENTRY_DATA_COMPRESSION_MODE field of the index entry"""
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]


@debug_column(b"data-offset", size=20, verbose=True)
def data_offset(index, rev, entry, hexfn):
    """the ENTRY_DATA_OFFSET field with its low 16 bits shifted away"""
    field = entry[constants.ENTRY_DATA_OFFSET]
    field >>= 16
    return b"%d" % field


@debug_column(b"chunk-size", size=10, verbose=True)
def data_chunk_size(index, rev, entry, hexfn):
    """the ENTRY_DATA_COMPRESSED_LENGTH field of the index entry"""
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]


@debug_column(b"sd-comp-mode", size=7, verbose=True)
def sidedata_compression_mode(index, rev, entry, hexfn):
    """the sidedata compression mode, by name when it is a known one"""
    compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
    if compression == constants.COMP_MODE_PLAIN:
        return b"plain"
    elif compression == constants.COMP_MODE_DEFAULT:
        return b"default"
    elif compression == constants.COMP_MODE_INLINE:
        return b"inline"
    else:
        return b"%d" % compression


@debug_column(b"sidedata-offset", size=20, verbose=True)
def sidedata_offset(index, rev, entry, hexfn):
    """the ENTRY_SIDEDATA_OFFSET field of the index entry"""
    return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]


@debug_column(b"sd-chunk-size", size=10, verbose=True)
def sidedata_chunk_size(index, rev, entry, hexfn):
    """the ENTRY_SIDEDATA_COMPRESSED_LENGTH field of the index entry"""
    return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]


def debug_index(
    ui,
    repo,
    formatter,
    revlog,
    full_node,
):
    """display index data for a revlog

    One header line is written, then one item per revision with one field
    per registered column. Columns flagged as verbose-only are skipped
    unless `ui.verbose` is set. `full_node` selects `nodemod.hex` over
    `nodemod.short` to render node ids.
    """
    if full_node:
        hexfn = nodemod.hex
    else:
        hexfn = nodemod.short

    # width of a node id column: measured on the first revision, if any
    idlen = 12
    for i in revlog:
        idlen = len(hexfn(revlog.node(i)))
        break

    fm = formatter

    # the set of displayed columns is the same for the header and every row
    columns = [
        column
        for column in INDEX_ENTRY_DEBUG_COLUMN
        if ui.verbose or not column.verbose_only
    ]

    header_pieces = [
        column.name.rjust(column.get_size(idlen)) for column in columns
    ]
    fm.plain(b' '.join(header_pieces) + b'\n')

    index = revlog.index

    for rev in revlog:
        fm.startitem()
        entry = index[rev]
        first = True
        for column in columns:
            if not first:
                fm.plain(b' ')
            first = False

            size = column.get_size(idlen)
            value = column.value_func(index, rev, entry, hexfn)
            display = b"%%%ds" % size
            fm.write(column.name, display, value)
        fm.plain(b'\n')

    fm.end()


def dump(ui, revlog):
    """perform the work for `hg debugrevlog --dump`"""
    # XXX seems redundant with debug index ?
    r = revlog
    numrevs = len(r)
    ui.write(
        (
            b"# rev p1rev p2rev start end deltastart base p1 p2"
            b" rawsize totalsize compression heads chainlen\n"
        )
    )
    ts = 0  # running total of the raw sizes seen so far
    heads = set()

    for rev in range(numrevs):
        dbase = r.deltaparent(rev)
        if dbase == -1:
            # no delta parent: report the revision itself as delta start
            dbase = rev
        cbase = r.chainbase(rev)
        clen = r.chainlen(rev)
        p1, p2 = r.parentrevs(rev)
        rs = r.rawsize(rev)
        ts = ts + rs
        # a revision stops being a head as soon as it has a child
        heads.difference_update((p1, p2))
        heads.add(rev)
        try:
            compression = ts / r.end(rev)
        except ZeroDivisionError:
            compression = 0
        ui.write(
            b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
            b"%11d %5d %8d\n"
            % (
                rev,
                p1,
                p2,
                r.start(rev),
                r.end(rev),
                r.start(dbase),
                r.start(cbase),
                r.start(p1),
                r.start(p2),
                rs,
                ts,
                compression,
                len(heads),
                clen,
            )
        )


def debug_revlog(ui, revlog):
    """code for `hg debugrevlog`

    Walk every revision of `revlog` once, classify how it is stored (empty,
    full snapshot, intermediate snapshot, or delta and against what), and
    write the aggregated statistics to `ui`.

    An empty revlog is supported: averages and maxima are reported as 0.
    """
    r = revlog
    format_version = r._format_version
    format_flags = r._format_flags
    flags = []
    gdelta = False
    if format_flags & constants.FLAG_INLINE_DATA:
        flags.append(b'inline')
    if format_flags & constants.FLAG_GENERALDELTA:
        gdelta = True
        flags.append(b'generaldelta')
    if not flags:
        flags = [b'(none)']

    ### the total size of stored content if uncompressed.
    full_text_total_size = 0
    ### tracks merge vs single parent
    nummerges = 0

    ### tracks ways the "delta" are build
    # nodelta
    numempty = 0
    numemptytext = 0
    numemptydelta = 0
    # full file content
    numfull = 0
    # intermediate snapshot against a prior snapshot
    numsemi = 0
    # snapshot count per depth
    numsnapdepth = collections.defaultdict(int)
    # number of snapshots with a non-ancestor delta
    numsnapdepth_nad = collections.defaultdict(int)
    # delta against previous revision
    numprev = 0
    # delta against prev, where prev is a non-ancestor
    numprev_nad = 0
    # delta against first or second parent (not prev)
    nump1 = 0
    nump2 = 0
    # delta against neither prev nor parents
    numother = 0
    # delta against other that is a non-ancestor
    numother_nad = 0
    # delta against prev that are also first or second parent
    # (details of `numprev`)
    nump1prev = 0
    nump2prev = 0

    # data about delta chain of each revs
    chainlengths = []
    chainbases = []
    chainspans = []

    # data about each revision, each as [min, max, total]
    datasize = [None, 0, 0]
    fullsize = [None, 0, 0]
    semisize = [None, 0, 0]
    # snapshot size per depth
    snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
    deltasize = [None, 0, 0]
    chunktypecounts = {}
    chunktypesizes = {}

    def addsize(size, l):
        """fold `size` into the [min, max, total] accumulator `l`"""
        if l[0] is None or size < l[0]:
            l[0] = size
        if size > l[1]:
            l[1] = size
        l[2] += size

    numrevs = len(r)
    for rev in range(numrevs):
        p1, p2 = r.parentrevs(rev)
        delta = r.deltaparent(rev)
        if format_version > 0:
            s = r.rawsize(rev)
            full_text_total_size += s
            addsize(s, datasize)
        if p2 != nodemod.nullrev:
            nummerges += 1
        size = r.length(rev)
        if delta == nodemod.nullrev:
            # stored without a delta base: starts a new chain
            chainlengths.append(0)
            chainbases.append(r.start(rev))
            chainspans.append(size)
            if size == 0:
                numempty += 1
                numemptytext += 1
            else:
                numfull += 1
                numsnapdepth[0] += 1
                addsize(size, fullsize)
                addsize(size, snapsizedepth[0])
        else:
            # "nad": the delta base is a Non-Ancestor of this revision
            nad = (
                delta != p1 and delta != p2 and not r.isancestorrev(delta, rev)
            )
            chainlengths.append(chainlengths[delta] + 1)
            baseaddr = chainbases[delta]
            revaddr = r.start(rev)
            chainbases.append(baseaddr)
            chainspans.append((revaddr - baseaddr) + size)
            if size == 0:
                numempty += 1
                numemptydelta += 1
            elif r.issnapshot(rev):
                addsize(size, semisize)
                numsemi += 1
                depth = r.snapshotdepth(rev)
                numsnapdepth[depth] += 1
                if nad:
                    numsnapdepth_nad[depth] += 1
                addsize(size, snapsizedepth[depth])
            else:
                addsize(size, deltasize)
                if delta == rev - 1:
                    numprev += 1
                    if delta == p1:
                        nump1prev += 1
                    elif delta == p2:
                        nump2prev += 1
                    elif nad:
                        numprev_nad += 1
                elif delta == p1:
                    nump1 += 1
                elif delta == p2:
                    nump2 += 1
                else:
                    # `delta` cannot be nullrev in this branch
                    numother += 1
                    # only count the base as unrelated when it really is a
                    # non-ancestor, otherwise `num_other_ancestors` below
                    # would always be zero.
                    if nad:
                        numother_nad += 1

        # Obtain data on the raw chunks in the revlog.
        if util.safehasattr(r, '_getsegmentforrevs'):
            segment = r._getsegmentforrevs(rev, rev)[1]
        else:
            segment = r._revlog._getsegmentforrevs(rev, rev)[1]
        if segment:
            chunktype = bytes(segment[0:1])
        else:
            chunktype = b'empty'

        if chunktype not in chunktypecounts:
            chunktypecounts[chunktype] = 0
            chunktypesizes[chunktype] = 0

        chunktypecounts[chunktype] += 1
        chunktypesizes[chunktype] += size

    # Adjust size min value for empty cases
    for stats in (datasize, fullsize, semisize, deltasize):
        if stats[0] is None:
            stats[0] = 0

    numdeltas = numrevs - numfull - numempty - numsemi
    numoprev = numprev - nump1prev - nump2prev - numprev_nad
    num_other_ancestors = numother - numother_nad

    # From here on, slot 2 of each accumulator is turned from a total into
    # an average; the totals are saved in dedicated variables first.
    totalrawsize = datasize[2]
    if numrevs:
        datasize[2] /= numrevs
    fulltotal = fullsize[2]
    if numfull == 0:
        fullsize[2] = 0
    else:
        fullsize[2] /= numfull
    semitotal = semisize[2]
    snaptotal = {}
    if numsemi > 0:
        semisize[2] /= numsemi
    for depth in snapsizedepth:
        snaptotal[depth] = snapsizedepth[depth][2]
        snapsizedepth[depth][2] /= numsnapdepth[depth]

    deltatotal = deltasize[2]
    if numdeltas > 0:
        deltasize[2] /= numdeltas
    totalsize = fulltotal + semitotal + deltatotal
    # an empty revlog has no chain at all: report zeros instead of crashing
    avgchainlen = sum(chainlengths) / numrevs if numrevs else 0
    maxchainlen = max(chainlengths, default=0)
    maxchainspan = max(chainspans, default=0)
    compratio = 1
    if totalsize:
        compratio = totalrawsize / totalsize

    basedfmtstr = b'%%%dd\n'
    basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'

    def dfmtstr(maxvalue):
        """format for an integer as wide as `maxvalue`"""
        return basedfmtstr % len(str(maxvalue))

    def pcfmtstr(maxvalue, padding=0):
        """format for an integer as wide as `maxvalue`, plus a percentage"""
        return basepcfmtstr % (len(str(maxvalue)), b' ' * padding)

    def pcfmt(value, total):
        """return (value, percentage of total); 100% when total is zero"""
        if total:
            return (value, 100 * float(value) / total)
        else:
            return value, 100.0

    ui.writenoi18n(b'format : %d\n' % format_version)
    ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))

    ui.write(b'\n')
    fmt = pcfmtstr(totalsize)
    fmt2 = dfmtstr(totalsize)
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
    ui.writenoi18n(
        b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
    )
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
    ui.writenoi18n(
        b' text : '
        + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' delta : '
        + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
    )
    for depth in sorted(numsnapdepth):
        base = b' lvl-%-3d : ' % depth
        count = fmt % pcfmt(numsnapdepth[depth], numrevs)
        pieces = [base, count]
        if numsnapdepth_nad[depth]:
            pieces[-1] = count = count[:-1]  # drop the final '\n'
            more = b' non-ancestor-bases: '
            anc_count = fmt % pcfmt(
                numsnapdepth_nad[depth], numsnapdepth[depth]
            )
            pieces.append(more)
            pieces.append(anc_count)
        ui.write(b''.join(pieces))
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
    ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
    )
    for depth in sorted(numsnapdepth):
        ui.write(
            (b' lvl-%-3d : ' % depth)
            + fmt % pcfmt(snaptotal[depth], totalsize)
        )
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))

    letters = string.ascii_letters.encode('ascii')

    def fmtchunktype(chunktype):
        """label for a chunk type, with its ASCII letter when it is one"""
        if chunktype == b'empty':
            return b' %s : ' % chunktype
        elif chunktype in letters:
            return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
        else:
            return b' 0x%s : ' % nodemod.hex(chunktype)

    ui.write(b'\n')
    ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
    ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))

    ui.write(b'\n')
    # render the total with a space between each group of three digits
    b_total = b"%d" % full_text_total_size
    p_total = []
    while len(b_total) > 3:
        p_total.append(b_total[-3:])
        b_total = b_total[:-3]
    p_total.append(b_total)
    p_total.reverse()
    b_total = b' '.join(p_total)

    ui.write(b'\n')
    ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
    ui.write(b'\n')
    fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
    ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
    ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
    ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
    ui.writenoi18n(b'compression ratio : ' + fmt % compratio)

    if format_version > 0:
        ui.write(b'\n')
        ui.writenoi18n(
            b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
            % tuple(datasize)
        )
    ui.writenoi18n(
        b'full revision size (min/max/avg) : %d / %d / %d\n'
        % tuple(fullsize)
    )
    ui.writenoi18n(
        b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
        % tuple(semisize)
    )
    for depth in sorted(snapsizedepth):
        if depth == 0:
            # depth 0 is the full snapshots, already reported above
            continue
        ui.writenoi18n(
            b' level-%-3d (min/max/avg) : %d / %d / %d\n'
            % ((depth,) + tuple(snapsizedepth[depth]))
        )
    ui.writenoi18n(
        b'delta size (min/max/avg) : %d / %d / %d\n'
        % tuple(deltasize)
    )

    if numdeltas > 0:
        ui.write(b'\n')
        fmt = pcfmtstr(numdeltas)
        fmt2 = pcfmtstr(numdeltas, 4)
        ui.writenoi18n(
            b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
        )
        if numprev > 0:
            ui.writenoi18n(
                b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
            )
            ui.writenoi18n(
                b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
            )
            ui.writenoi18n(
                b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
            )
            # prev that is not an ancestor at all (not `numoprev`, which is
            # the "other-ancestor" count printed just above)
            ui.writenoi18n(
                b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
            )
        if gdelta:
            ui.writenoi18n(
                b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against ancs : '
                + fmt % pcfmt(num_other_ancestors, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against other : '
                + fmt % pcfmt(numother_nad, numdeltas)
            )


def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
    """display the search process for a delta

    When `base_rev` is given, a delta of `rev` against that revision is
    computed up front and handed to the delta computer as a cached delta
    flagged DELTA_BASE_REUSE_TRY.
    """
    deltacomputer = deltautil.deltacomputer(
        revlog,
        write_debug=ui.write,
        debug_search=not ui.quiet,
    )

    node = revlog.node(rev)
    p1r, p2r = revlog.parentrevs(rev)
    p1 = revlog.node(p1r)
    p2 = revlog.node(p2r)
    full_text = revlog.revision(rev)
    btext = [full_text]
    textlen = len(btext[0])
    cachedelta = None
    flags = revlog.flags(rev)

    if base_rev != nodemod.nullrev:
        base_text = revlog.revision(base_rev)
        delta = mdiff.textdiff(base_text, full_text)

        cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
        btext = [None]

    revinfo = revlogutils.revisioninfo(
        node,
        p1,
        p2,
        btext,
        textlen,
        cachedelta,
        flags,
    )

    # NOTE(review): the handle returned by `_datafp()` is never explicitly
    # closed here -- confirm whether it should be.
    fh = revlog._datafp()
    deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)


def _get_revlogs(repo, changelog: bool, manifest: bool, filelogs: bool):
    """yield revlogs from this repository

    The changelog comes first, then the root manifest, then one filelog per
    file touched by any revision of the repository (sorted by file name).
    Each group is only included when its flag is set.
    """
    if changelog:
        yield repo.changelog

    if manifest:
        # XXX: Handle tree manifest
        root_mf = repo.manifestlog.getstorage(b'')
        assert not root_mf._treeondisk
        yield root_mf._revlog

    if filelogs:
        files = set()
        for rev in repo:
            ctx = repo[rev]
            files.update(ctx.files())

        for f in sorted(files):
            yield repo.file(f)._revlog


def debug_revlog_stats(
    repo, fm, changelog: bool, manifest: bool, filelogs: bool
):
    """Format revlog statistics for debugging purposes

    fm: the output formatter.

    One item is written per revlog selected by the `changelog`, `manifest`
    and `filelogs` flags.
    """
    fm.plain(b'rev-count data-size inl type target \n')

    for rlog in _get_revlogs(repo, changelog, manifest, filelogs):
        fm.startitem()
        nb_rev = len(rlog)
        inline = rlog._inline
        data_size = rlog._get_data_offset(nb_rev - 1)

        target = rlog.target
        revlog_type = b'unknown'
        revlog_target = b''
        if target[0] == constants.KIND_CHANGELOG:
            revlog_type = b'changelog'
        elif target[0] == constants.KIND_MANIFESTLOG:
            revlog_type = b'manifest'
            revlog_target = target[1]
        elif target[0] == constants.KIND_FILELOG:
            revlog_type = b'file'
            revlog_target = target[1]

        fm.write(b'revlog.rev-count', b'%9d', nb_rev)
        fm.write(b'revlog.data-size', b'%12d', data_size)

        fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
        fm.write(b'revlog.type', b' %-9s', revlog_type)
        fm.write(b'revlog.target', b' %s', revlog_target)

        fm.plain(b'\n')