##// END OF EJS Templates
match: add `filepath:` pattern to match an exact filepath relative to the root...
match: add `filepath:` pattern to match an exact filepath relative to the root It's useful in certain automated workflows to make sure we recurse in directories whose name conflicts with files in other revisions. In addition it makes it possible to avoid building a potentially costly regex, improving performance when the set of files to match explicitly is large. The benchmark below are run in the following configuration : # data-env-vars.name = mozilla-central-2018-08-01-zstd-sparse-revlog # benchmark.name = files # benchmark.variants.rev = tip # benchmark.variants.files = all-list-filepath-sorted # bin-env-vars.hg.flavor = no-rust It also includes timings using the re2 engine (through the `google-re2` module) to show how much can be saved by just using a better regexp engine. Pattern time (seconds) time using re2 ----------------------------------------------------------- just "." 0.4 0.4 list of "filepath:…" 1.3 1.3 list of "path:…" 25.7 3.9 list of patterns 29.7 10.4 As you can see, Without re2, using "filepath:" instead of "path:" is a huge win. With re2, it is still about three times faster to not have to build the regex.

File last commit:

r51574:47b44d80 default
r51588:1c31b343 default
Show More
debug.py
710 lines | 20.9 KiB | text/x-python | PythonLexer
# revlogutils/debug.py - utility used for revlog debuging
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2022 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import string
from .. import (
mdiff,
node as nodemod,
revlogutils,
util,
)
from . import (
constants,
deltas as deltautil,
)
INDEX_ENTRY_DEBUG_COLUMN = []
NODE_SIZE = object()
class _column_base:
"""constains the definition of a revlog column
name: the column header,
value_func: the function called to get a value,
size: the width of the column,
verbose_only: only include the column in verbose mode.
"""
def __init__(self, name, value_func, size=None, verbose=False):
self.name = name
self.value_func = value_func
if size is not NODE_SIZE:
if size is None:
size = 8 # arbitrary default
size = max(len(name), size)
self._size = size
self.verbose_only = verbose
def get_size(self, node_size):
if self._size is NODE_SIZE:
return node_size
else:
return self._size
def debug_column(name, size=None, verbose=False):
"""decorated function is registered as a column
name: the name of the column,
size: the expected size of the column.
"""
def register(func):
entry = _column_base(
name=name,
value_func=func,
size=size,
verbose=verbose,
)
INDEX_ENTRY_DEBUG_COLUMN.append(entry)
return entry
return register
@debug_column(b"rev", size=6)
def _rev(index, rev, entry, hexfn):
return b"%d" % rev
@debug_column(b"rank", size=6, verbose=True)
def rank(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_RANK]
@debug_column(b"linkrev", size=6)
def _linkrev(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_LINK_REV]
@debug_column(b"nodeid", size=NODE_SIZE)
def _nodeid(index, rev, entry, hexfn):
return hexfn(entry[constants.ENTRY_NODE_ID])
@debug_column(b"p1-rev", size=6, verbose=True)
def _p1_rev(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_PARENT_1]
@debug_column(b"p1-nodeid", size=NODE_SIZE)
def _p1_node(index, rev, entry, hexfn):
parent = entry[constants.ENTRY_PARENT_1]
p_entry = index[parent]
return hexfn(p_entry[constants.ENTRY_NODE_ID])
@debug_column(b"p2-rev", size=6, verbose=True)
def _p2_rev(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_PARENT_2]
@debug_column(b"p2-nodeid", size=NODE_SIZE)
def _p2_node(index, rev, entry, hexfn):
parent = entry[constants.ENTRY_PARENT_2]
p_entry = index[parent]
return hexfn(p_entry[constants.ENTRY_NODE_ID])
@debug_column(b"full-size", size=20, verbose=True)
def full_size(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
@debug_column(b"delta-base", size=6, verbose=True)
def delta_base(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_DELTA_BASE]
@debug_column(b"flags", size=2, verbose=True)
def flags(index, rev, entry, hexfn):
field = entry[constants.ENTRY_DATA_OFFSET]
field &= 0xFFFF
return b"%d" % field
@debug_column(b"comp-mode", size=4, verbose=True)
def compression_mode(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]
@debug_column(b"data-offset", size=20, verbose=True)
def data_offset(index, rev, entry, hexfn):
field = entry[constants.ENTRY_DATA_OFFSET]
field >>= 16
return b"%d" % field
@debug_column(b"chunk-size", size=10, verbose=True)
def data_chunk_size(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]
@debug_column(b"sd-comp-mode", size=7, verbose=True)
def sidedata_compression_mode(index, rev, entry, hexfn):
compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
if compression == constants.COMP_MODE_PLAIN:
return b"plain"
elif compression == constants.COMP_MODE_DEFAULT:
return b"default"
elif compression == constants.COMP_MODE_INLINE:
return b"inline"
else:
return b"%d" % compression
@debug_column(b"sidedata-offset", size=20, verbose=True)
def sidedata_offset(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]
@debug_column(b"sd-chunk-size", size=10, verbose=True)
def sidedata_chunk_size(index, rev, entry, hexfn):
return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]
def debug_index(
ui,
repo,
formatter,
revlog,
full_node,
):
"""display index data for a revlog"""
if full_node:
hexfn = nodemod.hex
else:
hexfn = nodemod.short
idlen = 12
for i in revlog:
idlen = len(hexfn(revlog.node(i)))
break
fm = formatter
header_pieces = []
for column in INDEX_ENTRY_DEBUG_COLUMN:
if column.verbose_only and not ui.verbose:
continue
size = column.get_size(idlen)
name = column.name
header_pieces.append(name.rjust(size))
fm.plain(b' '.join(header_pieces) + b'\n')
index = revlog.index
for rev in revlog:
fm.startitem()
entry = index[rev]
first = True
for column in INDEX_ENTRY_DEBUG_COLUMN:
if column.verbose_only and not ui.verbose:
continue
if not first:
fm.plain(b' ')
first = False
size = column.get_size(idlen)
value = column.value_func(index, rev, entry, hexfn)
display = b"%%%ds" % size
fm.write(column.name, display, value)
fm.plain(b'\n')
fm.end()
def dump(ui, revlog):
"""perform the work for `hg debugrevlog --dump"""
# XXX seems redundant with debug index ?
r = revlog
numrevs = len(r)
ui.write(
(
b"# rev p1rev p2rev start end deltastart base p1 p2"
b" rawsize totalsize compression heads chainlen\n"
)
)
ts = 0
heads = set()
for rev in range(numrevs):
dbase = r.deltaparent(rev)
if dbase == -1:
dbase = rev
cbase = r.chainbase(rev)
clen = r.chainlen(rev)
p1, p2 = r.parentrevs(rev)
rs = r.rawsize(rev)
ts = ts + rs
heads -= set(r.parentrevs(rev))
heads.add(rev)
try:
compression = ts / r.end(rev)
except ZeroDivisionError:
compression = 0
ui.write(
b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
b"%11d %5d %8d\n"
% (
rev,
p1,
p2,
r.start(rev),
r.end(rev),
r.start(dbase),
r.start(cbase),
r.start(p1),
r.start(p2),
rs,
ts,
compression,
len(heads),
clen,
)
)
def debug_revlog(ui, revlog):
"""code for `hg debugrevlog`"""
r = revlog
format = r._format_version
v = r._format_flags
flags = []
gdelta = False
if v & constants.FLAG_INLINE_DATA:
flags.append(b'inline')
if v & constants.FLAG_GENERALDELTA:
gdelta = True
flags.append(b'generaldelta')
if not flags:
flags = [b'(none)']
### the total size of stored content if incompressed.
full_text_total_size = 0
### tracks merge vs single parent
nummerges = 0
### tracks ways the "delta" are build
# nodelta
numempty = 0
numemptytext = 0
numemptydelta = 0
# full file content
numfull = 0
# intermediate snapshot against a prior snapshot
numsemi = 0
# snapshot count per depth
numsnapdepth = collections.defaultdict(lambda: 0)
# number of snapshots with a non-ancestor delta
numsnapdepth_nad = collections.defaultdict(lambda: 0)
# delta against previous revision
numprev = 0
# delta against prev, where prev is a non-ancestor
numprev_nad = 0
# delta against first or second parent (not prev)
nump1 = 0
nump2 = 0
# delta against neither prev nor parents
numother = 0
# delta against other that is a non-ancestor
numother_nad = 0
# delta against prev that are also first or second parent
# (details of `numprev`)
nump1prev = 0
nump2prev = 0
# data about delta chain of each revs
chainlengths = []
chainbases = []
chainspans = []
# data about each revision
datasize = [None, 0, 0]
fullsize = [None, 0, 0]
semisize = [None, 0, 0]
# snapshot count per depth
snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
deltasize = [None, 0, 0]
chunktypecounts = {}
chunktypesizes = {}
def addsize(size, l):
if l[0] is None or size < l[0]:
l[0] = size
if size > l[1]:
l[1] = size
l[2] += size
numrevs = len(r)
for rev in range(numrevs):
p1, p2 = r.parentrevs(rev)
delta = r.deltaparent(rev)
if format > 0:
s = r.rawsize(rev)
full_text_total_size += s
addsize(s, datasize)
if p2 != nodemod.nullrev:
nummerges += 1
size = r.length(rev)
if delta == nodemod.nullrev:
chainlengths.append(0)
chainbases.append(r.start(rev))
chainspans.append(size)
if size == 0:
numempty += 1
numemptytext += 1
else:
numfull += 1
numsnapdepth[0] += 1
addsize(size, fullsize)
addsize(size, snapsizedepth[0])
else:
nad = (
delta != p1 and delta != p2 and not r.isancestorrev(delta, rev)
)
chainlengths.append(chainlengths[delta] + 1)
baseaddr = chainbases[delta]
revaddr = r.start(rev)
chainbases.append(baseaddr)
chainspans.append((revaddr - baseaddr) + size)
if size == 0:
numempty += 1
numemptydelta += 1
elif r.issnapshot(rev):
addsize(size, semisize)
numsemi += 1
depth = r.snapshotdepth(rev)
numsnapdepth[depth] += 1
if nad:
numsnapdepth_nad[depth] += 1
addsize(size, snapsizedepth[depth])
else:
addsize(size, deltasize)
if delta == rev - 1:
numprev += 1
if delta == p1:
nump1prev += 1
elif delta == p2:
nump2prev += 1
elif nad:
numprev_nad += 1
elif delta == p1:
nump1 += 1
elif delta == p2:
nump2 += 1
elif delta != nodemod.nullrev:
numother += 1
numother_nad += 1
# Obtain data on the raw chunks in the revlog.
if util.safehasattr(r, '_getsegmentforrevs'):
segment = r._getsegmentforrevs(rev, rev)[1]
else:
segment = r._revlog._getsegmentforrevs(rev, rev)[1]
if segment:
chunktype = bytes(segment[0:1])
else:
chunktype = b'empty'
if chunktype not in chunktypecounts:
chunktypecounts[chunktype] = 0
chunktypesizes[chunktype] = 0
chunktypecounts[chunktype] += 1
chunktypesizes[chunktype] += size
# Adjust size min value for empty cases
for size in (datasize, fullsize, semisize, deltasize):
if size[0] is None:
size[0] = 0
numdeltas = numrevs - numfull - numempty - numsemi
numoprev = numprev - nump1prev - nump2prev - numprev_nad
num_other_ancestors = numother - numother_nad
totalrawsize = datasize[2]
datasize[2] /= numrevs
fulltotal = fullsize[2]
if numfull == 0:
fullsize[2] = 0
else:
fullsize[2] /= numfull
semitotal = semisize[2]
snaptotal = {}
if numsemi > 0:
semisize[2] /= numsemi
for depth in snapsizedepth:
snaptotal[depth] = snapsizedepth[depth][2]
snapsizedepth[depth][2] /= numsnapdepth[depth]
deltatotal = deltasize[2]
if numdeltas > 0:
deltasize[2] /= numdeltas
totalsize = fulltotal + semitotal + deltatotal
avgchainlen = sum(chainlengths) / numrevs
maxchainlen = max(chainlengths)
maxchainspan = max(chainspans)
compratio = 1
if totalsize:
compratio = totalrawsize / totalsize
basedfmtstr = b'%%%dd\n'
basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
def dfmtstr(max):
return basedfmtstr % len(str(max))
def pcfmtstr(max, padding=0):
return basepcfmtstr % (len(str(max)), b' ' * padding)
def pcfmt(value, total):
if total:
return (value, 100 * float(value) / total)
else:
return value, 100.0
ui.writenoi18n(b'format : %d\n' % format)
ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
ui.write(b'\n')
fmt = pcfmtstr(totalsize)
fmt2 = dfmtstr(totalsize)
ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
ui.writenoi18n(
b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
)
ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
ui.writenoi18n(
b' text : '
+ fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
)
ui.writenoi18n(
b' delta : '
+ fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
)
ui.writenoi18n(
b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
)
for depth in sorted(numsnapdepth):
base = b' lvl-%-3d : ' % depth
count = fmt % pcfmt(numsnapdepth[depth], numrevs)
pieces = [base, count]
if numsnapdepth_nad[depth]:
pieces[-1] = count = count[:-1] # drop the final '\n'
more = b' non-ancestor-bases: '
anc_count = fmt
anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
pieces.append(more)
pieces.append(anc_count)
ui.write(b''.join(pieces))
ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
ui.writenoi18n(
b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
)
for depth in sorted(numsnapdepth):
ui.write(
(b' lvl-%-3d : ' % depth)
+ fmt % pcfmt(snaptotal[depth], totalsize)
)
ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
letters = string.ascii_letters.encode('ascii')
def fmtchunktype(chunktype):
if chunktype == b'empty':
return b' %s : ' % chunktype
elif chunktype in letters:
return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
else:
return b' 0x%s : ' % nodemod.hex(chunktype)
ui.write(b'\n')
ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
for chunktype in sorted(chunktypecounts):
ui.write(fmtchunktype(chunktype))
ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
for chunktype in sorted(chunktypecounts):
ui.write(fmtchunktype(chunktype))
ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
ui.write(b'\n')
b_total = b"%d" % full_text_total_size
p_total = []
while len(b_total) > 3:
p_total.append(b_total[-3:])
b_total = b_total[:-3]
p_total.append(b_total)
p_total.reverse()
b_total = b' '.join(p_total)
ui.write(b'\n')
ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
ui.write(b'\n')
fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
if format > 0:
ui.write(b'\n')
ui.writenoi18n(
b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
% tuple(datasize)
)
ui.writenoi18n(
b'full revision size (min/max/avg) : %d / %d / %d\n'
% tuple(fullsize)
)
ui.writenoi18n(
b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
% tuple(semisize)
)
for depth in sorted(snapsizedepth):
if depth == 0:
continue
ui.writenoi18n(
b' level-%-3d (min/max/avg) : %d / %d / %d\n'
% ((depth,) + tuple(snapsizedepth[depth]))
)
ui.writenoi18n(
b'delta size (min/max/avg) : %d / %d / %d\n'
% tuple(deltasize)
)
if numdeltas > 0:
ui.write(b'\n')
fmt = pcfmtstr(numdeltas)
fmt2 = pcfmtstr(numdeltas, 4)
ui.writenoi18n(
b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
)
if numprev > 0:
ui.writenoi18n(
b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
)
ui.writenoi18n(
b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
)
ui.writenoi18n(
b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
)
ui.writenoi18n(
b' unrelated : ' + fmt2 % pcfmt(numoprev, numprev)
)
if gdelta:
ui.writenoi18n(
b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
)
ui.writenoi18n(
b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
)
ui.writenoi18n(
b'deltas against ancs : '
+ fmt % pcfmt(num_other_ancestors, numdeltas)
)
ui.writenoi18n(
b'deltas against other : '
+ fmt % pcfmt(numother_nad, numdeltas)
)
def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
"""display the search process for a delta"""
deltacomputer = deltautil.deltacomputer(
revlog,
write_debug=ui.write,
debug_search=not ui.quiet,
)
node = revlog.node(rev)
p1r, p2r = revlog.parentrevs(rev)
p1 = revlog.node(p1r)
p2 = revlog.node(p2r)
full_text = revlog.revision(rev)
btext = [full_text]
textlen = len(btext[0])
cachedelta = None
flags = revlog.flags(rev)
if base_rev != nodemod.nullrev:
base_text = revlog.revision(base_rev)
delta = mdiff.textdiff(base_text, full_text)
cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
btext = [None]
revinfo = revlogutils.revisioninfo(
node,
p1,
p2,
btext,
textlen,
cachedelta,
flags,
)
fh = revlog._datafp()
deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
def debug_revlog_stats(
repo, fm, changelog: bool, manifest: bool, filelogs: bool
):
"""Format revlog statistics for debugging purposes
fm: the output formatter.
"""
fm.plain(b'rev-count data-size inl type target \n')
revlog_entries = [e for e in repo.store.walk() if e.is_revlog]
revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id))
for entry in revlog_entries:
if not changelog and entry.is_changelog:
continue
elif not manifest and entry.is_manifestlog:
continue
elif not filelogs and entry.is_filelog:
continue
rlog = entry.get_revlog_instance(repo).get_revlog()
fm.startitem()
nb_rev = len(rlog)
inline = rlog._inline
data_size = rlog._get_data_offset(nb_rev - 1)
target = rlog.target
revlog_type = b'unknown'
revlog_target = b''
if target[0] == constants.KIND_CHANGELOG:
revlog_type = b'changelog'
elif target[0] == constants.KIND_MANIFESTLOG:
revlog_type = b'manifest'
revlog_target = target[1]
elif target[0] == constants.KIND_FILELOG:
revlog_type = b'file'
revlog_target = target[1]
fm.write(b'revlog.rev-count', b'%9d', nb_rev)
fm.write(b'revlog.data-size', b'%12d', data_size)
fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
fm.write(b'revlog.type', b' %-9s', revlog_type)
fm.write(b'revlog.target', b' %s', revlog_target)
fm.plain(b'\n')