# HG changeset patch # User Pierre-Yves David # Date 2021-05-03 10:22:36 # Node ID 8d3c2f9d4af7933571c0fd2dca668db2b92d73de # Parent 396442cd7e6a2823f273ae1b5dca5a0a8dd33d9a revlog: use a "radix" to address revlog Instead of pointing to the index directly and to derive the other file from that, we directly provide the radix and let the revlog determine the associated file path internally. This is more robust and will give us more flexibility for picking this file name in the future. Differential Revision: https://phab.mercurial-scm.org/D10576 diff --git a/contrib/dumprevlog b/contrib/dumprevlog --- a/contrib/dumprevlog +++ b/contrib/dumprevlog @@ -36,10 +36,16 @@ def printb(data, end=b'\n'): for f in sys.argv[1:]: + localf = encoding.strtolocal(f) + if not localf.endswith(b'.i'): + print("file:", f, file=sys.stderr) + print(" invalid filename", file=sys.stderr) + continue + r = revlog.revlog( binopen, target=(revlog_constants.KIND_OTHER, b'dump-revlog'), - indexfile=encoding.strtolocal(f), + radix=localf[:-2], ) print("file:", f) for i in r: diff --git a/contrib/perf.py b/contrib/perf.py --- a/contrib/perf.py +++ b/contrib/perf.py @@ -1826,7 +1826,10 @@ def perfnodelookup(ui, repo, rev, **opts mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg n = scmutil.revsingle(repo, rev).node() - cl = revlog(getsvfs(repo), indexfile=b"00changelog.i") + try: + cl = revlog(getsvfs(repo), radix=b"00changelog") + except TypeError: + cl = revlog(getsvfs(repo), indexfile=b"00changelog.i") def d(): cl.rev(n) @@ -2610,6 +2613,7 @@ def perfrevlogindex(ui, repo, file_=None opener = getattr(rl, 'opener') # trick linter # compat with hg <= 5.8 + radix = getattr(rl, 'radix', None) indexfile = getattr(rl, '_indexfile', None) if indexfile is None: # compatibility with <= hg-5.8 @@ -2641,7 +2645,11 @@ def perfrevlogindex(ui, repo, file_=None allnodesrev = list(reversed(allnodes)) def constructor(): - revlog(opener, indexfile=indexfile) + if radix is not None: + revlog(opener, 
radix=radix) + else: + # hg <= 5.8 + revlog(opener, indexfile=indexfile) def read(): with opener(indexfile) as fh: @@ -3043,8 +3051,9 @@ def _temprevlog(ui, orig, truncaterev): datafile = getattr(orig, '_datafile', getattr(orig, 'datafile')) origdatapath = orig.opener.join(datafile) - indexname = 'revlog.i' - dataname = 'revlog.d' + radix = b'revlog' + indexname = b'revlog.i' + dataname = b'revlog.d' tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-') try: @@ -3069,9 +3078,12 @@ def _temprevlog(ui, orig, truncaterev): vfs = vfsmod.vfs(tmpdir) vfs.options = getattr(orig.opener, 'options', None) - dest = revlog( - vfs, indexfile=indexname, datafile=dataname, **revlogkwargs - ) + try: + dest = revlog(vfs, radix=radix, **revlogkwargs) + except TypeError: + dest = revlog( + vfs, indexfile=indexname, datafile=dataname, **revlogkwargs + ) if dest._inline: raise error.Abort('not supporting inline revlog (yet)') # make sure internals are initialized diff --git a/contrib/undumprevlog b/contrib/undumprevlog --- a/contrib/undumprevlog +++ b/contrib/undumprevlog @@ -32,10 +32,11 @@ while True: break if l.startswith("file:"): f = encoding.strtolocal(l[6:-1]) + assert f.endswith(b'.i') r = revlog.revlog( opener, target=(revlog_constants.KIND_OTHER, b'undump-revlog'), - indexfile=f, + radix=f[:-2], ) procutil.stdout.write(b'%s\n' % f) elif l.startswith("node:"): diff --git a/hgext/remotefilelog/contentstore.py b/hgext/remotefilelog/contentstore.py --- a/hgext/remotefilelog/contentstore.py +++ b/hgext/remotefilelog/contentstore.py @@ -281,7 +281,7 @@ class manifestrevlogstore(object): self._store = repo.store self._svfs = repo.svfs self._revlogs = dict() - self._cl = revlog.revlog(self._svfs, indexfile=b'00changelog.i') + self._cl = revlog.revlog(self._svfs, radix=b'00changelog') self._repackstartlinkrev = 0 def get(self, name, node): @@ -341,10 +341,10 @@ class manifestrevlogstore(object): def _revlog(self, name): rl = self._revlogs.get(name) if rl is None: - revlogname = 
b'00manifesttree.i' + revlogname = b'00manifesttree' if name != b'': - revlogname = b'meta/%s/00manifest.i' % name - rl = revlog.revlog(self._svfs, indexfile=revlogname) + revlogname = b'meta/%s/00manifest' % name + rl = revlog.revlog(self._svfs, radix=revlogname) self._revlogs[name] = rl return rl @@ -365,7 +365,7 @@ class manifestrevlogstore(object): if options and options.get(constants.OPTION_PACKSONLY): return treename = b'' - rl = revlog.revlog(self._svfs, indexfile=b'00manifesttree.i') + rl = revlog.revlog(self._svfs, radix=b'00manifesttree') startlinkrev = self._repackstartlinkrev endlinkrev = self._repackendlinkrev for rev in pycompat.xrange(len(rl) - 1, -1, -1): @@ -382,9 +382,9 @@ class manifestrevlogstore(object): if path[:5] != b'meta/' or path[-2:] != b'.i': continue treename = path[5 : -len(b'/00manifest.i')] - rl = revlog.revlog(self._svfs, indexfile=path) + rl = revlog.revlog(self._svfs, radix=path[:-2]) for rev in pycompat.xrange(len(rl) - 1, -1, -1): linkrev = rl.linkrev(rev) if linkrev < startlinkrev: diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py +++ b/mercurial/bundlerepo.py @@ -52,7 +52,7 @@ from .revlogutils import ( class bundlerevlog(revlog.revlog): - def __init__(self, opener, target, indexfile, cgunpacker, linkmapper): + def __init__(self, opener, target, radix, cgunpacker, linkmapper): # How it works: # To retrieve a revision, we need to know the offset of the revision in # the bundle (an unbundle object). We store this offset in the index @@ -61,7 +61,7 @@ class bundlerevlog(revlog.revlog): # To differentiate a rev in the bundle from a rev in the revlog, we # check revision against repotiprev. 
opener = vfsmod.readonlyvfs(opener) - revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile) + revlog.revlog.__init__(self, opener, target=target, radix=radix) self.bundle = cgunpacker n = len(self) self.repotiprev = n - 1 @@ -180,7 +180,7 @@ class bundlechangelog(bundlerevlog, chan self, opener, (revlog_constants.KIND_CHANGELOG, None), - self._indexfile, + self.radix, cgunpacker, linkmapper, ) @@ -201,7 +201,7 @@ class bundlemanifest(bundlerevlog, manif self, opener, (revlog_constants.KIND_MANIFESTLOG, dir), - self._revlog._indexfile, + self._revlog.radix, cgunpacker, linkmapper, ) @@ -233,7 +233,7 @@ class bundlefilelog(filelog.filelog): opener, # XXX should use the unencoded path target=(revlog_constants.KIND_FILELOG, path), - indexfile=self._revlog._indexfile, + radix=self._revlog.radix, cgunpacker=cgunpacker, linkmapper=linkmapper, ) diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -396,20 +396,17 @@ class changelog(revlog.revlog): the documentation there. 
""" - indexfile = b'00changelog.i' if trypending and opener.exists(b'00changelog.i.a'): postfix = b'a' else: postfix = None - datafile = b'00changelog.d' revlog.revlog.__init__( self, opener, target=(revlog_constants.KIND_CHANGELOG, None), + radix=b'00changelog', postfix=postfix, - indexfile=indexfile, - datafile=datafile, checkambig=True, mmaplargeindex=True, persistentnodemap=opener.options.get(b'persistent-nodemap', False), diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py --- a/mercurial/cmdutil.py +++ b/mercurial/cmdutil.py @@ -1437,7 +1437,7 @@ def openstorage(repo, cmd, file_, opts, r = revlog.revlog( vfsmod.vfs(encoding.getcwd(), audit=False), target=target, - indexfile=file_[:-2] + b".i", + radix=file_[:-2], ) return r diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -30,7 +30,7 @@ class filelog(object): opener, # XXX should use the unencoded path target=(revlog_constants.KIND_FILELOG, path), - indexfile=b'/'.join((b'data', path + b'.i')), + radix=b'/'.join((b'data', path)), censorable=True, ) # Full name of the user visible file, relative to the repository root. 
diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -1567,7 +1567,6 @@ class manifestrevlog(object): opener, tree=b'', dirlogcache=None, - indexfile=None, treemanifest=False, ): """Constructs a new manifest revlog @@ -1598,10 +1597,9 @@ class manifestrevlog(object): if tree: assert self._treeondisk, b'opts is %r' % opts - if indexfile is None: - indexfile = b'00manifest.i' - if tree: - indexfile = b"meta/" + tree + indexfile + radix = b'00manifest' + if tree: + radix = b"meta/" + tree + radix self.tree = tree @@ -1614,7 +1612,7 @@ class manifestrevlog(object): self._revlog = revlog.revlog( opener, target=(revlog_constants.KIND_MANIFESTLOG, self.tree), - indexfile=indexfile, + radix=radix, # only root indexfile is cached checkambig=not bool(tree), mmaplargeindex=True, diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -289,9 +289,8 @@ class revlog(object): self, opener, target, + radix, postfix=None, - indexfile=None, - datafile=None, checkambig=False, mmaplargeindex=False, censorable=False, @@ -313,16 +312,19 @@ class revlog(object): accurate value. 
""" self.upperboundcomp = upperboundcomp - if not indexfile.endswith(b'.i'): - raise error.ProgrammingError( - b"revlog's indexfile should end with `.i`" - ) - if datafile is None: - datafile = indexfile[:-2] + b".d" - if postfix is not None: - datafile = b'%s.%s' % (datafile, postfix) - if postfix is not None: - indexfile = b'%s.%s' % (indexfile, postfix) + + self.radix = radix + + if postfix is None: + indexfile = b'%s.i' % self.radix + datafile = b'%s.d' % self.radix + elif postfix == b'a': + indexfile = b'%s.i.a' % self.radix + datafile = b'%s.d' % self.radix + else: + indexfile = b'%s.i.%s' % (self.radix, postfix) + datafile = b'%s.d.%s' % (self.radix, postfix) + self._indexfile = indexfile self._datafile = datafile self.nodemap_file = None @@ -2900,8 +2902,8 @@ class revlog(object): newrl = revlog( self.opener, target=self.target, + radix=self.radix, postfix=b'tmpcensored', - indexfile=self._indexfile, censorable=True, ) newrl._format_version = self._format_version diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py --- a/mercurial/unionrepo.py +++ b/mercurial/unionrepo.py @@ -33,7 +33,7 @@ from . import ( class unionrevlog(revlog.revlog): - def __init__(self, opener, indexfile, revlog2, linkmapper): + def __init__(self, opener, radix, revlog2, linkmapper): # How it works: # To retrieve a revision, we just need to know the node id so we can # look it up in revlog2. 
@@ -45,7 +45,7 @@ class unionrevlog(revlog.revlog): if target is None: # a revlog wrapper, eg: the manifestlog that is not an actual revlog target = revlog2._revlog.target - revlog.revlog.__init__(self, opener, target=target, indexfile=indexfile) + revlog.revlog.__init__(self, opener, target=target, radix=radix) self.revlog2 = revlog2 n = len(self) @@ -164,9 +164,7 @@ class unionchangelog(unionrevlog, change changelog.changelog.__init__(self, opener) linkmapper = None changelog2 = changelog.changelog(opener2) - unionrevlog.__init__( - self, opener, self._indexfile, changelog2, linkmapper - ) + unionrevlog.__init__(self, opener, self.radix, changelog2, linkmapper) class unionmanifest(unionrevlog, manifest.manifestrevlog): @@ -174,7 +172,7 @@ class unionmanifest(unionrevlog, manifes manifest.manifestrevlog.__init__(self, nodeconstants, opener) manifest2 = manifest.manifestrevlog(nodeconstants, opener2) unionrevlog.__init__( - self, opener, self._revlog._indexfile, manifest2, linkmapper + self, opener, self._revlog.radix, manifest2, linkmapper ) @@ -183,7 +181,7 @@ class unionfilelog(filelog.filelog): filelog.filelog.__init__(self, opener, path) filelog2 = filelog.filelog(opener2, path) self._revlog = unionrevlog( - opener, self._revlog._indexfile, filelog2._revlog, linkmapper + opener, self._revlog.radix, filelog2._revlog, linkmapper ) self._repo = repo self.repotiprev = self._revlog.repotiprev diff --git a/tests/test-revlog-raw.py b/tests/test-revlog-raw.py --- a/tests/test-revlog-raw.py +++ b/tests/test-revlog-raw.py @@ -79,12 +79,11 @@ def newtransaction(): return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal') -def newrevlog(name=b'_testrevlog.i', recreate=False): +def newrevlog(name=b'_testrevlog', recreate=False): if recreate: - tvfs.tryunlink(name) - rlog = revlog.revlog( - tvfs, target=(constants.KIND_OTHER, b'test'), indexfile=name - ) + tvfs.tryunlink(name + b'.i') + target = (constants.KIND_OTHER, b'test') + rlog = revlog.revlog(tvfs, 
target=target, radix=name) return rlog @@ -112,7 +111,7 @@ def appendrev(rlog, text, tr, isext=Fals rlog._storedeltachains = True -def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True): +def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True): """Copy revlog to destname using revlog.addgroup. Return the copied revlog. This emulates push or pull. They use changegroup. Changegroup requires @@ -177,7 +176,7 @@ def addgroupcopy(rlog, tr, destname=b'_d return dlog -def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'): +def lowlevelcopy(rlog, tr, destname=b'_destrevlog'): """Like addgroupcopy, but use the low level revlog._addrevision directly. It exercises some code paths that are hard to reach easily otherwise. @@ -427,7 +426,7 @@ data = [ def makesnapshot(tr): - rl = newrevlog(name=b'_snaprevlog3.i', recreate=True) + rl = newrevlog(name=b'_snaprevlog3', recreate=True) for i in data: appendrev(rl, i, tr) return rl @@ -483,7 +482,7 @@ def maintest(): checkrevlog(rl2, expected) print('addgroupcopy test passed') # Copy via revlog.clone - rl3 = newrevlog(name=b'_destrevlog3.i', recreate=True) + rl3 = newrevlog(name=b'_destrevlog3', recreate=True) rl.clone(tr, rl3) checkrevlog(rl3, expected) print('clone test passed') diff --git a/tests/test-revlog.t b/tests/test-revlog.t --- a/tests/test-revlog.t +++ b/tests/test-revlog.t @@ -49,6 +49,6 @@ Test for CVE-2016-3630 >>> from mercurial import revlog, vfs >>> tvfs = vfs.vfs(b'.') >>> tvfs.options = {b'revlogv1': True} - >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), indexfile=b'a.i') + >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a') >>> rl.revision(1) mpatchError(*'patch cannot be decoded'*) (glob)