##// END OF EJS Templates
git: skeleton of a new extension to _directly_ operate on git repos...
git: skeleton of a new extension to _directly_ operate on git repos This is based in part on work I did years ago in hgit, but it's mostly new code since I'm using pygit2 instead of dulwich and the hg storage interfaces have improved. Some cleanup of old hgit code by Pulkit, which I greatly appreciate. test-git-interop.t does not cover a whole lot of cases, but it passes. It includes status, diff, making a new commit, and `hg annotate` working on the git repository. This is _not_ (yet) production quality code: this is an experiment. Known technical debt lurking in this implementation: * Writing bookmarks just totally ignores transactions. * The way progress is threaded down into the gitstore is awful. * Ideally we'd find a way to incrementally reindex DAGs. I'm not sure how to do that efficiently, so we might need a "known only fast-forwards" mode on the DAG indexer for use on `hg commit` and friends. * We don't even _try_ to do anything reasonable for `hg pull` or `hg push`. * Mercurial needs an interface for the changelog type. Tests currently require git 2.24 as far as I'm aware: `git status` has some changed output that I didn't try and handle in a compatible way. This patch has produced some interesting cleanups, most recently on the manifest type. I expect continuing down this road will produce other meritorious cleanups throughout our code. Differential Revision: https://phab.mercurial-scm.org/D6734

File last commit:

r44937:9d2b2df2 default
r44961:ad718271 default
Show More
contentstore.py
388 lines | 12.7 KiB | text/x-python | PythonLexer
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 from __future__ import absolute_import
import threading
from mercurial.node import hex, nullid
Gregory Szorc
py3: manually import getattr where it is needed...
r43359 from mercurial.pycompat import getattr
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
r40530 from mercurial import (
mdiff,
pycompat,
revlog,
)
from . import (
basestore,
constants,
shallowutil,
)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class ChainIndicies(object):
    """Named positions of the fields inside one delta chain entry.

    A delta chain entry is a 5-tuple; these constants give each slot a name
    so callers can index entries without magic numbers:

    NAME      -- the filename of this revision delta
    NODE      -- the mercurial file node for this revision delta
    BASENAME  -- the filename of the delta base's revision; useful when
                 deltaing between different files (e.g. after a move or
                 copy, the delta can be against the original file content)
    BASENODE  -- the mercurial file node for the delta base revision; this
                 is the nullid if this delta is a full text
    DATA      -- the actual delta or full text data
    """

    # Slots are numbered consecutively from zero, in tuple order.
    NAME, NODE, BASENAME, BASENODE, DATA = range(5)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class unioncontentstore(basestore.baseunionstore):
    """Content store that answers queries by consulting several stores.

    The positional constructor arguments are the underlying stores; they are
    tried in the order given. Two keyword arguments are read here:

    writestore       -- store that receives addremotefilelognode() calls
                        (optional; without it those calls fail)
    allowincomplete  -- when true, getdeltachain() may return a chain that
                        does not end in a full text instead of raising
    """

    def __init__(self, *args, **kwargs):
        super(unioncontentstore, self).__init__(*args, **kwargs)

        # The stores are queried in the order they were passed in.
        self.stores = args
        self.writestore = kwargs.get('writestore')

        # If allowincomplete==True then the union store can return partial
        # delta chains, otherwise it will throw a KeyError if a full
        # deltachain can't be found.
        self.allowincomplete = kwargs.get('allowincomplete', False)

    def get(self, name, node):
        """Return the full text revision contents for the name+node pair.

        The delta chain is assembled with getdeltachain() across all the
        stores and then applied from the full text outwards. Raises KeyError
        when the chain does not terminate in a full text.
        """
        chain = self.getdeltachain(name, node)

        # A chain whose last entry still has a delta base is incomplete.
        if chain[-1][ChainIndicies.BASENODE] != nullid:
            raise KeyError((name, hex(node)))

        # The final entry holds the full text; every entry before it is a
        # delta that is applied on top, walking back towards the request.
        text = chain.pop()[ChainIndicies.DATA]
        while chain:
            text = mdiff.patches(text, [chain.pop()[ChainIndicies.DATA]])
        return text

    @basestore.baseunionstore.retriable
    def getdelta(self, name, node):
        """Return the single delta entry for the given name/node pair.

        The first store that knows the pair wins; KeyError is raised when
        none of them does.
        """
        for store in self.stores:
            try:
                found = store.getdelta(name, node)
            except KeyError:
                continue
            else:
                return found

        raise KeyError((name, hex(node)))

    def getdeltachain(self, name, node):
        """Return the delta chain for the given name/node pair.

        The result is an ordered list of:

          [(name, node, deltabasename, deltabasenode, deltacontent),...]

        terminated by a full text entry, i.e. one whose deltabasenode is the
        nullid. When a base cannot be found, the KeyError propagates unless
        allowincomplete is set, in which case the partial chain is returned.
        """
        chain = self._getpartialchain(name, node)

        while True:
            tail = chain[-1]
            basenode = tail[ChainIndicies.BASENODE]
            if basenode == nullid:
                # Reached a full text: the chain is complete.
                break

            basename = tail[ChainIndicies.BASENAME]
            try:
                chain.extend(self._getpartialchain(basename, basenode))
            except KeyError:
                # If we allow incomplete chains, don't throw.
                if self.allowincomplete:
                    break
                raise

        return chain

    @basestore.baseunionstore.retriable
    def getmeta(self, name, node):
        """Return the metadata dict for the given node.

        The first store that knows the pair wins; KeyError is raised when
        none of them does.
        """
        for store in self.stores:
            try:
                found = store.getmeta(name, node)
            except KeyError:
                continue
            else:
                return found

        raise KeyError((name, hex(node)))

    def getmetrics(self):
        """Return the metrics of all underlying stores, summed together."""
        return shallowutil.sumdicts(*[s.getmetrics() for s in self.stores])

    @basestore.baseunionstore.retriable
    def _getpartialchain(self, name, node):
        """Return a partial delta chain for the given name/node pair.

        A partial chain is a chain that may not be terminated in a full-text.
        It is taken from the first store that knows the pair; KeyError is
        raised when none of them does.
        """
        for store in self.stores:
            try:
                found = store.getdeltachain(name, node)
            except KeyError:
                continue
            else:
                return found

        raise KeyError((name, hex(node)))

    def add(self, name, node, data):
        """Always raise RuntimeError: plain content cannot be added here."""
        raise RuntimeError(
            b"cannot add content only to remotefilelog contentstore"
        )

    def getmissing(self, keys):
        """Return the subset of keys that none of the stores can provide.

        Each store is only asked about what the previous stores reported as
        missing; once nothing is missing the remaining stores are skipped.
        """
        remaining = keys
        for store in self.stores:
            if not remaining:
                break
            remaining = store.getmissing(remaining)
        return remaining

    def addremotefilelognode(self, name, node, data):
        """Forward the node to the write store.

        Raises RuntimeError when no write store was configured.
        """
        if not self.writestore:
            raise RuntimeError(b"no writable store configured")
        self.writestore.addremotefilelognode(name, node, data)

    def markledger(self, ledger, options=None):
        """Ask every underlying store to mark its entries in the ledger."""
        for store in self.stores:
            store.markledger(ledger, options)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class remotefilelogcontentstore(basestore.basestore):
    """Content store backed by remotefilelog blobs holding full texts.

    The metadata (flags and size) of the most recently parsed node is kept
    in a one-entry, per-thread cache so that a get() followed by a getmeta()
    for the same node only parses the blob once.
    """

    def __init__(self, *args, **kwargs):
        super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
        # Holds the per-thread `metacache` pair: (node, meta).
        self._threaddata = threading.local()

    def get(self, name, node):
        """Return the raw revision text for the given name/node pair."""
        blob = self._getdata(name, node)

        offset, size, flags = shallowutil.parsesizeflags(blob)
        content = blob[offset : offset + size]

        p1, p2, linknode, copyfrom = shallowutil.ancestormap(blob)[node]
        # For a copy, p1 carries the node of the copy source.
        copyrev = hex(p1) if copyfrom else None

        self._updatemetacache(node, size, flags)

        # lfs tracks renames in its own metadata, remove hg copy metadata,
        # because copy metadata will be re-added by lfs flag processor.
        if flags & revlog.REVIDX_EXTSTORED:
            copyfrom = None
            copyrev = None

        return shallowutil.createrevlogtext(content, copyfrom, copyrev)

    def getdelta(self, name, node):
        """Return (fulltext, name, nullid, meta) for the name/node pair.

        Since remotefilelog content stores only contain full texts, the
        "delta" is the full revision with the nullid as its base.
        """
        fulltext = self.get(name, node)
        return fulltext, name, nullid, self.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry delta chain for the name/node pair.

        Since remotefilelog content stores just contain full texts, the
        chain is faked: a single full text revision whose deltabasenode slot
        holds the nullid to mark it as a fulltext.
        """
        fulltext = self.get(name, node)
        return [(name, node, None, nullid, fulltext)]

    def getmeta(self, name, node):
        """Return the metadata dict (flags and size) for the given node.

        The blob is only read and parsed when the per-thread cache does not
        already describe this node.
        """
        self._sanitizemetacache()
        if self._threaddata.metacache[0] != node:
            blob = self._getdata(name, node)
            offset, size, flags = shallowutil.parsesizeflags(blob)
            self._updatemetacache(node, size, flags)
        return self._threaddata.metacache[1]

    def add(self, name, node, data):
        """Always raise RuntimeError: plain content cannot be added here."""
        raise RuntimeError(
            b"cannot add content only to remotefilelog contentstore"
        )

    def _sanitizemetacache(self):
        """Make sure the current thread has a metacache tuple to index."""
        if getattr(self._threaddata, 'metacache', None) is None:
            self._threaddata.metacache = (None, None)  # (node, meta)

    def _updatemetacache(self, node, size, flags):
        """Remember the flags and size of node in the per-thread cache.

        Nothing is rebuilt when the cache already describes this node.
        """
        self._sanitizemetacache()
        if node != self._threaddata.metacache[0]:
            meta = {constants.METAKEYFLAG: flags, constants.METAKEYSIZE: size}
            self._threaddata.metacache = (node, meta)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class remotecontentstore(object):
    """Content store that prefetches through a file service before reading.

    Every read first asks the file service to prefetch the requested
    name/node pair and then serves the answer from the shared store.
    """

    def __init__(self, ui, fileservice, shared):
        self._fileservice = fileservice
        # type(shared) is usually remotefilelogcontentstore
        self._shared = shared

    def _prefetch(self, name, node):
        """Force the file service to prefetch the data for one name/node."""
        self._fileservice.prefetch(
            [(name, hex(node))], force=True, fetchdata=True
        )

    def get(self, name, node):
        """Prefetch the pair, then return its text from the shared store."""
        self._prefetch(name, node)
        return self._shared.get(name, node)

    def getdelta(self, name, node):
        """Return (fulltext, name, nullid, meta) for the name/node pair."""
        fulltext = self.get(name, node)
        return fulltext, name, nullid, self._shared.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry delta chain for the name/node pair.

        Since our remote content stores just contain full texts, the chain
        is faked: a single full text revision whose deltabasenode slot holds
        the nullid to mark it as a fulltext.
        """
        fulltext = self.get(name, node)
        return [(name, node, None, nullid, fulltext)]

    def getmeta(self, name, node):
        """Prefetch the pair, then return its metadata from the shared store."""
        self._prefetch(name, node)
        return self._shared.getmeta(name, node)

    def add(self, name, node, data):
        """Always raise RuntimeError: a remote store cannot be written to."""
        raise RuntimeError(b"cannot add to a remote store")

    def getmissing(self, keys):
        """Return keys unchanged: every key is reported as missing."""
        return keys

    def markledger(self, ledger, options=None):
        """Do nothing; this store marks no entries in the ledger."""
        pass
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
remotefilelog: import pruned-down remotefilelog extension from hg-experimental...
class manifestrevlogstore(object):
    """Store that serves tree manifest data straight from revlogs.

    The root tree (name b'') is read from '00manifesttree.i'; any other tree
    is read from 'meta/<name>/00manifest.i'. Opened revlogs are cached per
    tree name.

    NOTE(review): `_repackendlinkrev` is only assigned by
    setrepacklinkrevrange(); markledger() reads it, so calling markledger()
    (without the packs-only option) before setrepacklinkrevrange() raises
    AttributeError -- confirm callers always set the range first.
    """

    def __init__(self, repo):
        self._store = repo.store
        self._svfs = repo.svfs
        # Cache of opened manifest revlogs, keyed by tree name.
        self._revlogs = {}
        self._cl = revlog.revlog(self._svfs, b'00changelog.i')
        self._repackstartlinkrev = 0

    def get(self, name, node):
        """Return the raw revlog data of node in the tree called name."""
        return self._revlog(name).rawdata(node)

    def getdelta(self, name, node):
        """Return (fulltext, name, nullid, meta) for the name/node pair."""
        fulltext = self.get(name, node)
        return fulltext, name, nullid, self.getmeta(name, node)

    def getdeltachain(self, name, node):
        """Return a one-entry chain holding the full text of the node."""
        fulltext = self.get(name, node)
        return [(name, node, None, nullid, fulltext)]

    def getmeta(self, name, node):
        """Return the metadata dict (revlog flags and raw size) of node."""
        rl = self._revlog(name)
        rev = rl.rev(node)
        meta = {
            constants.METAKEYFLAG: rl.flags(rev),
            constants.METAKEYSIZE: rl.rawsize(rev),
        }
        return meta

    def getancestors(self, name, node, known=None):
        """Map node and its ancestors to (p1, p2, linknode, b'') tuples.

        The walk stops as soon as every parent seen so far is either visited
        or listed in `known`. When node itself is in `known` an empty list
        (not a dict) is returned.
        """
        known = set() if known is None else known
        if node in known:
            return []

        rl = self._revlog(name)
        ancestors = {}
        # Nodes we still expect to visit before the walk may stop.
        pending = {node}
        for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
            ancnode = rl.node(ancrev)
            pending.discard(ancnode)

            p1, p2 = rl.parents(ancnode)
            for parent in (p1, p2):
                if parent != nullid and parent not in known:
                    pending.add(parent)

            linknode = self._cl.node(rl.linkrev(ancrev))
            ancestors[ancnode] = (p1, p2, linknode, b'')

            if not pending:
                break

        return ancestors

    def getnodeinfo(self, name, node):
        """Return (p1, p2, linknode, None) for the given node."""
        rl = self._revlog(name)
        parents = rl.parents(node)
        linknode = self._cl.node(rl.linkrev(rl.rev(node)))
        return (parents[0], parents[1], linknode, None)

    def add(self, *args):
        """Always raise RuntimeError: a revlog store cannot be written to."""
        raise RuntimeError(b"cannot add to a revlog store")

    def _revlog(self, name):
        """Return the (cached) manifest revlog for the tree called name."""
        cached = self._revlogs.get(name)
        if cached is not None:
            return cached

        # The root tree has its own revlog; sub-trees live under meta/.
        if name == b'':
            revlogname = b'00manifesttree.i'
        else:
            revlogname = b'meta/%s/00manifest.i' % name

        opened = revlog.revlog(self._svfs, revlogname)
        self._revlogs[name] = opened
        return opened

    def getmissing(self, keys):
        """Return the (name, node) keys whose node is not in the revlog."""
        return [
            (name, node)
            for name, node in keys
            if node not in self._revlog(name).nodemap
        ]

    def setrepacklinkrevrange(self, startrev, endrev):
        """Set the inclusive linkrev range that markledger() considers."""
        self._repackstartlinkrev = startrev
        self._repackendlinkrev = endrev

    def markledger(self, ledger, options=None):
        """Mark the manifest entries inside the repack linkrev range.

        Nothing is marked when the packs-only option is set. Otherwise the
        root tree revlog is marked first, followed by every '.i' data file
        under 'meta/'.
        """
        if options and options.get(constants.OPTION_PACKSONLY):
            return

        roottree = revlog.revlog(self._svfs, b'00manifesttree.i')
        lowest = self._repackstartlinkrev
        highest = self._repackendlinkrev
        self._markrevlog(ledger, b'', roottree, lowest, highest)

        for path, encoded, size in self._store.datafiles():
            if path[:5] == b'meta/' and path[-2:] == b'.i':
                # Strip the 'meta/' prefix and the revlog file name to get
                # the tree name.
                treename = path[5 : -len(b'/00manifest.i')]
                subtree = revlog.revlog(self._svfs, path)
                self._markrevlog(ledger, treename, subtree, lowest, highest)

    def _markrevlog(self, ledger, treename, rl, startlinkrev, endlinkrev):
        """Mark the revisions of rl whose linkrev lies in the given range.

        Revisions are visited from the newest to the oldest; the walk stops
        at the first linkrev below startlinkrev.
        """
        for rev in pycompat.xrange(len(rl) - 1, -1, -1):
            linkrev = rl.linkrev(rev)
            if linkrev < startlinkrev:
                break
            if linkrev <= endlinkrev:
                node = rl.node(rev)
                ledger.markdataentry(self, treename, node)
                ledger.markhistoryentry(self, treename, node)

    def cleanup(self, ledger):
        """Do nothing; this store has nothing to clean up."""
        pass