narrowchangegroup.py
362 lines
| 16.2 KiB
| text/x-python
|
PythonLexer
Augie Fackler
|
r36096 | # narrowchangegroup.py - narrow clone changegroup creation and consumption | ||
# | ||||
# Copyright 2017 Google, Inc. | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | ||||
from mercurial.i18n import _ | ||||
from mercurial import ( | ||||
changegroup, | ||||
error, | ||||
extensions, | ||||
manifest, | ||||
mdiff, | ||||
node, | ||||
Gregory Szorc
|
r38806 | pycompat, | ||
Augie Fackler
|
r36107 | revlog, | ||
Augie Fackler
|
r36096 | util, | ||
) | ||||
def setup(): | ||||
def prune(orig, self, revlog, missing, commonrevs): | ||||
if isinstance(revlog, manifest.manifestrevlog): | ||||
Gregory Szorc
|
r38818 | if not self._filematcher.visitdir(revlog._dir[:-1] or '.'): | ||
Augie Fackler
|
r36096 | return [] | ||
Gregory Szorc
|
r38818 | |||
Augie Fackler
|
r36096 | return orig(self, revlog, missing, commonrevs) | ||
extensions.wrapfunction(changegroup.cg1packer, 'prune', prune) | ||||
def generatefiles(orig, self, changedfiles, linknodes, commonrevs, | ||||
source): | ||||
Gregory Szorc
|
r38818 | changedfiles = list(filter(self._filematcher, changedfiles)) | ||
Augie Fackler
|
r36096 | if getattr(self, 'is_shallow', False): | ||
# See comment in generate() for why this sadness is a thing. | ||||
mfdicts = self._mfdicts | ||||
del self._mfdicts | ||||
# In a shallow clone, the linknodes callback needs to also include | ||||
# those file nodes that are in the manifests we sent but weren't | ||||
# introduced by those manifests. | ||||
commonctxs = [self._repo[c] for c in commonrevs] | ||||
oldlinknodes = linknodes | ||||
clrev = self._repo.changelog.rev | ||||
def linknodes(flog, fname): | ||||
for c in commonctxs: | ||||
try: | ||||
fnode = c.filenode(fname) | ||||
self.clrev_to_localrev[c.rev()] = flog.rev(fnode) | ||||
except error.ManifestLookupError: | ||||
pass | ||||
links = oldlinknodes(flog, fname) | ||||
if len(links) != len(mfdicts): | ||||
for mf, lr in mfdicts: | ||||
fnode = mf.get(fname, None) | ||||
if fnode in links: | ||||
links[fnode] = min(links[fnode], lr, key=clrev) | ||||
elif fnode: | ||||
links[fnode] = lr | ||||
return links | ||||
return orig(self, changedfiles, linknodes, commonrevs, source) | ||||
extensions.wrapfunction( | ||||
changegroup.cg1packer, 'generatefiles', generatefiles) | ||||
Augie Fackler
|
r36107 | def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode): | ||
n = revlog_.node(rev) | ||||
p1n, p2n = revlog_.node(p1), revlog_.node(p2) | ||||
flags = revlog_.flags(rev) | ||||
flags |= revlog.REVIDX_ELLIPSIS | ||||
Augie Fackler
|
r36096 | meta = packer.builddeltaheader( | ||
n, p1n, p2n, node.nullid, linknode, flags) | ||||
# TODO: try and actually send deltas for ellipsis data blocks | ||||
diffheader = mdiff.trivialdiffheader(len(data)) | ||||
l = len(meta) + len(diffheader) + len(data) | ||||
return ''.join((changegroup.chunkheader(l), | ||||
meta, | ||||
diffheader, | ||||
data)) | ||||
def close(orig, self): | ||||
getattr(self, 'clrev_to_localrev', {}).clear() | ||||
if getattr(self, 'next_clrev_to_localrev', {}): | ||||
self.clrev_to_localrev = self.next_clrev_to_localrev | ||||
del self.next_clrev_to_localrev | ||||
self.changelog_done = True | ||||
return orig(self) | ||||
extensions.wrapfunction(changegroup.cg1packer, 'close', close) | ||||
# In a perfect world, we'd generate better ellipsis-ified graphs | ||||
# for non-changelog revlogs. In practice, we haven't started doing | ||||
# that yet, so the resulting DAGs for the manifestlog and filelogs | ||||
# are actually full of bogus parentage on all the ellipsis | ||||
# nodes. This has the side effect that, while the contents are | ||||
# correct, the individual DAGs might be completely out of whack in | ||||
# a case like 882681bc3166 and its ancestors (back about 10 | ||||
# revisions or so) in the main hg repo. | ||||
# | ||||
# The one invariant we *know* holds is that the new (potentially | ||||
# bogus) DAG shape will be valid if we order the nodes in the | ||||
# order that they're introduced in dramatis personae by the | ||||
# changelog, so what we do is we sort the non-changelog histories | ||||
# by the order in which they are used by the changelog. | ||||
def _sortgroup(orig, self, revlog, nodelist, lookup): | ||||
if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev: | ||||
return orig(self, revlog, nodelist, lookup) | ||||
key = lambda n: self.clnode_to_rev[lookup(n)] | ||||
return [revlog.rev(n) for n in sorted(nodelist, key=key)] | ||||
extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup) | ||||
def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source): | ||||
'''yield a sequence of changegroup chunks (strings)''' | ||||
# Note: other than delegating to orig, the only deviation in | ||||
# logic from normal hg's generate is marked with BEGIN/END | ||||
# NARROW HACK. | ||||
if not util.safehasattr(self, 'full_nodes'): | ||||
# not sending a narrow bundle | ||||
for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source): | ||||
yield x | ||||
return | ||||
repo = self._repo | ||||
cl = repo.changelog | ||||
mfl = repo.manifestlog | ||||
mfrevlog = mfl._revlog | ||||
clrevorder = {} | ||||
mfs = {} # needed manifests | ||||
fnodes = {} # needed file nodes | ||||
changedfiles = set() | ||||
# Callback for the changelog, used to collect changed files and manifest | ||||
# nodes. | ||||
# Returns the linkrev node (identity in the changelog case). | ||||
def lookupcl(x): | ||||
c = cl.read(x) | ||||
clrevorder[x] = len(clrevorder) | ||||
# BEGIN NARROW HACK | ||||
# | ||||
# Only update mfs if x is going to be sent. Otherwise we | ||||
# end up with bogus linkrevs specified for manifests and | ||||
# we skip some manifest nodes that we should otherwise | ||||
# have sent. | ||||
if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis: | ||||
n = c[0] | ||||
# record the first changeset introducing this manifest version | ||||
mfs.setdefault(n, x) | ||||
# Set this narrow-specific dict so we have the lowest manifest | ||||
# revnum to look up for this cl revnum. (Part of mapping | ||||
# changelog ellipsis parents to manifest ellipsis parents) | ||||
self.next_clrev_to_localrev.setdefault(cl.rev(x), | ||||
mfrevlog.rev(n)) | ||||
# We can't trust the changed files list in the changeset if the | ||||
# client requested a shallow clone. | ||||
if self.is_shallow: | ||||
changedfiles.update(mfl[c[0]].read().keys()) | ||||
else: | ||||
changedfiles.update(c[3]) | ||||
# END NARROW HACK | ||||
# Record a complete list of potentially-changed files in | ||||
# this manifest. | ||||
return x | ||||
self._verbosenote(_('uncompressed size of bundle content:\n')) | ||||
size = 0 | ||||
for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): | ||||
size += len(chunk) | ||||
yield chunk | ||||
self._verbosenote(_('%8.i (changelog)\n') % size) | ||||
# We need to make sure that the linkrev in the changegroup refers to | ||||
# the first changeset that introduced the manifest or file revision. | ||||
# The fastpath is usually safer than the slowpath, because the filelogs | ||||
# are walked in revlog order. | ||||
# | ||||
# When taking the slowpath with reorder=None and the manifest revlog | ||||
# uses generaldelta, the manifest may be walked in the "wrong" order. | ||||
# Without 'clrevorder', we would get an incorrect linkrev (see fix in | ||||
# cc0ff93d0c0c). | ||||
# | ||||
# When taking the fastpath, we are only vulnerable to reordering | ||||
# of the changelog itself. The changelog never uses generaldelta, so | ||||
# it is only reordered when reorder=True. To handle this case, we | ||||
# simply take the slowpath, which already has the 'clrevorder' logic. | ||||
# This was also fixed in cc0ff93d0c0c. | ||||
fastpathlinkrev = fastpathlinkrev and not self._reorder | ||||
# Treemanifests don't work correctly with fastpathlinkrev | ||||
# either, because we don't discover which directory nodes to | ||||
# send along with files. This could probably be fixed. | ||||
fastpathlinkrev = fastpathlinkrev and ( | ||||
'treemanifest' not in repo.requirements) | ||||
# Shallow clones also don't work correctly with fastpathlinkrev | ||||
# because file nodes may need to be sent for a manifest even if they | ||||
# weren't introduced by that manifest. | ||||
fastpathlinkrev = fastpathlinkrev and not self.is_shallow | ||||
for chunk in self.generatemanifests(commonrevs, clrevorder, | ||||
Augie Fackler
|
r36368 | fastpathlinkrev, mfs, fnodes, source): | ||
Augie Fackler
|
r36096 | yield chunk | ||
# BEGIN NARROW HACK | ||||
mfdicts = None | ||||
if self.is_shallow: | ||||
mfdicts = [(self._repo.manifestlog[n].read(), lr) | ||||
for (n, lr) in mfs.iteritems()] | ||||
# END NARROW HACK | ||||
mfs.clear() | ||||
clrevs = set(cl.rev(x) for x in clnodes) | ||||
if not fastpathlinkrev: | ||||
def linknodes(unused, fname): | ||||
return fnodes.get(fname, {}) | ||||
else: | ||||
cln = cl.node | ||||
def linknodes(filerevlog, fname): | ||||
llr = filerevlog.linkrev | ||||
fln = filerevlog.node | ||||
revs = ((r, llr(r)) for r in filerevlog) | ||||
return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs) | ||||
# BEGIN NARROW HACK | ||||
# | ||||
# We need to pass the mfdicts variable down into | ||||
# generatefiles(), but more than one command might have | ||||
# wrapped generatefiles so we can't modify the function | ||||
# signature. Instead, we pass the data to ourselves using an | ||||
# instance attribute. I'm sorry. | ||||
self._mfdicts = mfdicts | ||||
# END NARROW HACK | ||||
for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, | ||||
source): | ||||
yield chunk | ||||
yield self.close() | ||||
if clnodes: | ||||
repo.hook('outgoing', node=node.hex(clnodes[0]), source=source) | ||||
extensions.wrapfunction(changegroup.cg1packer, 'generate', generate) | ||||
def revchunk(orig, self, revlog, rev, prev, linknode): | ||||
if not util.safehasattr(self, 'full_nodes'): | ||||
# not sending a narrow changegroup | ||||
for x in orig(self, revlog, rev, prev, linknode): | ||||
yield x | ||||
return | ||||
# build up some mapping information that's useful later. See | ||||
# the local() nested function below. | ||||
if not self.changelog_done: | ||||
self.clnode_to_rev[linknode] = rev | ||||
linkrev = rev | ||||
self.clrev_to_localrev[linkrev] = rev | ||||
else: | ||||
linkrev = self.clnode_to_rev[linknode] | ||||
self.clrev_to_localrev[linkrev] = rev | ||||
# This is a node to send in full, because the changeset it | ||||
# corresponds to was a full changeset. | ||||
if linknode in self.full_nodes: | ||||
for x in orig(self, revlog, rev, prev, linknode): | ||||
yield x | ||||
return | ||||
# At this point, a node can either be one we should skip or an | ||||
# ellipsis. If it's not an ellipsis, bail immediately. | ||||
if linkrev not in self.precomputed_ellipsis: | ||||
return | ||||
linkparents = self.precomputed_ellipsis[linkrev] | ||||
def local(clrev): | ||||
"""Turn a changelog revnum into a local revnum. | ||||
The ellipsis dag is stored as revnums on the changelog, | ||||
but when we're producing ellipsis entries for | ||||
non-changelog revlogs, we need to turn those numbers into | ||||
something local. This does that for us, and during the | ||||
changelog sending phase will also expand the stored | ||||
mappings as needed. | ||||
""" | ||||
if clrev == node.nullrev: | ||||
return node.nullrev | ||||
if not self.changelog_done: | ||||
# If we're doing the changelog, it's possible that we | ||||
# have a parent that is already on the client, and we | ||||
# need to store some extra mapping information so that | ||||
# our contained ellipsis nodes will be able to resolve | ||||
# their parents. | ||||
if clrev not in self.clrev_to_localrev: | ||||
clnode = revlog.node(clrev) | ||||
self.clnode_to_rev[clnode] = clrev | ||||
return clrev | ||||
# Walk the ellipsis-ized changelog breadth-first looking for a | ||||
# change that has been linked from the current revlog. | ||||
# | ||||
# For a flat manifest revlog only a single step should be necessary | ||||
# as all relevant changelog entries are relevant to the flat | ||||
# manifest. | ||||
# | ||||
# For a filelog or tree manifest dirlog however not every changelog | ||||
# entry will have been relevant, so we need to skip some changelog | ||||
# nodes even after ellipsis-izing. | ||||
walk = [clrev] | ||||
while walk: | ||||
p = walk[0] | ||||
walk = walk[1:] | ||||
if p in self.clrev_to_localrev: | ||||
return self.clrev_to_localrev[p] | ||||
elif p in self.full_nodes: | ||||
walk.extend([pp for pp in self._repo.changelog.parentrevs(p) | ||||
if pp != node.nullrev]) | ||||
elif p in self.precomputed_ellipsis: | ||||
walk.extend([pp for pp in self.precomputed_ellipsis[p] | ||||
if pp != node.nullrev]) | ||||
else: | ||||
# In this case, we've got an ellipsis with parents | ||||
# outside the current bundle (likely an | ||||
# incremental pull). We "know" that we can use the | ||||
# value of this same revlog at whatever revision | ||||
# is pointed to by linknode. "Know" is in scare | ||||
# quotes because I haven't done enough examination | ||||
# of edge cases to convince myself this is really | ||||
# a fact - it works for all the (admittedly | ||||
# thorough) cases in our testsuite, but I would be | ||||
# somewhat unsurprised to find a case in the wild | ||||
# where this breaks down a bit. That said, I don't | ||||
# know if it would hurt anything. | ||||
Gregory Szorc
|
r38806 | for i in pycompat.xrange(rev, 0, -1): | ||
Augie Fackler
|
r36096 | if revlog.linkrev(i) == clrev: | ||
return i | ||||
# We failed to resolve a parent for this node, so | ||||
# we crash the changegroup construction. | ||||
raise error.Abort( | ||||
'unable to resolve parent while packing %r %r' | ||||
' for changeset %r' % (revlog.indexfile, rev, clrev)) | ||||
return node.nullrev | ||||
if not linkparents or ( | ||||
revlog.parentrevs(rev) == (node.nullrev, node.nullrev)): | ||||
p1, p2 = node.nullrev, node.nullrev | ||||
elif len(linkparents) == 1: | ||||
p1, = sorted(local(p) for p in linkparents) | ||||
p2 = node.nullrev | ||||
else: | ||||
p1, p2 = sorted(local(p) for p in linkparents) | ||||
Gregory Szorc
|
r37358 | n = revlog.node(rev) | ||
Augie Fackler
|
r36096 | yield ellipsisdata( | ||
Gregory Szorc
|
r37358 | self, rev, revlog, p1, p2, revlog.revision(n), linknode) | ||
Augie Fackler
|
r36096 | extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk) | ||
def deltaparent(orig, self, revlog, rev, p1, p2, prev): | ||||
if util.safehasattr(self, 'full_nodes'): | ||||
# TODO: send better deltas when in narrow mode. | ||||
# | ||||
# changegroup.group() loops over revisions to send, | ||||
# including revisions we'll skip. What this means is that | ||||
# `prev` will be a potentially useless delta base for all | ||||
# ellipsis nodes, as the client likely won't have it. In | ||||
# the future we should do bookkeeping about which nodes | ||||
# have been sent to the client, and try to be | ||||
# significantly smarter about delta bases. This is | ||||
# slightly tricky because this same code has to work for | ||||
# all revlogs, and we don't have the linkrev/linknode here. | ||||
return p1 | ||||
return orig(self, revlog, rev, p1, p2, prev) | ||||
extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent) | ||||