changegroup.py
886 lines
| 32.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / changegroup.py
Martin Geisler
|
r8226 | # changegroup.py - Mercurial changegroup manipulation functions | ||
# | ||||
# Copyright 2006 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Matt Mackall
|
r3877 | |||
Pierre-Yves David
|
r20933 | import weakref | ||
Matt Mackall
|
r3891 | from i18n import _ | ||
Pierre-Yves David
|
r20933 | from node import nullrev, nullid, hex, short | ||
Sune Foldager
|
r19200 | import mdiff, util, dagutil | ||
Simon Heimberg
|
r8312 | import struct, os, bz2, zlib, tempfile | ||
Pierre-Yves David
|
r20933 | import discovery, error, phases, branchmap | ||
Thomas Arendsen Hein
|
r1981 | |||
Sune Foldager
|
# struct formats of the per-delta headers; every field is a 20-byte string.
# Version 1 carries four fields (node, p1, p2, cs); the delta base is not
# transmitted and is derived by the unpacker.
_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
# Version 2 carries five fields (node, p1, p2, deltabase, cs).
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
Benoit Boissinot
|
r14141 | |||
Mads Kiilerich
|
def readexactly(stream, n):
    '''Read exactly n bytes from stream.read and return them.

    Aborts with util.Abort if the stream delivers fewer than n bytes.
    '''
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    raise util.Abort(_("stream ended unexpectedly"
                       " (got %d bytes, expected %d)")
                     % (got, n))
def getchunk(stream):
    """Read the next chunk from stream and return its payload as a string.

    A chunk starts with a 4-byte big-endian signed length that includes the
    4 header bytes themselves. A length of 0 denotes an empty chunk, for
    which "" is returned; any other length not greater than 4 is invalid
    and raises util.Abort.
    """
    header = readexactly(stream, 4)
    (length,) = struct.unpack(">l", header)
    if length > 4:
        return readexactly(stream, length - 4)
    if length != 0:
        raise util.Abort(_("invalid chunk length %d") % length)
    return ""
Thomas Arendsen Hein
|
r1981 | |||
Matt Mackall
|
def chunkheader(length):
    """Return the header (a string) of a changegroup chunk.

    length is the size of the chunk payload; the encoded big-endian value
    additionally counts the 4 bytes of the header itself.
    """
    total = length + 4
    return struct.pack(">l", total)
Thomas Arendsen Hein
|
r1981 | |||
def closechunk():
    """Return the header (a string) of a zero-length changegroup chunk.

    Unlike chunkheader(), the encoded length is a plain 0.
    """
    terminator = struct.pack(">l", 0)
    return terminator
Eric Sumner
|
def combineresults(results):
    """Combine 0 or more addchangegroup results into one.

    Each result follows the addchangegroup convention: 0 means nothing
    changed, 1 means the number of heads stayed the same, 1+n (2..n) means
    n heads were added and -1-n (-2..-n) means n heads were removed.

    Returns 0 as soon as any result is 0. Otherwise the head changes of all
    results are summed and re-encoded with the same convention; an empty
    sequence yields 1.
    """
    changedheads = 0
    for ret in results:
        # If any changegroup result is 0, return 0. Returning right away
        # keeps head changes accumulated so far from overriding the 0.
        if ret == 0:
            return 0
        if ret < -1:
            changedheads += ret + 1
        elif ret > 1:
            changedheads += ret - 1
    if changedheads > 0:
        return 1 + changedheads
    if changedheads < 0:
        return -1 + changedheads
    return 1
Matt Mackall
|
class nocompress(object):
    """Pass-through object offering the compress()/flush() compressor API."""

    def flush(self):
        # nothing is ever buffered, so there is nothing left to emit
        return ""

    def compress(self, x):
        # data is handed back unchanged
        return x
Matt Mackall
|
# Maps a bundle type name to a (file header, compressor factory) pair.
bundletypes = {
    # The empty type is only used by unbundle on ssh and old http servers:
    # since the unification ssh accepts a header, but there is no
    # capability signaling it.
    "": ("", nocompress),
    "HG20": (), # special-cased below
    "HG10UN": ("HG10UN", nocompress),
    # NOTE(review): only "HG10" is written for bz2; presumably the "BZ"
    # that starts the compressed stream completes the header - confirm.
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}

# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
Eric Sumner
|
def writebundle(ui, cg, filename, bundletype, vfs=None):
    """Write the changegroup cg to a bundle file and return its filename.

    If no filename is specified, a temporary file is created and its name
    is returned. Otherwise the file is opened in "wb" mode, through vfs
    when one is given and with the builtin open() when not.

    bundletype selects the header and compressor from bundletypes, except
    for "HG20", where the changegroup is written as a part of a bundle2
    container without compression. The other bundle types only accept
    version '01' changegroups and abort otherwise.

    The bundle file will be deleted in case of errors.
    """
    out = None
    todelete = None
    try:
        if not filename:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            out = os.fdopen(fd, "wb")
        elif vfs:
            out = vfs.open(filename, "wb")
        else:
            out = open(filename, "wb")
        # from here on, a failure must remove the partially written file
        todelete = filename

        if bundletype != "HG20":
            if cg.version != '01':
                raise util.Abort(_('old bundle types only supports v1 '
                                   'changegroups'))
            header, makecompressor = bundletypes[bundletype]
            out.write(header)
            z = makecompressor()
            chunkiter = cg.getchunks()
        else:
            import bundle2
            bundle = bundle2.bundle20(ui)
            part = bundle.newpart('changegroup', data=cg.getchunks())
            part.addparam('version', cg.version)
            z = nocompress()
            chunkiter = bundle.getchunks()

        # The chunk iterators parse the changegroup data, otherwise we
        # would block in case of sshrepo because we don't know the end of
        # the stream.
        for chunk in chunkiter:
            out.write(z.compress(chunk))
        out.write(z.flush())

        # everything was written: keep the file
        todelete = None
        return filename
    finally:
        if out is not None:
            out.close()
        if todelete is not None:
            if filename and vfs:
                vfs.unlink(todelete)
            else:
                os.unlink(todelete)
Matt Mackall
|
r3660 | |||
Matt Mackall
|
def decompressor(fh, alg):
    """Return an object to read the uncompressed content of fh from.

    alg names the compression of fh:
    - 'UN': fh is returned unchanged
    - 'GZ': the data read from fh is decompressed with zlib
    - 'BZ': the data read from fh is decompressed with bz2

    For 'GZ' and 'BZ' the result is a util.chunkbuffer fed by a generator
    that decompresses fh piece by piece. Any other value of alg raises
    util.Abort.
    """
    if alg == 'UN':
        return fh
    elif alg == 'GZ':
        def generator(f):
            zd = zlib.decompressobj()
            for chunk in util.filechunkiter(f):
                yield zd.decompress(chunk)
    elif alg == 'BZ':
        def generator(f):
            zd = bz2.BZ2Decompressor()
            # NOTE(review): "BZ" is fed first, presumably because these two
            # bytes were already consumed as part of the bundle header -
            # confirm against the callers.
            zd.decompress("BZ")
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    else:
        # translatable like every other abort message in this module
        raise util.Abort(_("unknown bundle compression '%s'") % alg)
    return util.chunkbuffer(generator(fh))
Matt Mackall
|
r12041 | |||
Sune Foldager
|
class cg1unpacker(object):
    """Reader for a version '01' changegroup stream.

    fh is passed through decompressor() according to alg. When the
    callback attribute is set, it is called once for every non-empty chunk
    whose length is read by chunklength().
    """
    # struct layout of a delta header: node, p1, p2, cs
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'

    def __init__(self, fh, alg):
        self._stream = decompressor(fh, alg)
        self._type = alg
        self.callback = None

    def compressed(self):
        """Tell whether the stream was created with a compression type."""
        return self._type != 'UN'

    # plain delegation to the underlying (decompressed) stream
    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def chunklength(self):
        """Read a chunk header and return the size of the chunk payload.

        Returns 0 for an empty chunk and aborts on any other length that
        does not exceed the 4 header bytes.
        """
        header = readexactly(self._stream, 4)
        (length,) = struct.unpack(">l", header)
        if length > 4:
            if self.callback:
                self.callback()
            return length - 4
        if length != 0:
            raise util.Abort(_("invalid chunk length %d") % length)
        return 0

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename

        An empty dict is returned when the chunk is empty.
        """
        size = self.chunklength()
        if not size:
            return {}
        return {'filename': readexactly(self._stream, size)}

    def _deltaheader(self, headertuple, prevnode):
        """Expand an unpacked v1 header to (node, p1, p2, deltabase, cs).

        The delta base is not part of the header: it is p1 when prevnode is
        None and prevnode otherwise.
        """
        node, p1, p2, cs = headertuple
        deltabase = prevnode
        if prevnode is None:
            deltabase = p1
        return node, p1, p2, deltabase, cs

    def deltachunk(self, prevnode):
        """Read the next delta chunk and return its content as a dict.

        The dict has the keys 'node', 'p1', 'p2', 'cs', 'deltabase' and
        'delta'; an empty dict is returned for an empty chunk.
        """
        size = self.chunklength()
        if not size:
            return {}
        hsize = self.deltaheadersize
        rawheader = readexactly(self._stream, hsize)
        fields = struct.unpack(self.deltaheader, rawheader)
        delta = readexactly(self._stream, size - hsize)
        node, p1, p2, deltabase, cs = self._deltaheader(fields, prevnode)
        return {'node': node, 'p1': p1, 'p2': p2, 'cs': cs,
                'deltabase': deltabase, 'delta': delta}

    def getchunks(self):
        """Yield all the chunks contained in the bundle.

        Used when you need to forward the binary stream to a file or
        another network API. To do so, the changegroup data is parsed,
        otherwise we would block in case of sshrepo because we don't know
        the end of the stream.

        Each chunk is yielded as its header followed by its payload cut in
        pieces of at most 2**20 bytes.
        """
        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        piece = 2**20
        emptygroup = False
        groups = 0
        while not emptygroup or groups <= 2:
            emptygroup = True
            groups += 1
            chunk = getchunk(self)
            while chunk:
                emptygroup = False
                yield chunkheader(len(chunk))
                for start in xrange(0, len(chunk), piece):
                    yield chunk[start:start + piece]
                chunk = getchunk(self)
            yield closechunk()
Sune Foldager
|
class cg2unpacker(cg1unpacker):
    """Reader for a version '02' changegroup stream.

    The only difference with cg1unpacker is the delta header, which names
    the delta base explicitly.
    """
    # struct layout of a delta header: node, p1, p2, deltabase, cs
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        """Return the five header fields as they are; prevnode is unused."""
        node, p1, p2, base, linknode = headertuple
        return node, p1, p2, base, linknode
Matt Mackall
|
class headerlessfixup(object):
    """File-like reader serving the string h before the content of fh."""

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        """Return exactly n bytes, taken from h first and then from fh."""
        if not self._h:
            return readexactly(self._fh, n)
        data = self._h[:n]
        self._h = self._h[n:]
        missing = n - len(data)
        if missing > 0:
            # h ran out: complete the read from the real stream
            data += readexactly(self._fh, missing)
        return data
Matt Mackall
|
r12329 | |||
Sune Foldager
|
class cg1packer(object):
    """Generator of version '01' changegroup streams for a repository."""
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    version = '01'

    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle.

        The bundle.reorder configuration is stored in self._reorder as None
        for 'auto' and as the parsed boolean otherwise.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        configured = repo.ui.config('bundle', 'reorder', 'auto')
        if configured == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(configured)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
        # size notes are only emitted in verbose, non-debug mode
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def close(self):
        """Return the chunk ending a group of chunks."""
        return closechunk()

    def fileheader(self, fname):
        """Return the chunk announcing the revisions of the file fname."""
        return chunkheader(len(fname)) + fname

    def group(self, nodelist, revlog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        lookup is called with each node and returns its link node.

        If units is not None, progress detail will be generated, units
        specifies the type of revlog that is touched (changelog, manifest,
        etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(revlog)
            revs = dag.linearize(set(revlog.rev(n) for n in nodelist))
        else:
            revs = sorted([revlog.rev(n) for n in nodelist])

        # add the parent of the first rev
        firstparent = revlog.parentrevs(revs[0])[0]
        revs.insert(0, firstparent)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for idx in xrange(total):
            if units is not None:
                self._progress(msgbundling, idx + 1, unit=units, total=total)
            prev = revs[idx]
            curr = revs[idx + 1]
            linknode = lookup(revlog.node(curr))
            for piece in self.revchunk(revlog, curr, prev, linknode):
                yield piece

        if units is not None:
            self._progress(msgbundling, None)
        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs):
        """Return the nodes of missing whose linkrev is not in commonrevs."""
        torev = revlog.rev
        tolinkrev = revlog.linkrev
        return [n for n in missing if tolinkrev(torev(n)) not in commonrevs]

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        The changelog group comes first, then the manifest group, then the
        groups of all changed files, then a closing chunk. Once everything
        was yielded, the 'outgoing' hook is run if clnodes is not empty.
        '''
        repo = self._repo
        cl = repo.changelog
        ml = repo.manifest

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)
            changedfiles.update(c[3])
            # record the first changeset introducing this manifest version
            mfs.setdefault(c[0], x)
            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the
        # filelogs are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def lookupmf(x):
            clnode = mfs[x]
            if not fastpathlinkrev:
                mdata = ml.readfast(x)
                for f, n in mdata.iteritems():
                    if f in changedfiles:
                        # record the first changeset introducing this filelog
                        # version
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
            return clnode

        mfnodes = self.prune(ml, mfs, commonrevs)
        size = 0
        for chunk in self.group(mfnodes, ml, lookupmf, units=_('manifests')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (manifests)\n') % size)

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        # Returns the {filenode: changelog node} mapping of a file: computed
        # from the linkrevs of the filelog in the fastpath case, collected
        # by lookupmf in the slowpath case.
        def linknodes(filerevlog, fname):
            if fastpathlinkrev:
                llr = filerevlog.linkrev
                def genfilenodes():
                    for r in filerevlog:
                        linkrev = llr(r)
                        if linkrev in clrevs:
                            yield filerevlog.node(r), cl.node(linkrev)
                return dict(genfilenodes())
            return fnodes.get(fname, {})

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield the chunks of all changed files, in sorted file name order.

        linknodes(filerevlog, fname) returns the mapping from the file nodes
        to their link nodes. Files without any node left after pruning are
        skipped; an empty or missing revlog aborts.
        """
        repo = self._repo
        progress = self._progress
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise util.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in
            # the fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
            if not filenodes:
                continue
            progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                     total=total)
            header = self.fileheader(fname)
            size = len(header)
            yield header
            for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                size += len(chunk)
                yield chunk
            self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress(msgbundling, None)

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Return the revision to delta rev against: always prev here."""
        return prev

    def revchunk(self, revlog, rev, prev, linknode):
        """Yield the chunk header, delta header and delta of revision rev.

        The delta is computed against the revision chosen by deltaparent().
        A full text (or the tombstone of a censored revision) is sent with
        a diff header as prefix when the base is nullrev or when the base
        or the revision is censored.
        """
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = self.deltaparent(revlog, rev, p1, p2, prev)

        prefix = ''
        if revlog.iscensored(base) or revlog.iscensored(rev):
            try:
                delta = revlog.revision(node)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                prefix = mdiff.replacediffheader(revlog.rawsize(base),
                                                 len(delta))
        elif base == nullrev:
            delta = revlog.revision(node)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)

        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
        meta += prefix
        yield chunkheader(len(meta) + len(delta))
        yield meta
        yield delta

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
        """Pack the delta header of a revision."""
        # do nothing with basenode, it is implicitly the previous one in HG10
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
Pierre-Yves David
|
r20925 | |||
Sune Foldager
|
class cg2packer(cg1packer):
    """Generator of version '02' changegroup streams.

    Differs from cg1packer by the explicit delta base in the delta header
    and by the default handling of bundle.reorder.
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, bundlecaps=None):
        super(cg2packer, self).__init__(repo, bundlecaps)
        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, revlog, rev, p1, p2, prev):
        """Return the revision to delta rev against.

        This is the delta parent stored in the revlog when it is p1, p2 or
        prev, and prev in all the other cases.
        """
        dp = revlog.deltaparent(rev)
        # avoid storing full revisions; pick prev in those cases
        # also pick prev when we can't be sure remote has dp
        if dp != nullrev and (dp == p1 or dp == p2 or dp == prev):
            return dp
        return prev

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
        """Pack the delta header of a revision, including its delta base."""
        fields = (node, p1n, p2n, basenode, linknode)
        return struct.pack(self.deltaheader, *fields)
# Maps a changegroup version to its (packer class, unpacker class) pair.
packermap = {
    '01': (cg1packer, cg1unpacker),
    '02': (cg2packer, cg2unpacker),
}
Pierre-Yves David
|
r23168 | |||
Pierre-Yves David
|
def _changegroupinfo(repo, nodes, source):
    """Report the changesets about to be bundled on the ui of repo.

    The number of changesets is shown in verbose mode or when source is
    'bundle'; the hex form of every node is listed in debug mode.
    """
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if not ui.debugflag:
        return
    ui.debug("list of changesets:\n")
    for node in nodes:
        ui.debug("%s\n" % hex(node))
Sune Foldager
|
def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
    """Return the raw changegroup chunk generator for the outgoing set.

    Runs the 'preoutgoing' hook (which may abort), reports the changesets
    and then delegates to bundler.generate(). Note that
    outgoing.missingheads is sorted in place.
    """
    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath
    if not fastpathlinkrev:
        fastpathlinkrev = (repo.filtername is None
                           and heads == sorted(repo.heads()))
    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
Eric Sumner
|
def getsubset(repo, outgoing, bundler, source, fastpath=False, version='01'):
    """Return an unpacker reading the changegroup of the outgoing set.

    The chunks produced by getsubsetraw() are buffered in a
    util.chunkbuffer and wrapped in the unpacker class registered for
    version in packermap, as an uncompressed ('UN') stream.
    """
    chunks = getsubsetraw(repo, outgoing, bundler, source, fastpath)
    unpacker = packermap[version][1]
    return unpacker(util.chunkbuffer(chunks), 'UN')
Pierre-Yves David
|
r20927 | |||
Eric Sumner
|
def changegroupsubset(repo, roots, heads, source, version='01'):
    """Compute a changegroup consisting of all the nodes that are
    descendants of any of the roots and ancestors of any of the heads.
    Return a chunkbuffer object whose read() method will return
    successive changegroup chunks.

    It is fairly complex as determining which filenodes and which
    manifest nodes need to be included for the changeset to be complete
    is non-trivial.

    Another wrinkle is doing the reverse, figuring out which changeset in
    the changegroup a particular filenode or manifestnode belongs to.

    An empty roots is treated as [nullid].
    """
    cl = repo.changelog
    if not roots:
        roots = [nullid]
    # TODO: remove call to nodesbetween.
    csets, roots, heads = cl.nodesbetween(roots, heads)
    # the non-null parents of the roots are the bases of the outgoing set
    discbases = [p for n in roots for p in cl.parents(n) if p != nullid]
    outgoing = discovery.outgoing(cl, discbases, heads)
    packer = packermap[version][0]
    bundler = packer(repo)
    return getsubset(repo, outgoing, bundler, source, version=version)
Pierre-Yves David
|
r20927 | |||
Sune Foldager
|
def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                           version='01'):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing. Returns a raw changegroup generator, or
    None when nothing is missing."""
    if outgoing.missing:
        packer = packermap[version][0]
        bundler = packer(repo, bundlecaps)
        return getsubsetraw(repo, outgoing, bundler, source)
    return None
Sune Foldager
|
def getlocalchangegroup(repo, source, outgoing, bundlecaps=None):
    """Like getbundle, but taking a discovery.outgoing as an argument.

    This is only implemented for local repos and reuses potentially
    precomputed sets in outgoing. The changegroup is always built with
    cg1packer; None is returned when nothing is missing."""
    if outgoing.missing:
        bundler = cg1packer(repo, bundlecaps)
        return getsubset(repo, outgoing, bundler, source)
    return None
Gregory Szorc
|
def computeoutgoing(repo, heads, common):
    """Computes which revs are outgoing given a set of common
    and a set of heads.

    This is a separate function so extensions can have access to
    the logic.

    Nodes of common that are unknown to the changelog are dropped; an empty
    common becomes [nullid] and empty heads become the changelog heads.

    Returns a discovery.outgoing object.
    """
    cl = repo.changelog
    if not common:
        common = [nullid]
    else:
        known = cl.hasnode
        common = [n for n in common if known(n)]
    if not heads:
        heads = cl.heads()
    return discovery.outgoing(cl, common, heads)
Sune Foldager
|
def getchangegroup(repo, source, heads=None, common=None, bundlecaps=None):
    """Like changegroupsubset, but returns the set difference between the
    ancestors of heads and the ancestors common.

    If heads is None, use the local heads. If common is None, use [nullid].

    The nodes in common might not all be known locally due to the way the
    current discovery protocol works.
    """
    missing = computeoutgoing(repo, heads, common)
    return getlocalchangegroup(repo, source, missing, bundlecaps=bundlecaps)
Pierre-Yves David
|
r20930 | |||
Pierre-Yves David
|
def changegroup(repo, basenodes, source):
    """Return the changegroup of everything descending from basenodes."""
    # to avoid a race we use changegroupsubset() (issue1320)
    currentheads = repo.heads()
    return changegroupsubset(repo, basenodes, currentheads, source)
Pierre-Yves David
|
def addchangegroupfiles(repo, source, revmap, trp, pr, needfiles):
    """Add the file groups read from source to the filelogs of repo.

    File groups are read until source.filelogheader() returns an empty
    header; pr is called once per file. needfiles maps file names to the
    collection of file nodes expected for them: received nodes are removed
    from it, a received node that is not expected aborts, and so does an
    expected node that is still unknown to its filelog at the end.

    Returns a (number of added revisions, number of files) tuple.
    """
    revisions = 0
    files = 0
    while True:
        chunkdata = source.filelogheader()
        if not chunkdata:
            break
        fname = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        pr()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            if not flog.addgroup(source, revmap, trp):
                raise util.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise util.Abort(_("received delta base is censored: %s") % e)
        newlen = len(flog)
        revisions += newlen - oldlen
        files += 1
        if fname not in needfiles:
            continue
        needs = needfiles[fname]
        # every revision just added must be one we were waiting for
        for rev in xrange(oldlen, newlen):
            node = flog.node(rev)
            if node not in needs:
                raise util.Abort(
                    _("received spurious file revlog entry"))
            needs.remove(node)
        if not needs:
            del needfiles[fname]
    repo.ui.progress(_('files'), None)

    # whatever is still needed must already be present in the filelogs
    for fname, needs in needfiles.iteritems():
        flog = repo.file(fname)
        for node in needs:
            try:
                flog.rev(node)
            except error.LookupError:
                raise util.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(node)))

    return revisions, files
Pierre-Yves David
|
r20933 | |||
Pierre-Yves David
|
def addchangegroup(repo, source, srctype, url, emptyok=False,
                   targetphase=phases.draft, expectedtotal=None):
    """Add the changegroup returned by source.read() to this repo.

    srctype is a string like 'push', 'pull', or 'unbundle'.  url is
    the URL of the repo where this changegroup is coming from.  If the
    transaction already carries 'source' / 'url' hook arguments, those
    values take precedence over the ones passed in.

    emptyok, when true, allows the changelog group to be empty instead
    of aborting.  targetphase is the phase boundary applied to the added
    changesets when srctype is neither 'push', 'serve' nor 'strip'.
    expectedtotal is used as the total for changeset progress output.

    Raises util.Abort if the changelog group is empty and emptyok is
    false.

    Return an integer summarizing the change to this repo:
    - nothing changed or no source: 0
    - more heads than before: 1+added heads (2..n)
    - fewer heads than before: -1-removed heads (-2..-n)
    - number of heads stays the same: 1
    """
    repo = repo.unfiltered()

    def csmap(x):
        # link revision for an incoming changeset: the next changelog rev
        repo.ui.debug("add changeset %s\n" % short(x))
        return len(cl)

    def revmap(x):
        # link revision for manifests/files: rev of the owning changeset
        return cl.rev(x)

    if not source:
        return 0

    efiles = set()

    tr = repo.transaction("\n".join([srctype, util.hidepassword(url)]))
    # The transaction could have been created before and already carries
    # source information. In this case we use the top level data. We
    # overwrite the argument because we need to use the top level value
    # (if they exist) in this function.
    srctype = tr.hookargs.setdefault('source', srctype)
    url = tr.hookargs.setdefault('url', url)

    # write changelog data to temp files so concurrent readers will not see
    # inconsistent view
    cl = repo.changelog
    cl.delayupdate(tr)
    oldheads = cl.heads()
    # set copy for O(1) "was this already a head?" checks below
    oldheadset = set(oldheads)
    try:
        repo.hook('prechangegroup', throw=True, **tr.hookargs)

        trp = weakref.proxy(tr)
        # pull off the changeset group
        repo.ui.status(_("adding changesets\n"))
        clstart = len(cl)

        class prog(object):
            """Callable reporting one more processed chunk per call."""
            def __init__(self, step, total):
                self._step = step
                self._total = total
                self._count = 1

            def __call__(self):
                repo.ui.progress(self._step, self._count, unit=_('chunks'),
                                 total=self._total)
                self._count += 1

        source.callback = prog(_('changesets'), expectedtotal)

        source.changelogheader()
        srccontent = cl.addgroup(source, csmap, trp)
        if not (srccontent or emptyok):
            raise util.Abort(_("received changelog group is empty"))
        clend = len(cl)
        changesets = clend - clstart
        for c in xrange(clstart, clend):
            efiles.update(repo[c].files())
        # number of distinct files touched; used as the files progress total
        efilecount = len(efiles)
        repo.ui.progress(_('changesets'), None)

        # pull off the manifest group
        repo.ui.status(_("adding manifests\n"))
        # manifests <= changesets
        source.callback = prog(_('manifests'), changesets)
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        source.manifestheader()
        repo.manifest.addgroup(source, revmap, trp)
        repo.ui.progress(_('manifests'), None)

        needfiles = {}
        if repo.ui.configbool('server', 'validate', default=False):
            # validate incoming csets have their manifests
            for cset in xrange(clstart, clend):
                mfnode = repo.changelog.read(repo.changelog.node(cset))[0]
                mfest = repo.manifest.readdelta(mfnode)
                # store file nodes we must see
                for f, n in mfest.iteritems():
                    needfiles.setdefault(f, set()).add(n)

        # process the files
        repo.ui.status(_("adding file changes\n"))
        source.callback = None
        pr = prog(_('files'), efilecount)
        revisions, files = addchangegroupfiles(repo, source, revmap, trp,
                                               pr, needfiles)

        # head delta; new heads that close a branch are not counted
        dh = 0
        if oldheads:
            heads = cl.heads()
            dh = len(heads) - len(oldheads)
            for h in heads:
                if h not in oldheadset and repo[h].closesbranch():
                    dh -= 1
        htext = ""
        if dh:
            htext = _(" (%+d heads)") % dh

        repo.ui.status(_("added %d changesets"
                         " with %d changes to %d files%s\n")
                       % (changesets, revisions, files, htext))
        repo.invalidatevolatilesets()

        if changesets > 0:
            def pending():
                # repo root is handed to the hook only when
                # tr.writepending() reports true; otherwise ""
                if tr.writepending():
                    return repo.root or ""
                return ""

            firstnode = hex(cl.node(clstart))
            # an outer 'node' already on the transaction is left alone,
            # but the hooks run here always see the first node added by
            # this changegroup
            tr.hookargs.setdefault('node', firstnode)
            hookargs = dict(tr.hookargs)
            hookargs['node'] = firstnode
            repo.hook('pretxnchangegroup', throw=True, pending=pending,
                      **hookargs)

        added = [cl.node(r) for r in xrange(clstart, clend)]
        publishing = repo.publishing()
        if srctype in ('push', 'serve'):
            # Old servers can not push the boundary themselves.
            # New servers won't push the boundary if changeset already
            # exists locally as secret
            #
            # We should not use added here but the list of all change in
            # the bundle
            if publishing:
                phases.advanceboundary(repo, tr, phases.public, srccontent)
            else:
                # Those changesets have been pushed from the outside, their
                # phases are going to be pushed alongside. Therefore
                # `targetphase` is ignored.
                phases.advanceboundary(repo, tr, phases.draft, srccontent)
                phases.retractboundary(repo, tr, phases.draft, added)
        elif srctype != 'strip':
            # publishing only alter behavior during push
            #
            # strip should not touch boundary at all
            phases.retractboundary(repo, tr, targetphase, added)

        if changesets > 0:
            if srctype != 'strip':
                # During strip, branchcache is invalid but coming call to
                # `destroyed` will repair it.
                # In other case we can safely update cache on disk.
                branchmap.updatecache(repo.filtered('served'))

            def runhooks():
                # These hooks run when the lock releases, not when the
                # transaction closes. So it's possible for the changelog
                # to have changed since we last saw it.
                if clstart >= len(repo):
                    return

                # NOTE: no cache update happens here (the on-disk branch
                # cache is updated above); only the debug message is
                # emitted before the hooks run.
                repo.ui.debug("updating the branch cache\n")
                repo.hook("changegroup", **hookargs)

                for n in added:
                    args = hookargs.copy()
                    args['node'] = hex(n)
                    repo.hook("incoming", **args)

                newheads = [h for h in repo.heads()
                            if h not in oldheadset]
                repo.ui.log("incoming",
                            "%s incoming changes - new heads: %s\n",
                            len(added),
                            ', '.join([hex(c[:6]) for c in newheads]))

            # defer the hooks: after the transaction closes they are
            # queued to run once the lock is released
            tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                            lambda tr: repo._afterlock(runhooks))

        tr.close()

    finally:
        tr.release()
        repo.ui.flush()
    # never return 0 here:
    if dh < 0:
        return dh - 1
    else:
        return dh + 1