changegroup.py
430 lines
| 14.7 KiB
| text/x-python
|
PythonLexer
/ mercurial / changegroup.py
Martin Geisler
|
# changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Mackall
|
r3877 | |||
Matt Mackall
|
r3891 | from i18n import _ | ||
Sune Foldager
|
r19202 | from node import nullrev, hex | ||
Sune Foldager
|
r19200 | import mdiff, util, dagutil | ||
Simon Heimberg
|
r8312 | import struct, os, bz2, zlib, tempfile | ||
Thomas Arendsen Hein
|
r1981 | |||
Benoit Boissinot
|
# struct format of the header that precedes every delta in a bundle10
# changegroup: four 20-byte fields (node, first parent, second parent,
# linked changeset node)
_BUNDLE10_DELTA_HEADER = "20s20s20s20s"
Mads Kiilerich
|
def readexactly(stream, n):
    """read n bytes from stream.read and abort if less was available

    Returns the n bytes read.  Raises util.Abort, reporting both the number
    of bytes obtained and the number requested, if the stream returns less.
    """
    s = stream.read(n)
    if len(s) < n:
        raise util.Abort(_("stream ended unexpectedly"
                           " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s
def getchunk(stream):
    """return the next chunk from stream as a string

    A chunk starts with a 4-byte big-endian signed length that includes the
    length field itself.  A length of 0 marks an empty chunk, for which ""
    is returned; any other length of 4 or less (negative values included)
    raises util.Abort.
    """
    d = readexactly(stream, 4)
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        if l:
            raise util.Abort(_("invalid chunk length %d") % l)
        return ""
    # the payload is what remains once the 4 length bytes are accounted for
    return readexactly(stream, l - 4)
Thomas Arendsen Hein
|
r1981 | |||
Matt Mackall
|
def chunkheader(length):
    """return a changegroup chunk header (string)

    length is the size of the payload that follows; the encoded value is
    length + 4 because it also counts the 4-byte header itself.
    """
    return struct.pack(">l", length + 4)
Thomas Arendsen Hein
|
r1981 | |||
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # the encoded length is a literal 0, not 4: getchunk() reads that value
    # as an empty chunk
    return struct.pack(">l", 0)
Matt Mackall
|
class nocompress(object):
    """compressor stand-in that passes data through unchanged

    Provides the compress()/flush() calls that writebundle() makes on the
    object built by a bundletypes factory.
    """
    def compress(self, x):
        # identity: the "compressed" form is the input itself
        return x
    def flush(self):
        # compress() never holds data back, so there is nothing left to emit
        return ""
Matt Mackall
|
# maps a bundle type name to a pair (header written at the start of the file,
# factory returning an object with compress() and flush())
bundletypes = {
    "": ("", nocompress), # only when using unbundle on ssh and old http servers
                          # since the unification ssh accepts a header but there
                          # is no capability signaling it.
    "HG10UN": ("HG10UN", nocompress),
    # only "HG10" is written for bz2; decompressor() feeds "BZ" back into the
    # bz2 decompressor, i.e. those two header bytes belong to the bz2 stream
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}
Martin Geisler
|
# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
Thomas Arendsen Hein
|
def writebundle(cg, filename, bundletype):
    """Write a bundle file and return its filename.

    cg is the changegroup stream; it is consumed chunk by chunk with
    getchunk().  bundletype must be a key of bundletypes and selects both
    the file header and the compression (none, bz2 or zlib).

    If filename is given, that file is opened with mode "wb", which
    truncates any existing content.
    If no filename is specified, a temporary file is created.
    The bundle file will be deleted in case of errors.
    """

    fh = None
    cleanup = None
    try:
        if filename:
            fh = open(filename, "wb")
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        # from here on, any failure removes the partially written file
        cleanup = filename

        header, compressor = bundletypes[bundletype]
        fh.write(header)
        z = compressor()

        # parse the changegroup data, otherwise we will block
        # in case of sshrepo because we don't know the end of the stream

        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        empty = False
        count = 0
        while not empty or count <= 2:
            empty = True
            count += 1
            while True:
                chunk = getchunk(cg)
                if not chunk:
                    break
                empty = False
                fh.write(z.compress(chunkheader(len(chunk))))
                # feed the compressor at most 1 MiB of the chunk at a time
                pos = 0
                while pos < len(chunk):
                    end = pos + 2**20
                    fh.write(z.compress(chunk[pos:end]))
                    pos = end
            # terminate the chunkgroup that was just copied
            fh.write(z.compress(closechunk()))
        fh.write(z.flush())
        # success: keep the file
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            os.unlink(cleanup)
Matt Mackall
|
r3660 | |||
Matt Mackall
|
def decompressor(fh, alg):
    """return a stream of the uncompressed data of fh

    alg is the two-letter compression code: 'UN' returns fh itself, while
    'GZ' and 'BZ' return a util.chunkbuffer fed by a generator that
    decompresses fh piece by piece.  Any other value raises util.Abort.
    """
    if alg == 'UN':
        return fh
    elif alg == 'GZ':
        def generator(f):
            zd = zlib.decompressobj()
            for chunk in util.filechunkiter(f):
                yield zd.decompress(chunk)
    elif alg == 'BZ':
        def generator(f):
            zd = bz2.BZ2Decompressor()
            # readbundle() consumed "BZ" as the last two bytes of the 6-byte
            # bundle header; give them back to the bz2 decompressor first
            zd.decompress("BZ")
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    else:
        raise util.Abort("unknown bundle compression '%s'" % alg)
    return util.chunkbuffer(generator(fh))
Matt Mackall
|
r12041 | |||
Matt Mackall
|
class unbundle10(object):
    """reader for a bundle10 changegroup stream

    Wraps fh, decompressed according to alg, and parses it chunk by chunk.
    If the callback attribute is set, it is called with no arguments each
    time chunklength() reads the length of a non-empty chunk.
    """
    deltaheader = _BUNDLE10_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    def __init__(self, fh, alg):
        self._stream = decompressor(fh, alg)
        self._type = alg
        self.callback = None
    def compressed(self):
        """return True unless the bundle compression type is 'UN'"""
        return self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def chunklength(self):
        """read a chunk header and return the payload length of the chunk

        Returns 0 for an empty chunk (encoded length 0).  Any other encoded
        length of 4 or less raises util.Abort.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise util.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # the encoded length counts the 4 header bytes as well
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename

        The result is a dict with the single key 'filename', or {} when an
        empty chunk is read instead.
        """
        l = self.chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return dict(filename=fname)

    def _deltaheader(self, headertuple, prevnode):
        """expand an unpacked delta header with its implicit delta base

        The v10 header does not store the delta base: it is prevnode, or the
        first parent when prevnode is None.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        return node, p1, p2, deltabase, cs

    def deltachunk(self, prevnode):
        """read the next delta chunk

        prevnode is passed on to _deltaheader() to determine the delta base.
        Returns a dict with the keys node, p1, p2, cs, deltabase and delta,
        or {} when an empty chunk is read.
        """
        l = self.chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        # whatever follows the fixed-size header is the delta itself
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
        return dict(node=node, p1=p1, p2=p2, cs=cs,
                    deltabase=deltabase, delta=delta)
Matt Mackall
|
r12336 | |||
Matt Mackall
|
class headerlessfixup(object):
    """reader that serves the already-read bytes h before continuing with fh

    Only read() is provided.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        """return exactly n bytes, taken from the saved bytes first"""
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                # saved bytes are used up: take the remainder from the stream
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)
Matt Mackall
|
r12329 | |||
Dirkjan Ochtman
|
def readbundle(fh, fname):
    """read the 6-byte bundle header from fh and return an unbundle10 for it

    fname is only used in error messages; when it is empty, "stream" is
    used instead.  For such an unnamed stream, a header that starts with a
    NUL byte rather than 'HG' is treated as headerless uncompressed data:
    the bytes already read are put back in front of fh.

    Raises util.Abort if the magic is not 'HG' or the version is not '10'.
    """
    header = readexactly(fh, 6)

    if not fname:
        fname = "stream"
        if not header.startswith('HG') and header.startswith('\0'):
            # the 6 bytes just read are changegroup data, not a header
            fh = headerlessfixup(fh, header)
            header = "HG10UN"

    magic, version, alg = header[0:2], header[2:4], header[4:6]
    if magic != 'HG':
        raise util.Abort(_('%s: not a Mercurial bundle') % fname)
    if version != '10':
        raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
    return unbundle10(fh, alg)
Matt Mackall
|
r13831 | |||
class bundle10(object):
    """generator of changegroup chunks in the bundle10 format"""
    deltaheader = _BUNDLE10_DELTA_HEADER
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._changelog = repo.changelog
        self._manifest = repo.manifest
        # bundle.reorder: 'auto' (stored as None) lets group() decide per
        # revlog, any other value is parsed as a boolean
        reorder = repo.ui.config('bundle', 'reorder', 'auto')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
    def close(self):
        """return the zero-length chunk that ends a group"""
        return closechunk()

    def fileheader(self, fname):
        """return the chunk that introduces the group of file fname"""
        return chunkheader(len(fname)) + fname

    def group(self, nodelist, revlog, lookup, units=None, reorder=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        lookup is called with each node of the group and returns the
        linknode stored in that node's delta header.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and reorder is not False) or reorder:
            dag = dagutil.revlogdag(revlog)
            revs = set(revlog.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([revlog.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(total):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        yield self.close()

    def prune(self, revlog, missing, commonrevs, source):
        """filter any nodes that claim to be part of the known set

        Returns the list of nodes from missing whose linkrev in revlog is
        not in commonrevs.  source is not used by this implementation.
        """
        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        The changelog group for clnodes comes first, then the manifest
        group, then one group per changed file, then a closing chunk.  Once
        everything has been yielded, the 'outgoing' hook is run if clnodes
        is not empty.
        '''
        repo = self._repo
        cl = self._changelog
        mf = self._manifest
        reorder = self._reorder
        progress = self._progress

        # for progress output
        msgbundling = _('bundling')

        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            changedfiles.update(c[3])
            # record the first changeset introducing this manifest version
            mfs.setdefault(c[0], x)
            return x

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def lookupmf(x):
            clnode = mfs[x]
            if not fastpathlinkrev:
                mdata = mf.readfast(x)
                for f, n in mdata.iteritems():
                    if f in changedfiles:
                        # record the first changeset introducing this filelog
                        # version
                        fnodes[f].setdefault(n, clnode)
            return clnode

        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets'),
                                reorder=reorder):
            yield chunk
        progress(msgbundling, None)

        # changedfiles is complete once the changelog group has been
        # generated; lookupmf relies on every changed file having an entry
        for f in changedfiles:
            fnodes[f] = {}
        mfnodes = self.prune(mf, mfs, commonrevs, source)
        for chunk in self.group(mfnodes, mf, lookupmf, units=_('manifests'),
                                reorder=reorder):
            yield chunk
        progress(msgbundling, None)

        mfs.clear()

        # Returns the {filenode: linknode} mapping for fname.  In the
        # fastpath case it is computed here from the filelog's linkrevs,
        # otherwise it was collected by lookupmf.
        def linknodes(filerevlog, fname):
            if fastpathlinkrev:
                ln, llr = filerevlog.node, filerevlog.linkrev
                needed = set(cl.rev(x) for x in clnodes)
                def genfilenodes():
                    for r in filerevlog:
                        linkrev = llr(r)
                        if linkrev in needed:
                            yield ln(r), cl.node(linkrev)
                fnodes[fname] = dict(genfilenodes())
            return fnodes.get(fname, {})

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()
        progress(msgbundling, None)

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """yield the chunks of the file groups, in sorted file name order

        linknodes(filerevlog, fname) must return the {filenode: linknode}
        mapping for a file.  A file with no node left after prune() yields
        nothing.  Raises util.Abort if the revlog of a changed file is empty
        or missing.
        """
        repo = self._repo
        progress = self._progress
        reorder = self._reorder
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise util.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs, source)
            if filenodes:
                progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                         total=total)
                yield self.fileheader(fname)
                for chunk in self.group(filenodes, filerevlog, lookupfilelog,
                                        reorder=reorder):
                    yield chunk

    def revchunk(self, revlog, rev, prev, linknode):
        """yield the pieces of the chunk for rev, as a delta against prev

        Three strings are yielded: the chunk header, the delta header and
        the delta.  When prev is nullrev the full revision text is sent
        instead of a diff, with a trivial diff header appended to the delta
        header.
        """
        node = revlog.node(rev)
        base = prev

        prefix = ''
        if base == nullrev:
            delta = revlog.revision(node)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
        meta += prefix
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
        """return the packed delta header for node"""
        # do nothing with basenode, it is implicitly the previous one in HG10
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)