##// END OF EJS Templates
shrink: delete extension preventing further refactoring (BC)...
shrink: delete extension preventing further refactoring (BC) Reordering can be done by setting bundle.reorder to true and doing a clone --pull.

File last commit:

r19204:e9c5b1c2 default
r19205:94a22595 default
Show More
changegroup.py
417 lines | 13.8 KiB | text/x-python | PythonLexer
Martin Geisler
put license and copyright info into comment blocks
r8226 # changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Replace demandload with new demandimport
r3877
Matt Mackall
Simplify i18n imports
r3891 from i18n import _
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202 from node import nullrev, hex
Sune Foldager
bundle-ng: move group into the bundler...
r19200 import mdiff, util, dagutil
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import struct, os, bz2, zlib, tempfile
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
Benoit Boissinot
bundler: make parsechunk return the base revision of the delta
r14141 _BUNDLE10_DELTA_HEADER = "20s20s20s20s"
Mads Kiilerich
changegroup: verify all stream reads...
r13457 def readexactly(stream, n):
'''read n bytes from stream.read and abort if less was available'''
s = stream.read(n)
if len(s) < n:
raise util.Abort(_("stream ended unexpectedly"
" (got %d bytes, expected %d)")
% (len(s), n))
return s
def getchunk(stream):
"""return the next chunk from stream as a string"""
d = readexactly(stream, 4)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981 l = struct.unpack(">l", d)[0]
if l <= 4:
Mads Kiilerich
changegroup: don't accept odd chunk headers
r13458 if l:
raise util.Abort(_("invalid chunk length %d") % l)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981 return ""
Mads Kiilerich
changegroup: verify all stream reads...
r13457 return readexactly(stream, l - 4)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
Matt Mackall
changegroup: avoid large copies...
r5368 def chunkheader(length):
Greg Ward
Improve some docstrings relating to changegroups and prepush().
r9437 """return a changegroup chunk header (string)"""
Matt Mackall
changegroup: avoid large copies...
r5368 return struct.pack(">l", length + 4)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
def closechunk():
Greg Ward
Improve some docstrings relating to changegroups and prepush().
r9437 """return a changegroup chunk header (string) for a zero-length chunk"""
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981 return struct.pack(">l", 0)
Matt Mackall
move write_bundle to changegroup.py
r3659 class nocompress(object):
def compress(self, x):
return x
def flush(self):
return ""
Matt Mackall
unduplicate bundle writing code from httprepo
r3662 bundletypes = {
Benoit Boissinot
bundle: more comments about the different header types, remove useless if
r14060 "": ("", nocompress), # only when using unbundle on ssh and old http servers
# since the unification ssh accepts a header but there
# is no capability signaling it.
Benoit Boissinot
fix writebundle for bz2 bundles
r3704 "HG10UN": ("HG10UN", nocompress),
Alexis S. L. Carvalho
changegroup.py: delay the loading of the bz2 and zlib modules
r3762 "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
"HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
Matt Mackall
unduplicate bundle writing code from httprepo
r3662 }
Martin Geisler
typos: "it's" -> "its"
r9087 # hgweb uses this list to communicate its preferred type
Dirkjan Ochtman
hgweb: use bundletypes from mercurial.changegroup
r6152 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
Thomas Arendsen Hein
Use 'bundletype' instead of 'type' to not shadow built-in function.
r3706 def writebundle(cg, filename, bundletype):
Matt Mackall
move write_bundle to changegroup.py
r3659 """Write a bundle file and return its filename.
Existing files will not be overwritten.
If no filename is specified, a temporary file is created.
bz2 compression can be turned off.
The bundle file will be deleted in case of errors.
"""
fh = None
cleanup = None
try:
if filename:
fh = open(filename, "wb")
else:
fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
fh = os.fdopen(fd, "wb")
cleanup = filename
Thomas Arendsen Hein
Use 'bundletype' instead of 'type' to not shadow built-in function.
r3706 header, compressor = bundletypes[bundletype]
Benoit Boissinot
fix writebundle for bz2 bundles
r3704 fh.write(header)
z = compressor()
Matt Mackall
unduplicate bundle writing code from httprepo
r3662
Matt Mackall
move write_bundle to changegroup.py
r3659 # parse the changegroup data, otherwise we will block
# in case of sshrepo because we don't know the end of the stream
Matt Mackall
bundle: get rid of chunkiter
r12335 # an empty chunkgroup is the end of the changegroup
# a changegroup has at least 2 chunkgroups (changelog and manifest).
# after that, an empty chunkgroup is the end of the changegroup
Matt Mackall
move write_bundle to changegroup.py
r3659 empty = False
Alexis S. L. Carvalho
allow the creation of bundles with empty changelog/manifest chunks
r5906 count = 0
while not empty or count <= 2:
Matt Mackall
move write_bundle to changegroup.py
r3659 empty = True
Alexis S. L. Carvalho
allow the creation of bundles with empty changelog/manifest chunks
r5906 count += 1
Martin Geisler
check-code: flag 0/1 used as constant Boolean expression
r14494 while True:
Matt Mackall
bundle: get rid of chunkiter
r12335 chunk = getchunk(cg)
if not chunk:
break
Matt Mackall
move write_bundle to changegroup.py
r3659 empty = False
Matt Mackall
changegroup: avoid large copies...
r5368 fh.write(z.compress(chunkheader(len(chunk))))
pos = 0
while pos < len(chunk):
next = pos + 2**20
fh.write(z.compress(chunk[pos:next]))
pos = next
Matt Mackall
move write_bundle to changegroup.py
r3659 fh.write(z.compress(closechunk()))
fh.write(z.flush())
cleanup = None
return filename
finally:
if fh is not None:
fh.close()
if cleanup is not None:
os.unlink(cleanup)
Matt Mackall
create a readbundle function
r3660
Matt Mackall
bundle: factor out decompressor
r12041 def decompressor(fh, alg):
if alg == 'UN':
Dirkjan Ochtman
improve changegroup.readbundle(), use it in hgweb
r6154 return fh
Matt Mackall
bundle: factor out decompressor
r12041 elif alg == 'GZ':
Dirkjan Ochtman
improve changegroup.readbundle(), use it in hgweb
r6154 def generator(f):
zd = zlib.decompressobj()
Michael Tjørnemark
changegroup: decompress GZ algorithm in larger chunks for better performance
r16557 for chunk in util.filechunkiter(f):
Dirkjan Ochtman
improve changegroup.readbundle(), use it in hgweb
r6154 yield zd.decompress(chunk)
Matt Mackall
bundle: factor out decompressor
r12041 elif alg == 'BZ':
Matt Mackall
create a readbundle function
r3660 def generator(f):
zd = bz2.BZ2Decompressor()
zd.decompress("BZ")
for chunk in util.filechunkiter(f, 4096):
yield zd.decompress(chunk)
Matt Mackall
bundle: factor out decompressor
r12041 else:
raise util.Abort("unknown bundle compression '%s'" % alg)
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329 return util.chunkbuffer(generator(fh))
Matt Mackall
bundle: factor out decompressor
r12041
Matt Mackall
bundle: introduce bundle class
r12043 class unbundle10(object):
Benoit Boissinot
bundler: make parsechunk return the base revision of the delta
r14141 deltaheader = _BUNDLE10_DELTA_HEADER
deltaheadersize = struct.calcsize(deltaheader)
Matt Mackall
bundle: introduce bundle class
r12043 def __init__(self, fh, alg):
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329 self._stream = decompressor(fh, alg)
Matt Mackall
bundlerepo: remove duplication of bundle decompressors
r12044 self._type = alg
Matt Mackall
bundle: refactor progress callback...
r12334 self.callback = None
Matt Mackall
bundlerepo: remove duplication of bundle decompressors
r12044 def compressed(self):
return self._type != 'UN'
Matt Mackall
bundle: introduce bundle class
r12043 def read(self, l):
return self._stream.read(l)
Matt Mackall
bundle: make unbundle object seekable...
r12330 def seek(self, pos):
return self._stream.seek(pos)
def tell(self):
Matt Mackall
bundlerepo: use bundle objects everywhere
r12332 return self._stream.tell()
Matt Mackall
bundlerepo: restore close() method
r12347 def close(self):
return self._stream.close()
Matt Mackall
bundle: refactor progress callback...
r12334
def chunklength(self):
Jim Hague
changegroup: fix typo introduced in 9f2c407caf34
r13459 d = readexactly(self._stream, 4)
Mads Kiilerich
changegroup: don't accept odd chunk headers
r13458 l = struct.unpack(">l", d)[0]
if l <= 4:
if l:
raise util.Abort(_("invalid chunk length %d") % l)
return 0
if self.callback:
Matt Mackall
bundle: refactor progress callback...
r12334 self.callback()
Mads Kiilerich
changegroup: don't accept odd chunk headers
r13458 return l - 4
Matt Mackall
bundle: refactor progress callback...
r12334
Benoit Boissinot
unbundler: separate delta and header parsing...
r14144 def changelogheader(self):
"""v10 does not have a changelog header chunk"""
return {}
def manifestheader(self):
"""v10 does not have a manifest header chunk"""
return {}
def filelogheader(self):
"""return the header of the filelogs chunk, v10 only has the filename"""
Matt Mackall
bundle: refactor progress callback...
r12334 l = self.chunklength()
Benoit Boissinot
unbundler: separate delta and header parsing...
r14144 if not l:
return {}
fname = readexactly(self._stream, l)
return dict(filename=fname)
Matt Mackall
bundle: refactor progress callback...
r12334
Benoit Boissinot
bundler: make parsechunk return the base revision of the delta
r14141 def _deltaheader(self, headertuple, prevnode):
node, p1, p2, cs = headertuple
if prevnode is None:
deltabase = p1
else:
deltabase = prevnode
return node, p1, p2, deltabase, cs
Benoit Boissinot
unbundler: separate delta and header parsing...
r14144 def deltachunk(self, prevnode):
Matt Mackall
bundle: move chunk parsing into unbundle class
r12336 l = self.chunklength()
if not l:
return {}
Benoit Boissinot
bundler: make parsechunk return the base revision of the delta
r14141 headerdata = readexactly(self._stream, self.deltaheadersize)
header = struct.unpack(self.deltaheader, headerdata)
delta = readexactly(self._stream, l - self.deltaheadersize)
node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
return dict(node=node, p1=p1, p2=p2, cs=cs,
deltabase=deltabase, delta=delta)
Matt Mackall
bundle: move chunk parsing into unbundle class
r12336
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329 class headerlessfixup(object):
def __init__(self, fh, h):
self._h = h
self._fh = fh
def read(self, n):
if self._h:
d, self._h = self._h[:n], self._h[n:]
if len(d) < n:
Mads Kiilerich
changegroup: verify all stream reads...
r13457 d += readexactly(self._fh, n - len(d))
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329 return d
Mads Kiilerich
changegroup: verify all stream reads...
r13457 return readexactly(self._fh, n)
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329
Dirkjan Ochtman
improve changegroup.readbundle(), use it in hgweb
r6154 def readbundle(fh, fname):
Mads Kiilerich
changegroup: verify all stream reads...
r13457 header = readexactly(fh, 6)
Matt Mackall
bundle: unify/refactor unbundle/readbundle
r12042
if not fname:
fname = "stream"
if not header.startswith('HG') and header.startswith('\0'):
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329 fh = headerlessfixup(fh, header)
Matt Mackall
bundle: unify/refactor unbundle/readbundle
r12042 header = "HG10UN"
magic, version, alg = header[0:2], header[2:4], header[4:6]
if magic != 'HG':
raise util.Abort(_('%s: not a Mercurial bundle') % fname)
if version != '10':
raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
Matt Mackall
bundle: introduce bundle class
r12043 return unbundle10(fh, alg)
Matt Mackall
changegroup: introduce bundler objects...
r13831
class bundle10(object):
Benoit Boissinot
changegroup: new bundler API
r14143 deltaheader = _BUNDLE10_DELTA_HEADER
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202 def __init__(self, repo, bundlecaps=None):
"""Given a source repo, construct a bundler.
bundlecaps is optional and can be used to specify the set of
capabilities which can be used to build the bundle.
"""
Benoit Boissinot
bundle-ng: add bundlecaps argument to getbundle() command
r19201 # Set of capabilities we can use to build the bundle.
if bundlecaps is None:
bundlecaps = set()
self._bundlecaps = bundlecaps
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202 self._changelog = repo.changelog
self._manifest = repo.manifest
reorder = repo.ui.config('bundle', 'reorder', 'auto')
if reorder == 'auto':
reorder = None
else:
reorder = util.parsebool(reorder)
self._repo = repo
self._reorder = reorder
self.count = [0, 0]
Benoit Boissinot
bundle-ng: move bundler creation up in the stack...
r19199 def start(self, lookup):
Matt Mackall
changegroup: introduce bundler objects...
r13831 self._lookup = lookup
def close(self):
return closechunk()
Sune Foldager
bundle-ng: move group into the bundler...
r19200
Matt Mackall
changegroup: introduce bundler objects...
r13831 def fileheader(self, fname):
return chunkheader(len(fname)) + fname
Sune Foldager
bundle-ng: move group into the bundler...
r19200
def group(self, nodelist, revlog, reorder=None):
"""Calculate a delta group, yielding a sequence of changegroup chunks
(strings).
Given a list of changeset revs, return a set of deltas and
metadata corresponding to nodes. The first delta is
first parent(nodelist[0]) -> nodelist[0], the receiver is
guaranteed to have this parent as it has all history before
these changesets. In the case firstparent is nullrev the
changegroup starts with a full revision.
"""
# if we don't have any revisions touched by these changesets, bail
if len(nodelist) == 0:
yield self.close()
return
# for generaldelta revlogs, we linearize the revs; this will both be
# much quicker and generate a much smaller bundle
if (revlog._generaldelta and reorder is not False) or reorder:
dag = dagutil.revlogdag(revlog)
revs = set(revlog.rev(n) for n in nodelist)
revs = dag.linearize(revs)
else:
revs = sorted([revlog.rev(n) for n in nodelist])
# add the parent of the first rev
p = revlog.parentrevs(revs[0])[0]
revs.insert(0, p)
# build deltas
for r in xrange(len(revs) - 1):
prev, curr = revs[r], revs[r + 1]
for c in self.revchunk(revlog, curr, prev):
yield c
yield self.close()
Benoit Boissinot
bundle-ng: move bundle generation to changegroup.py
r19204 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202 '''yield a sequence of changegroup chunks (strings)'''
repo = self._repo
cl = self._changelog
mf = self._manifest
reorder = self._reorder
progress = repo.ui.progress
count = self.count
_bundling = _('bundling')
Benoit Boissinot
bundle-ng: move bundle generation to changegroup.py
r19204 _changesets = _('changesets')
_manifests = _('manifests')
_files = _('files')
mfs = {} # needed manifests
fnodes = {} # needed file nodes
changedfiles = set()
fstate = ['', {}]
# filter any nodes that claim to be part of the known set
def prune(revlog, missing):
rr, rl = revlog.rev, revlog.linkrev
return [n for n in missing
if rl(rr(n)) not in commonrevs]
def lookup(revlog, x):
if revlog == cl:
c = cl.read(x)
changedfiles.update(c[3])
mfs.setdefault(c[0], x)
count[0] += 1
progress(_bundling, count[0],
unit=_changesets, total=count[1])
return x
elif revlog == mf:
clnode = mfs[x]
if not fastpathlinkrev:
mdata = mf.readfast(x)
for f, n in mdata.iteritems():
if f in changedfiles:
fnodes[f].setdefault(n, clnode)
count[0] += 1
progress(_bundling, count[0],
unit=_manifests, total=count[1])
return clnode
else:
progress(_bundling, count[0], item=fstate[0],
unit=_files, total=count[1])
return fstate[1][x]
self.start(lookup)
def getmfnodes():
for f in changedfiles:
fnodes[f] = {}
count[:] = [0, len(mfs)]
return prune(mf, mfs)
def getfiles():
mfs.clear()
return changedfiles
def getfilenodes(fname, filerevlog):
if fastpathlinkrev:
ln, llr = filerevlog.node, filerevlog.linkrev
def genfilenodes():
for r in filerevlog:
linkrev = llr(r)
if linkrev not in commonrevs:
yield filerevlog.node(r), cl.node(linkrev)
fnodes[fname] = dict(genfilenodes())
fstate[0] = fname
fstate[1] = fnodes.pop(fname, {})
return prune(filerevlog, fstate[1])
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202
count[:] = [0, len(clnodes)]
for chunk in self.group(clnodes, cl, reorder=reorder):
yield chunk
progress(_bundling, None)
for chunk in self.group(getmfnodes(), mf, reorder=reorder):
yield chunk
progress(_bundling, None)
changedfiles = getfiles()
count[:] = [0, len(changedfiles)]
for fname in sorted(changedfiles):
filerevlog = repo.file(fname)
if not len(filerevlog):
raise util.Abort(_("empty or missing revlog for %s")
% fname)
nodelist = getfilenodes(fname, filerevlog)
if nodelist:
count[0] += 1
yield self.fileheader(fname)
for chunk in self.group(nodelist, filerevlog, reorder):
yield chunk
yield self.close()
progress(_bundling, None)
if clnodes:
repo.hook('outgoing', node=hex(clnodes[0]), source=source)
Sune Foldager
bundle-ng: move group into the bundler...
r19200
Benoit Boissinot
changegroup: new bundler API
r14143 def revchunk(self, revlog, rev, prev):
node = revlog.node(rev)
p1, p2 = revlog.parentrevs(rev)
base = prev
prefix = ''
if base == nullrev:
delta = revlog.revision(node)
prefix = mdiff.trivialdiffheader(len(delta))
else:
delta = revlog.revdiff(base, rev)
Matt Mackall
changegroup: introduce bundler objects...
r13831 linknode = self._lookup(revlog, node)
Benoit Boissinot
changegroup: new bundler API
r14143 p1n, p2n = revlog.parents(node)
basenode = revlog.node(base)
meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
meta += prefix
l = len(meta) + len(delta)
Matt Mackall
changegroup: introduce bundler objects...
r13831 yield chunkheader(l)
yield meta
Benoit Boissinot
changegroup: new bundler API
r14143 yield delta
def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
# do nothing with basenode, it is implicitly the previous one in HG10
return struct.pack(self.deltaheader, node, p1n, p2n, linknode)