##// END OF EJS Templates
discovery: stop using nodemap for membership testing...
discovery: stop using nodemap for membership testing. Nodemap is not aware of filtering, so we need to ask the changelog itself whether a node is known. This is probably a bit slower, but such a check does not dominate discovery time. This is necessary if we want to run discovery on a filtered repo.

File last commit:

r19708:fd4f612f stable
r20225:d2704c48 default
Show More
changegroup.py
430 lines | 14.7 KiB | text/x-python | PythonLexer
Martin Geisler
put license and copyright info into comment blocks
r8226 # changegroup.py - Mercurial changegroup manipulation functions
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Matt Mackall
Replace demandload with new demandimport
r3877
Matt Mackall
Simplify i18n imports
r3891 from i18n import _
Sune Foldager
bundle-ng: move gengroup into bundler, pass repo object to bundler...
r19202 from node import nullrev, hex
Sune Foldager
bundle-ng: move group into the bundler...
r19200 import mdiff, util, dagutil
Simon Heimberg
separate import lines from mercurial and general python modules
r8312 import struct, os, bz2, zlib, tempfile
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
Benoit Boissinot
bundler: make parsechunk return the base revision of the delta
# struct format of the per-delta header in HG10 changegroups: four 20-byte
# strings, packed as (node, p1, p2, linknode) by bundle10.builddeltaheader
# and unpacked as (node, p1, p2, cs) by unbundle10.deltachunk
_BUNDLE10_DELTA_HEADER = "20s20s20s20s"
Mads Kiilerich
changegroup: verify all stream reads...
def readexactly(stream, n):
    '''read exactly n bytes from stream.read and abort if less was available

    stream is any object with a read(size) method. Returns the data read.
    Raises util.Abort when stream.read(n) returns fewer than n bytes.
    '''
    s = stream.read(n)
    if len(s) < n:
        raise util.Abort(_("stream ended unexpectedly"
                           " (got %d bytes, expected %d)")
                         % (len(s), n))
    return s
def getchunk(stream):
    """return the next chunk from stream as a string

    A chunk starts with a 4-byte big-endian signed length that includes the
    length field itself. A length of 0 marks an empty chunk and "" is
    returned. Any other length of 4 or less (including negative values)
    raises util.Abort, as does a stream that ends early.
    """
    d = readexactly(stream, 4)
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        if l:
            raise util.Abort(_("invalid chunk length %d") % l)
        return ""
    # the payload is whatever remains after the 4 length bytes
    return readexactly(stream, l - 4)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
Matt Mackall
changegroup: avoid large copies...
def chunkheader(length):
    """return a changegroup chunk header (string)

    length is the size of the payload that follows; the packed 4-byte
    big-endian value also counts the header itself, hence the + 4.
    """
    return struct.pack(">l", length + 4)
Thomas Arendsen Hein
make incoming work via ssh (issue139); move chunk code into separate module....
r1981
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk

    The packed length is 0, which getchunk() reads back as an empty chunk.
    """
    return struct.pack(">l", 0)
Matt Mackall
move write_bundle to changegroup.py
class nocompress(object):
    """pass-through object offering the compress()/flush() methods that
    writebundle() calls on a compressor"""
    def compress(self, x):
        # hand the data back unchanged
        return x
    def flush(self):
        # nothing is ever buffered, so there is nothing left to emit
        return ""
Matt Mackall
unduplicate bundle writing code from httprepo
# maps a bundle type name to a pair:
# (header string written at the start of the file, compressor factory)
bundletypes = {
    "": ("", nocompress), # only when using unbundle on ssh and old http servers
                          # since the unification ssh accepts a header but there
                          # is no capability signaling it.
    "HG10UN": ("HG10UN", nocompress),
    # only "HG10" is written as header here; presumably the "BZ" that
    # completes the 6-byte type comes from the start of the bz2 stream
    # itself (decompressor() feeds "BZ" back in before the payload)
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}

# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
Thomas Arendsen Hein
Use 'bundletype' instead of 'type' to not shadow built-in function.
def writebundle(cg, filename, bundletype):
    """Write a bundle file and return its filename.

    cg is the changegroup stream, read chunk by chunk with getchunk().
    bundletype is a key of bundletypes; it selects the header and the
    compressor (compression can be turned off with an uncompressed type).

    If filename is given, it is opened with mode "wb", so an existing file
    is truncated.
    If no filename is specified, a temporary file is created.
    The bundle file will be deleted in case of errors.
    """

    fh = None
    cleanup = None
    try:
        if filename:
            fh = open(filename, "wb")
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        # from here on, any error removes the (partial) bundle file
        cleanup = filename

        header, compressor = bundletypes[bundletype]
        fh.write(header)
        z = compressor()

        # parse the changegroup data, otherwise we will block
        # in case of sshrepo because we don't know the end of the stream

        # an empty chunkgroup is the end of the changegroup
        # a changegroup has at least 2 chunkgroups (changelog and manifest).
        # after that, an empty chunkgroup is the end of the changegroup
        empty = False
        count = 0
        while not empty or count <= 2:
            empty = True
            count += 1
            while True:
                chunk = getchunk(cg)
                if not chunk:
                    break
                empty = False
                fh.write(z.compress(chunkheader(len(chunk))))
                # feed the compressor at most 1 MiB at a time
                pos = 0
                while pos < len(chunk):
                    end = pos + 2**20
                    fh.write(z.compress(chunk[pos:end]))
                    pos = end
            fh.write(z.compress(closechunk()))
        fh.write(z.flush())
        # success: keep the file
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            os.unlink(cleanup)
Matt Mackall
create a readbundle function
r3660
Matt Mackall
bundle: factor out decompressor
def decompressor(fh, alg):
    """return a readable object yielding the decompressed content of fh

    alg is the two-character compression code of the bundle header:
    'UN' returns fh itself, 'GZ' and 'BZ' wrap fh in a util.chunkbuffer
    fed by a decompressing generator. Any other value raises util.Abort.
    """
    if alg == 'UN':
        return fh
    elif alg == 'GZ':
        def generator(f):
            zd = zlib.decompressobj()
            for chunk in util.filechunkiter(f):
                yield zd.decompress(chunk)
    elif alg == 'BZ':
        def generator(f):
            zd = bz2.BZ2Decompressor()
            # prime the decompressor with "BZ"; presumably those two bytes
            # of the bz2 stream were already consumed as part of the
            # bundle header by the caller
            zd.decompress("BZ")
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    else:
        raise util.Abort(_("unknown bundle compression '%s'") % alg)
    return util.chunkbuffer(generator(fh))
Matt Mackall
bundle: factor out decompressor
r12041
Matt Mackall
bundle: introduce bundle class
class unbundle10(object):
    """reader for an HG10 changegroup stream

    Wraps fh in the decompressor selected by alg and offers file-like
    access (read/seek/tell/close) plus chunk-level parsing helpers.
    """
    deltaheader = _BUNDLE10_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    def __init__(self, fh, alg):
        self._stream = decompressor(fh, alg)
        self._type = alg
        # optional callable, invoked by chunklength() for every non-empty
        # chunk (used for progress reporting)
        self.callback = None
    def compressed(self):
        """return True unless the bundle type is 'UN'"""
        return self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def chunklength(self):
        """read a chunk header and return the payload length

        Returns 0 for an empty chunk. Raises util.Abort for any other
        encoded length of 4 or less. Calls self.callback, if set, before
        returning a non-zero length.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise util.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # the encoded length includes the 4 header bytes
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self.chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return dict(filename=fname)

    def _deltaheader(self, headertuple, prevnode):
        """expand an unpacked v10 header into (node, p1, p2, deltabase, cs)

        v10 headers carry no explicit delta base: it is p1 when prevnode is
        None, otherwise prevnode.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        return node, p1, p2, deltabase, cs

    def deltachunk(self, prevnode):
        """read the next delta chunk and return it as a dict

        Returns {} for an empty chunk, otherwise a dict with the keys node,
        p1, p2, cs, deltabase and delta. prevnode is passed on to
        _deltaheader() to determine the delta base.
        """
        l = self.chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        # the rest of the chunk after the fixed-size header is the delta
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs = self._deltaheader(header, prevnode)
        return dict(node=node, p1=p1, p2=p2, cs=cs,
                    deltabase=deltabase, delta=delta)
Matt Mackall
bundle: move chunk parsing into unbundle class
r12336
Matt Mackall
bundle: push chunkbuffer down into decompress...
class headerlessfixup(object):
    """file-like wrapper that replays already-consumed bytes before fh

    h holds bytes that were read from fh earlier; read() serves them first
    and then continues with fh.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        """return exactly n bytes, taken from the saved bytes first"""
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                # saved bytes ran out: top up from the underlying stream
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)
Matt Mackall
bundle: push chunkbuffer down into decompress...
r12329
Dirkjan Ochtman
improve changegroup.readbundle(), use it in hgweb
def readbundle(fh, fname):
    """read the 6-byte bundle header from fh and return an unbundle10

    fname is only used in error messages; when it is empty, "stream" is
    used instead and a headerless stream (one that does not start with
    'HG' but starts with a NUL byte) is accepted as uncompressed HG10.

    Raises util.Abort when the magic is not 'HG' or the version is not
    '10'.
    """
    header = readexactly(fh, 6)

    if not fname:
        fname = "stream"
        if not header.startswith('HG') and header.startswith('\0'):
            # the 6 bytes just read are data, not a header: replay them
            fh = headerlessfixup(fh, header)
            header = "HG10UN"

    magic, version, alg = header[0:2], header[2:4], header[4:6]
    if magic != 'HG':
        raise util.Abort(_('%s: not a Mercurial bundle') % fname)
    if version != '10':
        raise util.Abort(_('%s: unknown bundle version %s') % (fname, version))
    return unbundle10(fh, alg)
Matt Mackall
changegroup: introduce bundler objects...
r13831
class bundle10(object):
    """generator of HG10 changegroup chunks for a source repository"""
    deltaheader = _BUNDLE10_DELTA_HEADER
    def __init__(self, repo, bundlecaps=None):
        """Given a source repo, construct a bundler.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle.
        """
        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._changelog = repo.changelog
        self._manifest = repo.manifest
        # bundle.reorder: 'auto' (the default) is stored as None, which
        # lets group() decide per revlog; any other value goes through
        # util.parsebool
        reorder = repo.ui.config('bundle', 'reorder', 'auto')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = util.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        self._progress = repo.ui.progress
    def close(self):
        """return the chunk that terminates a chunk group"""
        return closechunk()

    def fileheader(self, fname):
        """return the chunk announcing the filelog named fname"""
        return chunkheader(len(fname)) + fname

    def group(self, nodelist, revlog, lookup, units=None, reorder=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        lookup is called with each node and must return its link node.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (revlog._generaldelta and reorder is not False) or reorder:
            dag = dagutil.revlogdag(revlog)
            revs = set(revlog.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([revlog.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = revlog.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        total = len(revs) - 1
        msgbundling = _('bundling')
        for r in xrange(len(revs) - 1):
            if units is not None:
                self._progress(msgbundling, r + 1, unit=units, total=total)
            # each revision is sent as a delta against the one before it
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(revlog.node(curr))
            for c in self.revchunk(revlog, curr, prev, linknode):
                yield c

        yield self.close()

    # filter any nodes that claim to be part of the known set
    def prune(self, revlog, missing, commonrevs, source):
        """return the nodes of missing whose linkrev is not in commonrevs

        source is not used here.
        """
        rr, rl = revlog.rev, revlog.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)

        The changelog group for clnodes comes first, then the manifest
        group, then one group per changed file, then a closing chunk.
        commonrevs is used by prune() to drop nodes whose linkrev is
        already known. When fastpathlinkrev is true, file nodes are found
        by scanning each filelog's linkrevs instead of reading manifests.
        The 'outgoing' hook runs at the end when clnodes is not empty.
        '''
        repo = self._repo
        cl = self._changelog
        mf = self._manifest
        reorder = self._reorder
        progress = self._progress

        # for progress output
        msgbundling = _('bundling')

        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and manifest
        # nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            changedfiles.update(c[3])
            # record the first changeset introducing this manifest version
            mfs.setdefault(c[0], x)
            return x

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def lookupmf(x):
            clnode = mfs[x]
            if not fastpathlinkrev:
                mdata = mf.readfast(x)
                for f, n in mdata.iteritems():
                    if f in changedfiles:
                        # record the first changeset introducing this filelog
                        # version
                        fnodes[f].setdefault(n, clnode)
            return clnode

        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets'),
                                reorder=reorder):
            yield chunk
        progress(msgbundling, None)

        # changedfiles is complete now; lookupmf fills these dicts in
        for f in changedfiles:
            fnodes[f] = {}
        mfnodes = self.prune(mf, mfs, commonrevs, source)
        for chunk in self.group(mfnodes, mf, lookupmf, units=_('manifests'),
                                reorder=reorder):
            yield chunk
        progress(msgbundling, None)

        mfs.clear()
        needed = set(cl.rev(x) for x in clnodes)

        # return the {filenode: linknode} dict for one file
        def linknodes(filerevlog, fname):
            if fastpathlinkrev:
                ln, llr = filerevlog.node, filerevlog.linkrev
                def genfilenodes():
                    for r in filerevlog:
                        linkrev = llr(r)
                        if linkrev in needed:
                            yield ln(r), cl.node(linkrev)
                fnodes[fname] = dict(genfilenodes())
            return fnodes.get(fname, {})

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()
        progress(msgbundling, None)

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """yield the file header and delta group chunks of each changed file

        Files are processed in sorted name order. linknodes(filerevlog,
        fname) must return a dict mapping file nodes to link nodes. Files
        with no node left after prune() produce no output. Raises
        util.Abort when repo.file(fname) is empty or missing.
        """
        repo = self._repo
        progress = self._progress
        reorder = self._reorder
        msgbundling = _('bundling')

        total = len(changedfiles)
        # for progress output
        msgfiles = _('files')
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise util.Abort(_("empty or missing revlog for %s") % fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self.prune(filerevlog, linkrevnodes, commonrevs, source)
            if filenodes:
                progress(msgbundling, i + 1, item=fname, unit=msgfiles,
                         total=total)
                yield self.fileheader(fname)
                for chunk in self.group(filenodes, filerevlog, lookupfilelog,
                                        reorder=reorder):
                    yield chunk

    def revchunk(self, revlog, rev, prev, linknode):
        """yield the chunk header, delta header and delta for revision rev

        The delta is computed against revision prev. When prev is nullrev,
        the full revision text is sent instead, preceded by
        mdiff.trivialdiffheader() for its length.
        """
        node = revlog.node(rev)
        p1, p2 = revlog.parentrevs(rev)
        base = prev

        prefix = ''
        if base == nullrev:
            delta = revlog.revision(node)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = revlog.revdiff(base, rev)
        p1n, p2n = revlog.parents(node)
        basenode = revlog.node(base)
        meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode)
        meta += prefix
        # the chunk length covers the delta header, the prefix and the delta
        l = len(meta) + len(delta)
        yield chunkheader(l)
        yield meta
        yield delta
    def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
        """pack the per-delta header (node, p1, p2, linknode)"""
        # do nothing with basenode, it is implicitly the previous one in HG10
        return struct.pack(self.deltaheader, node, p1n, p2n, linknode)