# changegroup.py - Mercurial changegroup manipulation functions
#
#  Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import util
import struct, os, bz2, zlib, tempfile

def getchunk(source):
    """return the next chunk from changegroup 'source' as a string

    A chunk is prefixed by a 4-byte big-endian signed length that counts
    the 4 header bytes themselves. An empty string is returned when
    'source' is exhausted or when the length is <= 4 (a terminating
    chunk). util.Abort is raised when fewer payload bytes than announced
    can be read."""
    d = source.read(4)
    if not d:
        return ""
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        return ""
    d = source.read(l - 4)
    if len(d) < l - 4:
        raise util.Abort(_("premature EOF reading chunk"
                           " (got %d bytes, expected %d)")
                          % (len(d), l - 4))
    return d

def chunkiter(source, progress=None):
    """iterate through the chunks in source, yielding a sequence of chunks
    (strings)

    Iteration stops at the first empty chunk returned by getchunk(). If
    'progress' is not None it is called (without arguments) once per
    chunk, just before that chunk is yielded."""
    while 1:
        c = getchunk(source)
        if not c:
            break
        elif progress is not None:
            progress()
        yield c

def chunkheader(length):
    """return a changegroup chunk header (string)

    'length' is the payload size; the encoded value also counts the 4
    bytes of the header itself."""
    return struct.pack(">l", length + 4)

def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    return struct.pack(">l", 0)

class nocompress(object):
    """pass-through object offering the compress()/flush() methods that
    writebundle() calls on a compressor"""
    def compress(self, x):
        return x
    def flush(self):
        return ""

# bundle type name -> (header written to the file, compressor factory)
#
# "HG10BZ" only writes "HG10": unbundle() feeds "BZ" back into the bz2
# decompressor, i.e. the last two bytes of the 6-byte header are treated
# as the beginning of the compressed stream itself.
bundletypes = {
    "": ("", nocompress),
    "HG10UN": ("HG10UN", nocompress),
    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
}

def collector(cl, mmfs, files):
    """return a callback that records, for a changeset node, the data
    needed to build a bundle

    The callback reads the entry for 'node' from 'cl', adds c[3] to the
    'files' set and registers 'node' under key c[0] in 'mmfs' unless that
    key is already present."""
    # Gather information about changeset nodes going out in a bundle.
    # We want to gather manifests needed and filelogs affected.
    def collect(node):
        c = cl.read(node)
        # presumably c[3] is the list of files touched and c[0] the
        # manifest node of the changeset -- confirm against cl.read()
        files.update(c[3])
        mmfs.setdefault(c[0], node)
    return collect

# hgweb uses this list to communicate its preferred type
bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']

def writebundle(cg, filename, bundletype):
    """Write a bundle file and return its filename.

    If filename is given, the file is opened in "wb" mode, so an
    existing file is overwritten.
    If no filename is specified, a temporary file is created.
    bundletype is a key of bundletypes and selects the header and the
    compression; an unknown value raises KeyError.
    The bundle file will be deleted in case of errors.
    """

    fh = None
    cleanup = None
    try:
        if filename:
            fh = open(filename, "wb")
        else:
            fd, filename = tempfile.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, "wb")
        # from here on, any failure removes the (partial) bundle file
        cleanup = filename

        header, compressor = bundletypes[bundletype]
        fh.write(header)
        z = compressor()

        # parse the changegroup data, otherwise we will block
        # in case of sshrepo because we don't know the end of the stream

        # an empty chunkiter is the end of the changegroup
        # a changegroup has at least 2 chunkiters (changelog and manifest).
        # after that, an empty chunkiter is the end of the changegroup
        empty = False
        count = 0
        while not empty or count <= 2:
            empty = True
            count += 1
            for chunk in chunkiter(cg):
                empty = False
                fh.write(z.compress(chunkheader(len(chunk))))
                # hand the payload to the compressor in slices of at
                # most 2**20 bytes
                pos = 0
                while pos < len(chunk):
                    end = pos + 2**20
                    fh.write(z.compress(chunk[pos:end]))
                    pos = end
            fh.write(z.compress(closechunk()))
        fh.write(z.flush())
        # success: keep the file
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            os.unlink(cleanup)

def unbundle(header, fh):
    """return a file-like object yielding the uncompressed changegroup
    stored in 'fh'

    'header' is the data already consumed from the start of 'fh'. For
    'HG10UN', 'fh' itself is returned. If 'header' does not start with
    'HG' the stream is taken to be a headerless uncompressed bundle and
    'header' is put back in front of the remaining data. 'HG10GZ' and
    'HG10BZ' streams are decompressed lazily. Any other header raises
    util.Abort."""
    if header == 'HG10UN':
        return fh
    elif not header.startswith('HG'):
        # old client with uncompressed bundle
        def generator(f):
            yield header
            for chunk in f:
                yield chunk
    elif header == 'HG10GZ':
        def generator(f):
            zd = zlib.decompressobj()
            # read fixed-size blocks: iterating a binary file directly
            # splits on newline bytes, which gives unbounded reads
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    elif header == 'HG10BZ':
        def generator(f):
            zd = bz2.BZ2Decompressor()
            # the "BZ" consumed as part of the header belongs to the
            # compressed stream; give it back to the decompressor
            zd.decompress("BZ")
            for chunk in util.filechunkiter(f, 4096):
                yield zd.decompress(chunk)
    else:
        # without this branch 'generator' would be unbound below
        raise util.Abort(_('unknown bundle compression type: %s') % header)
    return util.chunkbuffer(generator(fh))

def readbundle(fh, fname):
    """validate the 6-byte header of bundle 'fh' and return a file-like
    object for its uncompressed content (see unbundle())

    'fname' is only used in error messages. util.Abort is raised if the
    header does not start with 'HG', is not version 'HG10', or names a
    type missing from bundletypes."""
    header = fh.read(6)
    if not header.startswith('HG'):
        raise util.Abort(_('%s: not a Mercurial bundle file') % fname)
    if not header.startswith('HG10'):
        raise util.Abort(_('%s: unknown bundle version') % fname)
    elif header not in bundletypes:
        raise util.Abort(_('%s: unknown bundle compression type') % fname)
    return unbundle(header, fh)