##// END OF EJS Templates
add support for streaming clone....
add support for streaming clone. existing clone code uses pull to get changes from remote repo. is very slow, uses lots of memory and cpu. new clone code has server write file data straight to client, client writes file data straight to disk. memory and cpu used are very low, clone is much faster over lan. new client can still clone with pull, can still clone from older servers. new server can still serve older clients.

File last commit:

r2612:ffb895f1 default
r2612:ffb895f1 default
Show More
streamclone.py
82 lines | 2.8 KiB | text/x-python | PythonLexer
# streamclone.py - streaming clone server support for mercurial
#
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
from demandload import demandload
from i18n import gettext as _
demandload(globals(), "os stat util")
# if server supports streaming clone, it advertises "stream"
# capability with value that is version+flags of repo it is serving.
# client only streams if it can read that repo format.
def walkrepo(root):
'''iterate over metadata files in repository.
walk in natural (sorted) order.
yields 2-tuples: name of .d or .i file, size of file.'''
strip_count = len(root) + len(os.sep)
def walk(path, recurse):
ents = os.listdir(path)
ents.sort()
for e in ents:
pe = os.path.join(path, e)
st = os.lstat(pe)
if stat.S_ISDIR(st.st_mode):
if recurse:
for x in walk(pe, True):
yield x
else:
if not stat.S_ISREG(st.st_mode) or len(e) < 2:
continue
sfx = e[-2:]
if sfx in ('.d', '.i'):
yield pe[strip_count:], st.st_size
# write file data first
for x in walk(os.path.join(root, 'data'), True):
yield x
# write manifest before changelog
meta = list(walk(root, False))
meta.sort(reverse=True)
for x in meta:
yield x
# stream file format is simple.
#
# server writes out line that says how many files, how many total
# bytes. separator is ascii space, byte counts are strings.
#
# then for each file:
#
# server writes out line that says file name, how many bytes in
# file. separator is ascii nul, byte count is string.
#
# server writes out raw file data.
def stream_out(repo, fileobj):
'''stream out all metadata files in repository.
writes to file-like object, must support write() and optional flush().'''
# get consistent snapshot of repo. lock during scan so lock not
# needed while we stream, and commits can happen.
lock = repo.lock()
repo.ui.debug('scanning\n')
entries = []
total_bytes = 0
for name, size in walkrepo(repo.path):
entries.append((name, size))
total_bytes += size
lock.release()
repo.ui.debug('%d files, %d bytes to transfer\n' %
(len(entries), total_bytes))
fileobj.write('%d %d\n' % (len(entries), total_bytes))
for name, size in entries:
repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
fileobj.write('%s\0%d\n' % (name, size))
for chunk in util.filechunkiter(repo.opener(name), limit=size):
fileobj.write(chunk)
flush = getattr(fileobj, 'flush', None)
if flush: flush()