##// END OF EJS Templates
add support for streaming clone....
Vadim Gelfer -
r2612:ffb895f1 default
parent child Browse files
Show More
@@ -0,0 +1,82 b''
1 # streamclone.py - streaming clone server support for mercurial
2 #
3 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
4 #
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
7
8 from demandload import demandload
9 from i18n import gettext as _
10 demandload(globals(), "os stat util")
11
12 # if server supports streaming clone, it advertises "stream"
13 # capability with value that is version+flags of repo it is serving.
14 # client only streams if it can read that repo format.
15
16 def walkrepo(root):
17 '''iterate over metadata files in repository.
18 walk in natural (sorted) order.
19 yields 2-tuples: name of .d or .i file, size of file.'''
20
21 strip_count = len(root) + len(os.sep)
22 def walk(path, recurse):
23 ents = os.listdir(path)
24 ents.sort()
25 for e in ents:
26 pe = os.path.join(path, e)
27 st = os.lstat(pe)
28 if stat.S_ISDIR(st.st_mode):
29 if recurse:
30 for x in walk(pe, True):
31 yield x
32 else:
33 if not stat.S_ISREG(st.st_mode) or len(e) < 2:
34 continue
35 sfx = e[-2:]
36 if sfx in ('.d', '.i'):
37 yield pe[strip_count:], st.st_size
38 # write file data first
39 for x in walk(os.path.join(root, 'data'), True):
40 yield x
41 # write manifest before changelog
42 meta = list(walk(root, False))
43 meta.sort(reverse=True)
44 for x in meta:
45 yield x
46
47 # stream file format is simple.
48 #
49 # server writes out line that says how many files, how many total
50 # bytes. separator is ascii space, byte counts are strings.
51 #
52 # then for each file:
53 #
54 # server writes out line that says file name, how many bytes in
55 # file. separator is ascii nul, byte count is string.
56 #
57 # server writes out raw file data.
58
59 def stream_out(repo, fileobj):
60 '''stream out all metadata files in repository.
61 writes to file-like object, must support write() and optional flush().'''
62 # get consistent snapshot of repo. lock during scan so lock not
63 # needed while we stream, and commits can happen.
64 lock = repo.lock()
65 repo.ui.debug('scanning\n')
66 entries = []
67 total_bytes = 0
68 for name, size in walkrepo(repo.path):
69 entries.append((name, size))
70 total_bytes += size
71 lock.release()
72
73 repo.ui.debug('%d files, %d bytes to transfer\n' %
74 (len(entries), total_bytes))
75 fileobj.write('%d %d\n' % (len(entries), total_bytes))
76 for name, size in entries:
77 repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
78 fileobj.write('%s\0%d\n' % (name, size))
79 for chunk in util.filechunkiter(repo.opener(name), limit=size):
80 fileobj.write(chunk)
81 flush = getattr(fileobj, 'flush', None)
82 if flush: flush()
@@ -0,0 +1,25 b''
1 #!/bin/sh
2
3 mkdir test
4 cd test
5 echo foo>foo
6 hg init
7 hg addremove
8 hg commit -m 1
9 hg verify
10 hg serve -p 20059 -d --pid-file=hg.pid
11 cat hg.pid >> $DAEMON_PIDS
12 cd ..
13
14 echo % clone via stream
15 http_proxy= hg clone http://localhost:20059/ copy 2>&1 | \
16 sed -e 's/[0-9][0-9.]*/XXX/g'
17 cd copy
18 hg verify
19
20 cd ..
21
22 echo % clone via pull
23 http_proxy= hg clone --pull http://localhost:20059/ copy-pull
24 cd copy-pull
25 hg verify
@@ -0,0 +1,29 b''
1 (the addremove command is deprecated; use add and remove --after instead)
2 adding foo
3 checking changesets
4 checking manifests
5 crosschecking files in changesets and manifests
6 checking files
7 1 files, 1 changesets, 1 total revisions
8 % clone via stream
9 streaming all changes
10 XXX files to transfer, XXX bytes of data
11 transferred XXX bytes in XXX seconds (XXX KB/sec)
12 XXX files updated, XXX files merged, XXX files removed, XXX files unresolved
13 checking changesets
14 checking manifests
15 crosschecking files in changesets and manifests
16 checking files
17 1 files, 1 changesets, 1 total revisions
18 % clone via pull
19 requesting all changes
20 adding changesets
21 adding manifests
22 adding file changes
23 added 1 changesets with 1 changes to 1 files
24 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
25 checking changesets
26 checking manifests
27 crosschecking files in changesets and manifests
28 checking files
29 1 files, 1 changesets, 1 total revisions
@@ -179,7 +179,7 b' def clone(ui, source, dest=None, pull=Fa'
179 revs = [src_repo.lookup(r) for r in rev]
179 revs = [src_repo.lookup(r) for r in rev]
180
180
181 if dest_repo.local():
181 if dest_repo.local():
182 dest_repo.pull(src_repo, heads=revs)
182 dest_repo.clone(src_repo, heads=revs, pull=pull)
183 elif src_repo.local():
183 elif src_repo.local():
184 src_repo.push(dest_repo, revs=revs)
184 src_repo.push(dest_repo, revs=revs)
185 else:
185 else:
@@ -11,7 +11,8 b' import os.path'
11 import mimetypes
11 import mimetypes
12 from mercurial.demandload import demandload
12 from mercurial.demandload import demandload
13 demandload(globals(), "re zlib ConfigParser mimetools cStringIO sys tempfile")
13 demandload(globals(), "re zlib ConfigParser mimetools cStringIO sys tempfile")
14 demandload(globals(), "mercurial:mdiff,ui,hg,util,archival,templater")
14 demandload(globals(), "mercurial:mdiff,ui,hg,util,archival,streamclone")
15 demandload(globals(), "mercurial:templater")
15 demandload(globals(), "mercurial.hgweb.common:get_mtime,staticfile")
16 demandload(globals(), "mercurial.hgweb.common:get_mtime,staticfile")
16 from mercurial.node import *
17 from mercurial.node import *
17 from mercurial.i18n import gettext as _
18 from mercurial.i18n import gettext as _
@@ -859,7 +860,7 b' class hgweb(object):'
859 or self.t("error", error="%r not found" % fname))
860 or self.t("error", error="%r not found" % fname))
860
861
861 def do_capabilities(self, req):
862 def do_capabilities(self, req):
862 resp = 'unbundle'
863 resp = 'unbundle stream=%d' % (self.repo.revlogversion,)
863 req.httphdr("application/mercurial-0.1", length=len(resp))
864 req.httphdr("application/mercurial-0.1", length=len(resp))
864 req.write(resp)
865 req.write(resp)
865
866
@@ -950,3 +951,7 b' class hgweb(object):'
950 finally:
951 finally:
951 fp.close()
952 fp.close()
952 os.unlink(tempname)
953 os.unlink(tempname)
954
955 def do_stream_out(self, req):
956 req.httphdr("application/mercurial-0.1")
957 streamclone.stream_out(self.repo, req)
@@ -326,6 +326,9 b' class httprepository(remoterepository):'
326 fp.close()
326 fp.close()
327 os.unlink(tempname)
327 os.unlink(tempname)
328
328
329 def stream_out(self):
330 return self.do_cmd('stream_out')
331
329 class httpsrepository(httprepository):
332 class httpsrepository(httprepository):
330 def __init__(self, ui, path):
333 def __init__(self, ui, path):
331 if not has_https:
334 if not has_https:
@@ -8,17 +8,19 b''
8 from node import *
8 from node import *
9 from i18n import gettext as _
9 from i18n import gettext as _
10 from demandload import *
10 from demandload import *
11 import repo
11 demandload(globals(), "appendfile changegroup")
12 demandload(globals(), "appendfile changegroup")
12 demandload(globals(), "changelog dirstate filelog manifest repo context")
13 demandload(globals(), "changelog dirstate filelog manifest context")
13 demandload(globals(), "re lock transaction tempfile stat mdiff errno ui")
14 demandload(globals(), "re lock transaction tempfile stat mdiff errno ui")
14 demandload(globals(), "os revlog util")
15 demandload(globals(), "os revlog time util")
15
16
16 class localrepository(object):
17 class localrepository(repo.repository):
17 capabilities = ()
18 capabilities = ()
18
19
19 def __del__(self):
20 def __del__(self):
20 self.transhandle = None
21 self.transhandle = None
21 def __init__(self, parentui, path=None, create=0):
22 def __init__(self, parentui, path=None, create=0):
23 repo.repository.__init__(self)
22 if not path:
24 if not path:
23 p = os.getcwd()
25 p = os.getcwd()
24 while not os.path.isdir(os.path.join(p, ".hg")):
26 while not os.path.isdir(os.path.join(p, ".hg")):
@@ -1183,7 +1185,7 b' class localrepository(object):'
1183 # unbundle assumes local user cannot lock remote repo (new ssh
1185 # unbundle assumes local user cannot lock remote repo (new ssh
1184 # servers, http servers).
1186 # servers, http servers).
1185
1187
1186 if 'unbundle' in remote.capabilities:
1188 if remote.capable('unbundle'):
1187 return self.push_unbundle(remote, force, revs)
1189 return self.push_unbundle(remote, force, revs)
1188 return self.push_addchangegroup(remote, force, revs)
1190 return self.push_addchangegroup(remote, force, revs)
1189
1191
@@ -2201,6 +2203,47 b' class localrepository(object):'
2201 self.ui.warn(_("%d integrity errors encountered!\n") % errors[0])
2203 self.ui.warn(_("%d integrity errors encountered!\n") % errors[0])
2202 return 1
2204 return 1
2203
2205
2206 def stream_in(self, remote):
2207 self.ui.status(_('streaming all changes\n'))
2208 fp = remote.stream_out()
2209 total_files, total_bytes = map(int, fp.readline().split(' ', 1))
2210 self.ui.status(_('%d files to transfer, %s of data\n') %
2211 (total_files, util.bytecount(total_bytes)))
2212 start = time.time()
2213 for i in xrange(total_files):
2214 name, size = fp.readline().split('\0', 1)
2215 size = int(size)
2216 self.ui.debug('adding %s (%s)\n' % (name, util.bytecount(size)))
2217 ofp = self.opener(name, 'w')
2218 for chunk in util.filechunkiter(fp, limit=size):
2219 ofp.write(chunk)
2220 ofp.close()
2221 elapsed = time.time() - start
2222 self.ui.status(_('transferred %s in %.1f seconds (%s/sec)\n') %
2223 (util.bytecount(total_bytes), elapsed,
2224 util.bytecount(total_bytes / elapsed)))
2225 self.reload()
2226 return len(self.heads()) + 1
2227
2228 def clone(self, remote, heads=[], pull=False):
2229 '''clone remote repository.
2230 if possible, changes are streamed from remote server.
2231
2232 keyword arguments:
2233 heads: list of revs to clone (forces use of pull)
2234 pull: force use of pull, even if remote can stream'''
2235
2236 # now, all clients that can stream can read repo formats
2237 # supported by all servers that can stream.
2238
2239 # if revlog format changes, client will have to check version
2240 # and format flags on "stream" capability, and stream only if
2241 # compatible.
2242
2243 if not pull and not heads and remote.capable('stream'):
2244 return self.stream_in(remote)
2245 return self.pull(remote, heads)
2246
2204 # used to avoid circular references so destructors work
2247 # used to avoid circular references so destructors work
2205 def aftertrans(base):
2248 def aftertrans(base):
2206 p = base
2249 p = base
@@ -5,7 +5,9 b''
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 class remoterepository(object):
8 import repo
9
10 class remoterepository(repo.repository):
9 def dev(self):
11 def dev(self):
10 return -1
12 return -1
11
13
@@ -5,4 +5,19 b''
5 # This software may be used and distributed according to the terms
5 # This software may be used and distributed according to the terms
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 class RepoError(Exception): pass
8 class RepoError(Exception):
9 pass
10
11 class repository(object):
12 def capable(self, name):
13 '''tell whether repo supports named capability.
14 return False if not supported.
15 if boolean capability, return True.
16 if string capability, return string.'''
17 name_eq = name + '='
18 for cap in self.capabilities:
19 if name == cap:
20 return True
21 if cap.startswith(name_eq):
22 return cap[len(name_eq):]
23 return False
@@ -198,3 +198,6 b' class sshrepository(remoterepository):'
198 if not r:
198 if not r:
199 return 1
199 return 1
200 return int(r)
200 return int(r)
201
202 def stream_out(self):
203 return self.do_cmd('stream_out')
@@ -8,7 +8,7 b''
8 from demandload import demandload
8 from demandload import demandload
9 from i18n import gettext as _
9 from i18n import gettext as _
10 from node import *
10 from node import *
11 demandload(globals(), "os sys tempfile util")
11 demandload(globals(), "os streamclone sys tempfile util")
12
12
13 class sshserver(object):
13 class sshserver(object):
14 def __init__(self, ui, repo):
14 def __init__(self, ui, repo):
@@ -60,7 +60,7 b' class sshserver(object):'
60 capabilities: space separated list of tokens
60 capabilities: space separated list of tokens
61 '''
61 '''
62
62
63 r = "capabilities: unbundle\n"
63 r = "capabilities: unbundle stream=%d\n" % (self.repo.revlogversion,)
64 self.respond(r)
64 self.respond(r)
65
65
66 def do_lock(self):
66 def do_lock(self):
@@ -167,3 +167,5 b' class sshserver(object):'
167 fp.close()
167 fp.close()
168 os.unlink(tempname)
168 os.unlink(tempname)
169
169
170 def do_stream_out(self):
171 streamclone.stream_out(self.repo, self.fout)
@@ -961,3 +961,24 b' def rcpath():'
961 else:
961 else:
962 _rcpath = os_rcpath()
962 _rcpath = os_rcpath()
963 return _rcpath
963 return _rcpath
964
965 def bytecount(nbytes):
966 '''return byte count formatted as readable string, with units'''
967
968 units = (
969 (100, 1<<30, _('%.0f GB')),
970 (10, 1<<30, _('%.1f GB')),
971 (1, 1<<30, _('%.2f GB')),
972 (100, 1<<20, _('%.0f MB')),
973 (10, 1<<20, _('%.1f MB')),
974 (1, 1<<20, _('%.2f MB')),
975 (100, 1<<10, _('%.0f KB')),
976 (10, 1<<10, _('%.1f KB')),
977 (1, 1<<10, _('%.2f KB')),
978 (1, 1, _('%.0f bytes')),
979 )
980
981 for multiplier, divisor, format in units:
982 if nbytes >= divisor * multiplier:
983 return format % (nbytes / float(divisor))
984 return units[-1][2] % nbytes
@@ -13,17 +13,27 b' echo $! > proxy.pid)'
13 cat proxy.pid >> $DAEMON_PIDS
13 cat proxy.pid >> $DAEMON_PIDS
14 sleep 2
14 sleep 2
15
15
16 echo %% url for proxy
16 echo %% url for proxy, stream
17 http_proxy=http://localhost:20060/ hg --config http_proxy.always=True clone http://localhost:20059/ b
17 http_proxy=http://localhost:20060/ hg --config http_proxy.always=True clone http://localhost:20059/ b | \
18 sed -e 's/[0-9][0-9.]*/XXX/g'
19 cd b
20 hg verify
21 cd ..
22
23 echo %% url for proxy, pull
24 http_proxy=http://localhost:20060/ hg --config http_proxy.always=True clone --pull http://localhost:20059/ b-pull
25 cd b-pull
26 hg verify
27 cd ..
18
28
19 echo %% host:port for proxy
29 echo %% host:port for proxy
20 http_proxy=localhost:20060 hg clone --config http_proxy.always=True http://localhost:20059/ c
30 http_proxy=localhost:20060 hg clone --pull --config http_proxy.always=True http://localhost:20059/ c
21
31
22 echo %% proxy url with user name and password
32 echo %% proxy url with user name and password
23 http_proxy=http://user:passwd@localhost:20060 hg clone --config http_proxy.always=True http://localhost:20059/ d
33 http_proxy=http://user:passwd@localhost:20060 hg clone --pull --config http_proxy.always=True http://localhost:20059/ d
24
34
25 echo %% url with user name and password
35 echo %% url with user name and password
26 http_proxy=http://user:passwd@localhost:20060 hg clone --config http_proxy.always=True http://user:passwd@localhost:20059/ e
36 http_proxy=http://user:passwd@localhost:20060 hg clone --pull --config http_proxy.always=True http://user:passwd@localhost:20059/ e
27
37
28 echo %% bad host:port for proxy
38 echo %% bad host:port for proxy
29 http_proxy=localhost:20061 hg clone --config http_proxy.always=True http://localhost:20059/ f
39 http_proxy=localhost:20061 hg clone --config http_proxy.always=True http://localhost:20059/ f
@@ -1,11 +1,26 b''
1 adding a
1 adding a
2 %% url for proxy
2 %% url for proxy, stream
3 streaming all changes
4 XXX files to transfer, XXX bytes of data
5 transferred XXX bytes in XXX seconds (XXX KB/sec)
6 XXX files updated, XXX files merged, XXX files removed, XXX files unresolved
7 checking changesets
8 checking manifests
9 crosschecking files in changesets and manifests
10 checking files
11 1 files, 1 changesets, 1 total revisions
12 %% url for proxy, pull
3 requesting all changes
13 requesting all changes
4 adding changesets
14 adding changesets
5 adding manifests
15 adding manifests
6 adding file changes
16 adding file changes
7 added 1 changesets with 1 changes to 1 files
17 added 1 changesets with 1 changes to 1 files
8 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
18 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
19 checking changesets
20 checking manifests
21 crosschecking files in changesets and manifests
22 checking files
23 1 files, 1 changesets, 1 total revisions
9 %% host:port for proxy
24 %% host:port for proxy
10 requesting all changes
25 requesting all changes
11 adding changesets
26 adding changesets
@@ -11,7 +11,7 b' hg serve -p 20059 -d --pid-file=hg.pid'
11 cat hg.pid >> $DAEMON_PIDS
11 cat hg.pid >> $DAEMON_PIDS
12 cd ..
12 cd ..
13
13
14 http_proxy= hg clone http://localhost:20059/ copy
14 http_proxy= hg clone --pull http://localhost:20059/ copy
15 cd copy
15 cd copy
16 hg verify
16 hg verify
17 hg co
17 hg co
@@ -30,8 +30,15 b' hg ci -A -m "init" -d "1000000 0" foo'
30
30
31 cd ..
31 cd ..
32
32
33 echo "# clone remote"
33 echo "# clone remote via stream"
34 hg clone -e ./dummyssh ssh://user@dummy/remote local
34 hg clone -e ./dummyssh ssh://user@dummy/remote local-stream 2>&1 | \
35 sed -e 's/[0-9][0-9.]*/XXX/g'
36 cd local-stream
37 hg verify
38 cd ..
39
40 echo "# clone remote via pull"
41 hg clone -e ./dummyssh --pull ssh://user@dummy/remote local
35
42
36 echo "# verify"
43 echo "# verify"
37 cd local
44 cd local
@@ -1,5 +1,15 b''
1 # creating 'remote'
1 # creating 'remote'
2 # clone remote
2 # clone remote via stream
3 streaming all changes
4 XXX files to transfer, XXX bytes of data
5 transferred XXX bytes in XXX seconds (XXX KB/sec)
6 XXX files updated, XXX files merged, XXX files removed, XXX files unresolved
7 checking changesets
8 checking manifests
9 crosschecking files in changesets and manifests
10 checking files
11 1 files, 1 changesets, 1 total revisions
12 # clone remote via pull
3 requesting all changes
13 requesting all changes
4 adding changesets
14 adding changesets
5 adding manifests
15 adding manifests
@@ -70,6 +80,7 b' remote: added 1 changesets with 1 change'
70 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
80 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
71 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
81 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
72 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
82 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
83 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
73 Got arguments 1:user@dummy 2:hg -R local serve --stdio 3: 4: 5:
84 Got arguments 1:user@dummy 2:hg -R local serve --stdio 3: 4: 5:
74 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
85 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
75 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
86 Got arguments 1:user@dummy 2:hg -R remote serve --stdio 3: 4: 5:
General Comments 0
You need to be logged in to leave comments. Login now