diff --git a/hgext/clonebundles.py b/hgext/clonebundles.py
new file mode 100644
--- /dev/null
+++ b/hgext/clonebundles.py
@@ -0,0 +1,75 @@
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""server side extension to advertise pre-generated bundles to seed clones.
+
+The extension essentially serves the content of a .hg/clonebundles.manifest
+file to clients that request it.
+
+The clonebundles.manifest file contains a list of URLs and attributes. URLs
+hold pre-generated bundles that a client fetches and applies. After applying
+the pre-generated bundle, the client will connect back to the original server
+and pull data not in the pre-generated bundle.
+
+Manifest File Format:
+
+The manifest file contains a newline (\\n) delimited list of entries.
+
+Each line in this file defines an available bundle. Lines have the format:
+
+    <URL> [<key>=<value> [<key>=<value> ...]]
+
+That is, a URL followed by extra metadata describing it. Metadata keys and
+values should be URL encoded.
+
+This metadata is optional. It is up to server operators to populate this
+metadata.
+
+Keys in UPPERCASE are reserved for use by Mercurial. All non-uppercase keys
+can be used by site installations.
+
+The server operator is responsible for generating the bundle manifest file.
+
+Metadata Attributes:
+
+TBD
+"""
+
+from mercurial import (
+    extensions,
+    wireproto,
+)
+
+testedwith = 'internal'
+
+def capabilities(orig, repo, proto):
+    """Wrapper for ``wireproto._capabilities`` that advertises clone bundles.
+
+    Appends ``clonebundles`` to the capabilities returned by ``orig`` when
+    the repository has a ``clonebundles.manifest`` file.
+    """
+    caps = orig(repo, proto)
+
+    # Only advertise if a manifest exists. This does add some I/O to requests.
+    # But this should be cheaper than a wasted network round trip due to
+    # missing file.
+    if repo.opener.exists('clonebundles.manifest'):
+        caps.append('clonebundles')
+
+    return caps
+
+@wireproto.wireprotocommand('clonebundles', '')
+def bundles(repo, proto):
+    """Server command for returning info for available bundles to seed clones.
+
+    Clients will parse this response and determine what bundle to fetch.
+
+    Other extensions may wrap this command to filter or dynamically emit
+    data depending on the request. e.g. you could advertise URLs for
+    the closest data center given the client's IP address.
+    """
+    return repo.opener.tryread('clonebundles.manifest')
+
+def extsetup(ui):
+    """Install the capabilities wrapper when the extension is set up."""
+    extensions.wrapfunction(wireproto, '_capabilities', capabilities)
diff --git a/mercurial/exchange.py b/mercurial/exchange.py
--- a/mercurial/exchange.py
+++ b/mercurial/exchange.py
@@ -7,12 +7,13 @@
 
 from i18n import _
 from node import hex, nullid
-import errno, urllib
+import errno, urllib, urllib2
 import util, scmutil, changegroup, base85, error
 import discovery, phases, obsolete, bookmarks as bookmod, bundle2, pushkey
 import lock as lockmod
 import streamclone
 import tags
+import url as urlmod
 
 def readbundle(ui, fh, fname, vfs=None):
     header = changegroup.readexactly(fh, 4)
@@ -973,6 +974,9 @@ def pull(repo, remote, heads=None, force
     try:
         pullop.trmanager = transactionmanager(repo, 'pull', remote.url())
         streamclone.maybeperformlegacystreamclone(pullop)
+        # This should ideally be in _pullbundle2(). However, it needs to run
+        # before discovery to avoid extra work.
+        _maybeapplyclonebundle(pullop)
         _pulldiscovery(pullop)
         if pullop.canusebundle2:
             _pullbundle2(pullop)
@@ -1499,3 +1503,105 @@ def unbundle(repo, cg, heads, source, ur
         if recordout is not None:
             recordout(repo.ui.popbuffer())
     return r
+
+def _maybeapplyclonebundle(pullop):
+    """Apply a clone bundle from a remote, if possible.
+
+    Does nothing unless ``experimental.clonebundles`` is enabled, the pull
+    is not limited to specific heads, and the remote advertises the
+    ``clonebundles`` capability. Only the first manifest entry is tried.
+
+    Raises ``error.Abort`` if the bundle cannot be applied and
+    ``ui.clonebundlefallback`` is not enabled.
+    """
+
+    repo = pullop.repo
+    remote = pullop.remote
+
+    if not repo.ui.configbool('experimental', 'clonebundles', False):
+        return
+
+    if pullop.heads:
+        return
+
+    if not remote.capable('clonebundles'):
+        return
+
+    res = remote._call('clonebundles')
+    entries = parseclonebundlesmanifest(res)
+
+    # TODO filter entries by supported features.
+    # TODO sort entries by user preferences.
+
+    if not entries:
+        repo.ui.note(_('no clone bundles available on remote; '
+                       'falling back to regular clone\n'))
+        return
+
+    url = entries[0]['URL']
+    repo.ui.status(_('applying clone bundle from %s\n') % url)
+    if trypullbundlefromurl(repo.ui, repo, url):
+        repo.ui.status(_('finished applying clone bundle\n'))
+    # Bundle failed.
+    #
+    # We abort by default to avoid the thundering herd of
+    # clients flooding a server that was expecting expensive
+    # clone load to be offloaded.
+    elif repo.ui.configbool('ui', 'clonebundlefallback', False):
+        repo.ui.warn(_('falling back to normal clone\n'))
+    else:
+        raise error.Abort(_('error applying bundle'),
+                          hint=_('consider contacting the server '
+                                 'operator if this error persists'))
+
+def parseclonebundlesmanifest(s):
+    """Parses the raw text of a clone bundles manifest.
+
+    Returns a list of dicts. The dicts have a ``URL`` key corresponding
+    to the URL and other keys are the attributes for the entry. Attribute
+    keys and values are URL decoded. An attribute with no ``=`` is kept
+    with an empty value instead of raising ``ValueError``.
+    """
+    m = []
+    for line in s.splitlines():
+        fields = line.split()
+        if not fields:
+            continue
+        attrs = {'URL': fields[0]}
+        for rawattr in fields[1:]:
+            # partition() tolerates a malformed attribute lacking '='; a
+            # two-value split() unpack would raise ValueError here.
+            key, sep, value = rawattr.partition('=')
+            attrs[urllib.unquote(key)] = urllib.unquote(value)
+
+        m.append(attrs)
+
+    return m
+
+def trypullbundlefromurl(ui, repo, url):
+    """Attempt to apply a bundle from a URL.
+
+    The bundle is fetched and added inside a ``bundleurl`` transaction
+    while the repo lock is held. Returns True on success. HTTP and URL
+    errors are reported as warnings and make the function return False.
+    """
+    lock = repo.lock()
+    try:
+        tr = repo.transaction('bundleurl')
+        try:
+            try:
+                fh = urlmod.open(ui, url)
+                cg = readbundle(ui, fh, 'stream')
+                changegroup.addchangegroup(repo, cg, 'clonebundles', url)
+                tr.close()
+                return True
+            except urllib2.HTTPError as e:
+                ui.warn(_('HTTP error fetching bundle: %s\n') % str(e))
+            except urllib2.URLError as e:
+                ui.warn(_('error fetching bundle: %s\n') % e.reason)
+
+            return False
+        finally:
+            tr.release()
+    finally:
+        lock.release()
diff --git a/mercurial/help/config.txt b/mercurial/help/config.txt
--- a/mercurial/help/config.txt
+++ b/mercurial/help/config.txt
@@ -1412,6 +1412,21 @@ User interface controls.
default ``USER@HOST`` is used instead. (default: False) +``clonebundlefallback`` + Whether failure to apply an advertised "clone bundle" from a server + should result in fallback to a regular clone. + + This is disabled by default because servers advertising "clone + bundles" often do so to reduce server load. If advertised bundles + start mass failing and clients automatically fall back to a regular + clone, this would add significant and unexpected load to the server + since the server is expecting clone operations to be offloaded to + pre-generated bundles. Failing fast (the default behavior) ensures + clients don't overwhelm the server when "clone bundle" application + fails. + + (default: False) + ``commitsubrepos`` Whether to commit modified subrepositories when committing the parent repository. If False and one subrepository has uncommitted diff --git a/tests/test-clonebundles.t b/tests/test-clonebundles.t new file mode 100644 --- /dev/null +++ b/tests/test-clonebundles.t @@ -0,0 +1,143 @@ +Set up a server + + $ hg init server + $ cd server + $ cat >> .hg/hgrc << EOF + > [extensions] + > clonebundles = + > EOF + + $ touch foo + $ hg -q commit -A -m 'add foo' + $ touch bar + $ hg -q commit -A -m 'add bar' + + $ hg serve -d -p $HGPORT --pid-file hg.pid --accesslog access.log + $ cat hg.pid >> $DAEMON_PIDS + $ cd .. 
+ +Feature disabled by default +(client should not request manifest) + + $ hg clone -U http://localhost:$HGPORT feature-disabled + requesting all changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + + $ cat server/access.log + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob) + * - - [*] "GET /?cmd=batch HTTP/1.1" 200 - x-hgarg-1:cmds=heads+%3Bknown+nodes%3D (glob) + * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - x-hgarg-1:bundlecaps=HG20%2Cbundle2%3DHG20%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=1&common=0000000000000000000000000000000000000000&heads=aaff8d2ffbbf07a46dd1f05d8ae7877e3f56e2a2&listkeys=phase%2Cbookmarks (glob) + * - - [*] "GET /?cmd=listkeys HTTP/1.1" 200 - x-hgarg-1:namespace=phases (glob) + + $ cat >> $HGRCPATH << EOF + > [experimental] + > clonebundles = true + > EOF + +Missing manifest should not result in server lookup + + $ hg --verbose clone -U http://localhost:$HGPORT no-manifest + requesting all changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + + $ tail -4 server/access.log + * - - [*] "GET /?cmd=capabilities HTTP/1.1" 200 - (glob) + * - - [*] "GET /?cmd=batch HTTP/1.1" 200 - x-hgarg-1:cmds=heads+%3Bknown+nodes%3D (glob) + * - - [*] "GET /?cmd=getbundle HTTP/1.1" 200 - x-hgarg-1:bundlecaps=HG20%2Cbundle2%3DHG20%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=1&common=0000000000000000000000000000000000000000&heads=aaff8d2ffbbf07a46dd1f05d8ae7877e3f56e2a2&listkeys=phase%2Cbookmarks (glob) + * - - [*] "GET /?cmd=listkeys HTTP/1.1" 200 - 
x-hgarg-1:namespace=phases (glob) + +Empty manifest file results in retrieval +(the extension only checks if the manifest file exists) + + $ touch server/.hg/clonebundles.manifest + $ hg --verbose clone -U http://localhost:$HGPORT empty-manifest + no clone bundles available on remote; falling back to regular clone + requesting all changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + +Manifest file with invalid URL aborts + + $ echo 'http://does.not.exist/bundle.hg' > server/.hg/clonebundles.manifest + $ hg clone http://localhost:$HGPORT 404-url + applying clone bundle from http://does.not.exist/bundle.hg + error fetching bundle: [Errno -2] Name or service not known + abort: error applying bundle + (consider contacting the server operator if this error persists) + [255] + +Server is not running aborts + + $ echo "http://localhost:$HGPORT1/bundle.hg" > server/.hg/clonebundles.manifest + $ hg clone http://localhost:$HGPORT server-not-runner + applying clone bundle from http://localhost:$HGPORT1/bundle.hg + error fetching bundle: [Errno 111] Connection refused + abort: error applying bundle + (consider contacting the server operator if this error persists) + [255] + +Server returns 404 + + $ python $TESTDIR/dumbhttp.py -p $HGPORT1 --pid http.pid + $ cat http.pid >> $DAEMON_PIDS + $ hg clone http://localhost:$HGPORT running-404 + applying clone bundle from http://localhost:$HGPORT1/bundle.hg + HTTP error fetching bundle: HTTP Error 404: File not found + abort: error applying bundle + (consider contacting the server operator if this error persists) + [255] + +We can override failure to fall back to regular clone + + $ hg --config ui.clonebundlefallback=true clone -U http://localhost:$HGPORT 404-fallback + applying clone bundle from http://localhost:$HGPORT1/bundle.hg + HTTP error fetching bundle: HTTP Error 404: File not found + falling back to normal clone + requesting all changes + adding changesets + 
adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + +Bundle with partial content works + + $ hg -R server bundle --type gzip --base null -r 53245c60e682 partial.hg + 1 changesets found + + $ echo "http://localhost:$HGPORT1/partial.hg" > server/.hg/clonebundles.manifest + $ hg clone -U http://localhost:$HGPORT partial-bundle + applying clone bundle from http://localhost:$HGPORT1/partial.hg + adding changesets + adding manifests + adding file changes + added 1 changesets with 1 changes to 1 files + finished applying clone bundle + searching for changes + adding changesets + adding manifests + adding file changes + added 1 changesets with 1 changes to 1 files + +Bundle with full content works + + $ hg -R server bundle --type gzip --base null -r tip full.hg + 2 changesets found + + $ echo "http://localhost:$HGPORT1/full.hg" > server/.hg/clonebundles.manifest + $ hg clone -U http://localhost:$HGPORT full-bundle + applying clone bundle from http://localhost:$HGPORT1/full.hg + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + finished applying clone bundle + searching for changes + no changes found diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -249,6 +249,8 @@ Test extension help: bugzilla hooks for integrating with the Bugzilla bug tracker censor erase file content at a given revision churn command to display statistics about repository history + clonebundles server side extension to advertise pre-generated bundles to + seed clones. color colorize output from some commands convert import revisions from foreign VCS repositories into Mercurial @@ -1069,6 +1071,8 @@ Test keyword search help Extensions: + clonebundles server side extension to advertise pre-generated bundles to seed + clones. prefixedname matched against word "clone" relink recreates hardlinks between repository clones