# HG changeset patch # User Mathias De Mare # Date 2023-03-08 13:23:43 # Node ID 60f9602b413e66df0c133b5225038efcb74903e0 # Parent c814101560d91a9425137d9469a84f97d4d39255 clonebundles: add support for inline (streaming) clonebundles The idea behind inline clonebundles is to send them through the ssh or https connection to the Mercurial server. We've been using this specifically for streaming clonebundles, although it works for 'regular' clonebundles as well (but is less relevant, since pullbundles exist). We've had this enabled for around 9 months for a part of our users. A few benefits are: - no need to secure an external system, since everything goes through the same Mercurial server - easier scaling (in our case: no risk of inconsistencies between multiple mercurial-server mirrors and nginx clonebundles hosts) Remaining topics/questions right now: - The inline clonebundles don't work for https yet. This is because httppeer doesn't seem to support sending client capabilities. I didn't focus on that as my main goal was to get this working for ssh. diff --git a/hgext/clonebundles.py b/hgext/clonebundles.py --- a/hgext/clonebundles.py +++ b/hgext/clonebundles.py @@ -202,6 +202,18 @@ instructions when a failure occurs, thus Mercurial server when the bundle hosting service fails. +inline clonebundles +------------------- + +It is possible to transmit clonebundles inline in case repositories are +accessed over SSH. This avoids having to set up an external HTTPS server +and results in the same access control as already present for the SSH setup. + +Inline clonebundles should be placed into the `.hg/bundle-cache` directory. +A clonebundle at `.hg/bundle-cache/mybundle.bundle` is referred to +in the `clonebundles.manifest` file as `peer-bundle-cache://mybundle.bundle`. 
+ + auto-generation of clone bundles -------------------------------- diff --git a/mercurial/bundlecaches.py b/mercurial/bundlecaches.py --- a/mercurial/bundlecaches.py +++ b/mercurial/bundlecaches.py @@ -23,7 +23,9 @@ from .utils import stringutil urlreq = util.urlreq +BUNDLE_CACHE_DIR = b'bundle-cache' CB_MANIFEST_FILE = b'clonebundles.manifest' +CLONEBUNDLESCHEME = b"peer-bundle-cache://" def get_manifest(repo): diff --git a/mercurial/exchange.py b/mercurial/exchange.py --- a/mercurial/exchange.py +++ b/mercurial/exchange.py @@ -2834,7 +2834,7 @@ def _maybeapplyclonebundle(pullop): url = entries[0][b'URL'] repo.ui.status(_(b'applying clone bundle from %s\n') % url) - if trypullbundlefromurl(repo.ui, repo, url): + if trypullbundlefromurl(repo.ui, repo, url, remote): repo.ui.status(_(b'finished applying clone bundle\n')) # Bundle failed. # @@ -2855,11 +2855,23 @@ def _maybeapplyclonebundle(pullop): ) -def trypullbundlefromurl(ui, repo, url): +def inline_clone_bundle_open(ui, url, peer): + """Return a chunkbuffer over the inline clone bundle fetched from peer.""" + if not peer: + raise error.Abort(_(b'no remote repository supplied for %s') % url) + clonebundleid = url[len(bundlecaches.CLONEBUNDLESCHEME) :] + peerclonebundle = peer.get_inline_clone_bundle(clonebundleid) + return util.chunkbuffer(peerclonebundle) + + +def trypullbundlefromurl(ui, repo, url, peer=None): """Attempt to apply a bundle from a URL.""" with repo.lock(), repo.transaction(b'bundleurl') as tr: try: - fh = urlmod.open(ui, url) + if url.startswith(bundlecaches.CLONEBUNDLESCHEME): + fh = inline_clone_bundle_open(ui, url, peer) + else: + fh = urlmod.open(ui, url) cg = readbundle(ui, fh, b'stream') if isinstance(cg, streamclone.streamcloneapplier): diff --git a/mercurial/helptext/config.txt b/mercurial/helptext/config.txt --- a/mercurial/helptext/config.txt +++ b/mercurial/helptext/config.txt @@ -1318,6 +1318,12 @@ be ``$HG_HOOKTYPE=incoming`` and ``$HG_H changeset to tag is in ``$HG_NODE``. The name of tag is in ``$HG_TAG``. 
The tag is local if ``$HG_LOCAL=1``, or in the repository if ``$HG_LOCAL=0``. +``pretransmit-inline-clone-bundle`` + Run before transferring an inline clonebundle to the peer. + If the exit status is 0, the inline clonebundle will be allowed to be + transferred. A non-zero status will cause the transfer to fail. + The path of the inline clonebundle is in ``$HG_CLONEBUNDLEPATH``. + ``pretxnopen`` Run before any new repository transaction is open. The reason for the transaction will be in ``$HG_TXNNAME``, and a unique identifier for the diff --git a/mercurial/httppeer.py b/mercurial/httppeer.py --- a/mercurial/httppeer.py +++ b/mercurial/httppeer.py @@ -441,6 +441,13 @@ class httppeer(wireprotov1peer.wirepeer) def capabilities(self): return self._caps + def _finish_inline_clone_bundle(self, stream): + # HTTP streams must hit the end to process the last empty + # chunk of Chunked-Encoding so the connection can be reused. + chunk = stream.read(1) + if chunk: + self._abort(error.ResponseError(_(b"unexpected response:"), chunk)) + # End of ipeercommands interface. def _callstream(self, cmd, _compressible=False, **args): diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py --- a/mercurial/interfaces/repository.py +++ b/mercurial/interfaces/repository.py @@ -176,6 +176,12 @@ class ipeercommands(interfaceutil.Interf Returns a set of string capabilities. """ + def get_inline_clone_bundle(path): + """Retrieve clonebundle across the wire. + + Returns an iterator over the bundle's chunks. + """ + def clonebundles(): """Obtains the clone bundles manifest for the repo. 
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -348,6 +348,10 @@ class localpeer(repository.peer): def capabilities(self): return self._caps + def get_inline_clone_bundle(self, path): + # not needed with local peer + raise NotImplementedError + def clonebundles(self): return bundlecaches.get_manifest(self._repo) diff --git a/mercurial/sshpeer.py b/mercurial/sshpeer.py --- a/mercurial/sshpeer.py +++ b/mercurial/sshpeer.py @@ -213,7 +213,7 @@ def _clientcapabilities(): Returns a list of capabilities that are supported by this client. """ - protoparams = {b'partial-pull'} + protoparams = {b'partial-pull', b'inlineclonebundles'} comps = [ e.wireprotosupport().name for e in util.compengines.supportedwireengines(util.CLIENTROLE) diff --git a/mercurial/streamclone.py b/mercurial/streamclone.py --- a/mercurial/streamclone.py +++ b/mercurial/streamclone.py @@ -428,7 +428,17 @@ def consumev1(repo, fp, filecount, bytec with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount): for i in range(filecount): # XXX doesn't support '\n' or '\r' in filenames - l = fp.readline() + if util.safehasattr(fp, 'readline'): + l = fp.readline() + else: + # inline clonebundles use a chunkbuffer, so no readline + # --> this should be small anyway, the first line + # only contains the size of the bundle + # an empty read is EOF: stop, like readline(), instead of spinning + l_buf = [] + while not (l_buf and l_buf[-1] in (b'', b'\n')): + l_buf.append(fp.read(1)) + l = b''.join(l_buf) try: name, size = l.split(b'\0', 1) size = int(size) diff --git a/mercurial/wireprotov1peer.py b/mercurial/wireprotov1peer.py --- a/mercurial/wireprotov1peer.py +++ b/mercurial/wireprotov1peer.py @@ -341,6 +341,19 @@ class wirepeer(repository.peer): self.requirecap(b'clonebundles', _(b'clone bundles')) return self._call(b'clonebundles') + def _finish_inline_clone_bundle(self, stream): + pass # allow override for httppeer + + def get_inline_clone_bundle(self, path): + stream = 
self._callstream(b"get_inline_clone_bundle", path=path) + length = util.uvarintdecodestream(stream) + + # SSH streams will block if reading more than length + for chunk in util.filechunkiter(stream, limit=length): + yield chunk + + self._finish_inline_clone_bundle(stream) + @batchable def lookup(self, key): self.requirecap(b'lookup', _(b'look up remote revision')) diff --git a/mercurial/wireprotov1server.py b/mercurial/wireprotov1server.py --- a/mercurial/wireprotov1server.py +++ b/mercurial/wireprotov1server.py @@ -21,6 +21,7 @@ from . import ( encoding, error, exchange, + hook, pushkey as pushkeymod, pycompat, repoview, @@ -264,6 +265,40 @@ def branches(repo, proto, nodes): return wireprototypes.bytesresponse(b''.join(r)) +@wireprotocommand(b'get_inline_clone_bundle', b'path', permission=b'pull') +def get_inline_clone_bundle(repo, proto, path): + """ + Server command to send a clonebundle to the client + """ + if hook.hashook(repo.ui, b'pretransmit-inline-clone-bundle'): + hook.hook( + repo.ui, + repo, + b'pretransmit-inline-clone-bundle', + throw=True, + clonebundlepath=path, + ) + + bundle_dir = repo.vfs.join(bundlecaches.BUNDLE_CACHE_DIR) + clonebundlepath = repo.vfs.join(bundle_dir, path) + clonebundles_dir = os.path.realpath(bundle_dir) + pycompat.ossep + if not os.path.realpath(clonebundlepath).startswith(clonebundles_dir): + raise error.Abort(b'clonebundle %s is using an illegal path' % path) + + if not repo.vfs.exists(clonebundlepath): + raise error.Abort(b'clonebundle %s does not exist' % path) + + def generator(vfs, bundle_path): + with vfs(bundle_path) as f: + length = os.fstat(f.fileno())[6] + yield util.uvarintencode(length) + for chunk in util.filechunkiter(f): + yield chunk + + stream = generator(repo.vfs, clonebundlepath) + return wireprototypes.streamres(gen=stream, prefer_uncompressed=True) + + @wireprotocommand(b'clonebundles', b'', permission=b'pull') def clonebundles(repo, proto): """Server command for returning info for available bundles to seed clones. 
@@ -273,9 +308,21 @@ def clonebundles(repo, proto): Extensions may wrap this command to filter or dynamically emit data depending on the request. e.g. you could advertise URLs for the closest data center given the client's IP address. + + The only filter on the server side is filtering out inline clonebundles + in case a client does not support them. + Otherwise, older clients would retrieve and error out on those. """ - manifest = bundlecaches.get_manifest(repo) - return wireprototypes.bytesresponse(manifest) + manifest_contents = bundlecaches.get_manifest(repo) + clientcapabilities = proto.getprotocaps() + if b'inlineclonebundles' in clientcapabilities: + return wireprototypes.bytesresponse(manifest_contents) + modified_manifest = [] + for line in manifest_contents.splitlines(): + if line.startswith(bundlecaches.CLONEBUNDLESCHEME): + continue + modified_manifest.append(line) + return wireprototypes.bytesresponse(b'\n'.join(modified_manifest)) wireprotocaps = [ diff --git a/tests/test-clonebundles.t b/tests/test-clonebundles.t --- a/tests/test-clonebundles.t +++ b/tests/test-clonebundles.t @@ -219,6 +219,59 @@ Feature works over SSH no changes found 2 local changesets published +Feature works over SSH with inline bundle + $ mkdir server/.hg/bundle-cache/ + $ cp full.hg server/.hg/bundle-cache/ + $ echo "peer-bundle-cache://full.hg" > server/.hg/clonebundles.manifest + $ hg clone -U ssh://user@dummy/server ssh-inline-clone + applying clone bundle from peer-bundle-cache://full.hg + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + finished applying clone bundle + searching for changes + no changes found + 2 local changesets published + +Hooks work with inline bundle + $ cp server/.hg/hgrc server/.hg/hgrc-beforeinlinehooks + $ echo "[hooks]" >> server/.hg/hgrc + $ echo "pretransmit-inline-clone-bundle=echo foo" >> server/.hg/hgrc + $ hg clone -U ssh://user@dummy/server ssh-inline-clone-hook + applying clone 
bundle from peer-bundle-cache://full.hg + remote: foo + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + finished applying clone bundle + searching for changes + no changes found + 2 local changesets published + +Hooks can make an inline bundle fail + $ cp server/.hg/hgrc-beforeinlinehooks server/.hg/hgrc + $ echo "[hooks]" >> server/.hg/hgrc + $ echo "pretransmit-inline-clone-bundle=echo bar && false" >> server/.hg/hgrc + $ hg clone -U ssh://user@dummy/server ssh-inline-clone-hook-fail + applying clone bundle from peer-bundle-cache://full.hg + remote: bar + remote: abort: pretransmit-inline-clone-bundle hook exited with status 1 + abort: stream ended unexpectedly (got 0 bytes, expected 1) + [255] + $ cp server/.hg/hgrc-beforeinlinehooks server/.hg/hgrc + +Feature does not use inline bundle over HTTP(S) because there is no protocaps support +(so no way for the client to announce that it supports inline clonebundles) + $ hg clone -U http://localhost:$HGPORT http-inline-clone + requesting all changes + adding changesets + adding manifests + adding file changes + added 2 changesets with 2 changes to 2 files + new changesets 53245c60e682:aaff8d2ffbbf + Entry with unknown BUNDLESPEC is filtered and not used $ cat > server/.hg/clonebundles.manifest << EOF