# HG changeset patch # User Gregory Szorc # Date 2018-09-27 00:16:56 # Node ID c537144fdbef3c5707731f7cf9d9bae5e0799de8 # Parent ed919b90acda19988972dfa981d274963de0b3c9 wireprotov2: support response caching One of the things I've learned from managing VCS servers over the years is that they are hard to scale. It is well known that some companies have very beefy (read: very expensive) servers to power their VCS needs. It is also known that specialized servers for various VCS exist in order to facilitate scaling servers. (Mercurial is in this boat.) One of the aspects that make a VCS server hard to scale is the high CPU load incurred by constant client clone/pull operations. To alleviate the scaling pain associated with data retrieval operations, I want to integrate caching into the Mercurial wire protocol server as robustly as possible such that servers can aggressively cache responses and defer as much server load as possible. This commit represents the initial implementation of a general caching layer in wire protocol version 2. We define a new interface and behavior for a wire protocol cacher in repository.py. (This is probably where a reviewer should look first to understand what is going on.) The bulk of the added code is in wireprotov2server.py, where we define how a command can opt in to being cached and integrate caching into command dispatching. From a very high-level: * A command can declare itself as cacheable by providing a callable that can be used to derive a cache key. * At dispatch time, if a command is cacheable, we attempt to construct a cacher and use it for serving the request and/or caching the request. * The dispatch layer handles the bulk of the business logic for caching, making cachers mostly "dumb content stores." * The mechanism for invalidating cached entries (one of the harder parts about caching in general) is by varying the cache key when state changes. As such, cachers don't need to be concerned with cache invalidation. Initially, we've hooked up support for caching "manifestdata" and "filedata" commands. These are the simplest to cache, as they should be immutable over time. Caching of commands related to changeset data is a bit harder (because cache validation is impacted by changes to bookmarks, phases, etc). This will be implemented later. (Strictly speaking, censoring a file should invalidate caches. I've added an inline TODO to track this edge case.) To prove it works, this commit implements a test-only extension providing in-memory caching backed by an lrucachedict. A new test showing this extension behaving properly is added. FWIW, the cacher is ~50 lines of code, demonstrating the relative ease with which a cache can be added to a server. While the test cacher is not suitable for production workloads, just for kicks I performed a clone of just the changeset and manifest data for the mozilla-unified repository. With a fully warmed cache (of just the manifest data since changeset data is not cached), server-side CPU usage dropped from ~73s to ~28s. That's pretty significant and demonstrates the potential that response caching has on server scalability! Differential Revision: https://phab.mercurial-scm.org/D4773 diff --git a/mercurial/repository.py b/mercurial/repository.py --- a/mercurial/repository.py +++ b/mercurial/repository.py @@ -1657,3 +1657,160 @@ class ilocalrepositorymain(interfaceutil class completelocalrepository(ilocalrepositorymain, ilocalrepositoryfilestorage): """Complete interface for a local repository.""" + +class iwireprotocolcommandcacher(interfaceutil.Interface): + """Represents a caching backend for wire protocol commands. + + Wire protocol version 2 supports transparent caching of many commands. + To leverage this caching, servers can activate objects that cache + command responses. Objects handle both cache writing and reading. + This interface defines how that response caching mechanism works. + + Wire protocol version 2 commands emit a series of objects that are + serialized and sent to the client. The caching layer exists between + the invocation of the command function and the sending of its output + objects to an output layer. + + Instances of this interface represent a binding to a cache that + can serve a response (in place of calling a command function) and/or + write responses to a cache for subsequent use. + + When a command request arrives, the following happens with regards + to this interface: + + 1. The server determines whether the command request is cacheable. + 2. If it is, an instance of this interface is spawned. + 3. The cacher is activated in a context manager (``__enter__`` is called). + 4. A cache *key* for that request is derived. This will call the + instance's ``adjustcachekeystate()`` method so the derivation + can be influenced. + 5. The cacher is informed of the derived cache key via a call to + ``setcachekey()``. + 6. The cacher's ``lookup()`` method is called to test for presence of + the derived key in the cache. + 7. If ``lookup()`` returns a hit, that cached result is used in place + of invoking the command function. ``__exit__`` is called and the instance + is discarded. + 8. The command function is invoked. + 9. ``onobject()`` is called for each object emitted by the command + function. + 10. After the final object is seen, ``onoutputfinished()`` is called. + 11. ``__exit__`` is called to signal the end of use of the instance. + + Cache *key* derivation can be influenced by the instance. + + Cache keys are initially derived by a deterministic representation of + the command request. This includes the command name, arguments, protocol + version, etc. This initial key derivation is performed by CBOR-encoding a + data structure and feeding that output into a hasher. + + Instances of this interface can influence this initial key derivation + via ``adjustcachekeystate()``. + + The instance is informed of the derived cache key via a call to + ``setcachekey()``. The instance must store the key locally so it can + be consulted on subsequent operations that may require it. + + When constructed, the instance has access to a callable that can be used + for encoding response objects. This callable receives as its single + argument an object emitted by a command function. It returns an iterable + of bytes chunks representing the encoded object. Unless the cacher is + caching native Python objects in memory or has a way of reconstructing + the original Python objects, implementations typically call this function + to produce bytes from the output objects and then store those bytes in + the cache. When it comes time to re-emit those bytes, they are wrapped + in a ``wireprototypes.encodedresponse`` instance to tell the output + layer that they are pre-encoded. + + When receiving the objects emitted by the command function, instances + can choose what to do with those objects. The simplest thing to do is + re-emit the original objects. They will be forwarded to the output + layer and will be processed as if the cacher did not exist. + + Implementations could also choose to not emit objects - instead locally + buffering objects or their encoded representation. They could then emit + a single "coalesced" object when ``onoutputfinished()`` is called. In + this way, the implementation would function as a filtering layer of + sorts. + + When caching objects, typically the encoded form of the object will + be stored. Keep in mind that if the original object is forwarded to + the output layer, it will need to be encoded there as well. For large + output, this redundant encoding could add overhead. Implementations + could wrap the encoded object data in ``wireprototypes.encodedresponse`` + instances to avoid this overhead. + """ + def __enter__(): + """Marks the instance as active. + + Should return self. + """ + + def __exit__(exctype, excvalue, exctb): + """Called when cacher is no longer used. + + This can be used by implementations to perform cleanup actions (e.g. + disconnecting network sockets, aborting a partially cached response. + """ + + def adjustcachekeystate(state): + """Influences cache key derivation by adjusting state to derive key. + + A dict defining the state used to derive the cache key is passed. + + Implementations can modify this dict to record additional state that + is wanted to influence key derivation. + + Implementations are *highly* encouraged to not modify or delete + existing keys. + """ + + def setcachekey(key): + """Record the derived cache key for this request. + + Instances may mutate the key for internal usage, as desired. e.g. + instances may wish to prepend the repo name, introduce path + components for filesystem or URL addressing, etc. Behavior is up to + the cache. + + Returns a bool indicating if the request is cacheable by this + instance. + """ + + def lookup(): + """Attempt to resolve an entry in the cache. + + The instance is instructed to look for the cache key that it was + informed about via the call to ``setcachekey()``. + + If there's no cache hit or the cacher doesn't wish to use the cached + entry, ``None`` should be returned. + + Else, a dict defining the cached result should be returned. The + dict may have the following keys: + + objs + An iterable of objects that should be sent to the client. That + iterable of objects is expected to be what the command function + would return if invoked or an equivalent representation thereof. + """ + + def onobject(obj): + """Called when a new object is emitted from the command function. + + Receives as its argument the object that was emitted from the + command function. + + This method returns an iterator of objects to forward to the output + layer. The easiest implementation is a generator that just + ``yield obj``. + """ + + def onfinished(): + """Called after all objects have been emitted from the command function. + + Implementations should return an iterator of objects to forward to + the output layer. + + This method can be a generator. + """ diff --git a/mercurial/wireprototypes.py b/mercurial/wireprototypes.py --- a/mercurial/wireprototypes.py +++ b/mercurial/wireprototypes.py @@ -232,11 +232,12 @@ class baseprotocolhandler(interfaceutil. class commandentry(object): """Represents a declared wire protocol command.""" def __init__(self, func, args='', transports=None, - permission='push'): + permission='push', cachekeyfn=None): self.func = func self.args = args self.transports = transports or set() self.permission = permission + self.cachekeyfn = cachekeyfn def _merge(self, func, args): """Merge this instance with an incoming 2-tuple. diff --git a/mercurial/wireprotov2server.py b/mercurial/wireprotov2server.py --- a/mercurial/wireprotov2server.py +++ b/mercurial/wireprotov2server.py @@ -7,6 +7,7 @@ from __future__ import absolute_import import contextlib +import hashlib from .i18n import _ from .node import ( @@ -25,6 +26,7 @@ from . import ( wireprototypes, ) from .utils import ( + cborutil, interfaceutil, stringutil, ) @@ -35,6 +37,11 @@ HTTP_WIREPROTO_V2 = wireprototypes.HTTP_ COMMANDS = wireprototypes.commanddict() +# Value inserted into cache key computation function. Change the value to +# force new cache keys for every command request. This should be done when +# there is a change to how caching works, etc. +GLOBAL_CACHE_VERSION = 1 + def handlehttpv2request(rctx, req, res, checkperm, urlparts): from .hgweb import common as hgwebcommon @@ -333,12 +340,64 @@ def getdispatchrepo(repo, proto, command return repo.filtered('served') def dispatch(repo, proto, command): + """Run a wire protocol command. + + Returns an iterable of objects that will be sent to the client. + """ repo = getdispatchrepo(repo, proto, command) - func, spec = COMMANDS[command] + entry = COMMANDS[command] + func = entry.func + spec = entry.args + args = proto.getargs(spec) - return func(repo, proto, **pycompat.strkwargs(args)) + # There is some duplicate boilerplate code here for calling the command and + # emitting objects. It is either that or a lot of indented code that looks + # like a pyramid (since there are a lot of code paths that result in not + # using the cacher). + callcommand = lambda: func(repo, proto, **pycompat.strkwargs(args)) + + # Request is not cacheable. Don't bother instantiating a cacher. + if not entry.cachekeyfn: + for o in callcommand(): + yield o + return + + cacher = makeresponsecacher(repo, proto, command, args, + cborutil.streamencode) + + # But we have no cacher. Do default handling. + if not cacher: + for o in callcommand(): + yield o + return + + with cacher: + cachekey = entry.cachekeyfn(repo, proto, cacher, **args) + + # No cache key or the cacher doesn't like it. Do default handling. + if cachekey is None or not cacher.setcachekey(cachekey): + for o in callcommand(): + yield o + return + + # Serve it from the cache, if possible. + cached = cacher.lookup() + + if cached: + for o in cached['objs']: + yield o + return + + # Else call the command and feed its output into the cacher, allowing + # the cacher to buffer/mutate objects as it desires. + for o in callcommand(): + for o in cacher.onobject(o): + yield o + + for o in cacher.onfinished(): + yield o @interfaceutil.implementer(wireprototypes.baseprotocolhandler) class httpv2protocolhandler(object): @@ -460,7 +519,7 @@ def _capabilitiesv2(repo, proto): return proto.addcapabilities(repo, caps) -def wireprotocommand(name, args=None, permission='push'): +def wireprotocommand(name, args=None, permission='push', cachekeyfn=None): """Decorator to declare a wire protocol command. ``name`` is the name of the wire protocol command being provided. @@ -489,11 +548,21 @@ def wireprotocommand(name, args=None, pe because otherwise commands not declaring their permissions could modify a repository that is supposed to be read-only. + ``cachekeyfn`` defines an optional callable that can derive the + cache key for this request. + Wire protocol commands are generators of objects to be serialized and sent to the client. If a command raises an uncaught exception, this will be translated into a command error. + + All commands can opt in to being cacheable by defining a function + (``cachekeyfn``) that is called to derive a cache key. This function + receives the same arguments as the command itself plus a ``cacher`` + argument containing the active cacher for the request and returns a bytes + containing the key in a cache the response to this command may be cached + under. """ transports = {k for k, v in wireprototypes.TRANSPORTS.items() if v['version'] == 2} @@ -543,12 +612,97 @@ def wireprotocommand(name, args=None, pe 'for version 2' % name) COMMANDS[name] = wireprototypes.commandentry( - func, args=args, transports=transports, permission=permission) + func, args=args, transports=transports, permission=permission, + cachekeyfn=cachekeyfn) return func return register +def makecommandcachekeyfn(command, localversion=None, allargs=False): + """Construct a cache key derivation function with common features. + + By default, the cache key is a hash of: + + * The command name. + * A global cache version number. + * A local cache version number (passed via ``localversion``). + * All the arguments passed to the command. + * The media type used. + * Wire protocol version string. + * The repository path. + """ + if not allargs: + raise error.ProgrammingError('only allargs=True is currently supported') + + if localversion is None: + raise error.ProgrammingError('must set localversion argument value') + + def cachekeyfn(repo, proto, cacher, **args): + spec = COMMANDS[command] + + # Commands that mutate the repo can not be cached. + if spec.permission == 'push': + return None + + # TODO config option to disable caching. + + # Our key derivation strategy is to construct a data structure + # holding everything that could influence cacheability and to hash + # the CBOR representation of that. Using CBOR seems like it might + # be overkill. However, simpler hashing mechanisms are prone to + # duplicate input issues. e.g. if you just concatenate two values, + # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides + # "padding" between values and prevents these problems. + + # Seed the hash with various data. + state = { + # To invalidate all cache keys. + b'globalversion': GLOBAL_CACHE_VERSION, + # More granular cache key invalidation. + b'localversion': localversion, + # Cache keys are segmented by command. + b'command': pycompat.sysbytes(command), + # Throw in the media type and API version strings so changes + # to exchange semantics invalid cache. + b'mediatype': FRAMINGTYPE, + b'version': HTTP_WIREPROTO_V2, + # So same requests for different repos don't share cache keys. + b'repo': repo.root, + } + + # The arguments passed to us will have already been normalized. + # Default values will be set, etc. This is important because it + # means that it doesn't matter if clients send an explicit argument + # or rely on the default value: it will all normalize to the same + # set of arguments on the server and therefore the same cache key. + # + # Arguments by their very nature must support being encoded to CBOR. + # And the CBOR encoder is deterministic. So we hash the arguments + # by feeding the CBOR of their representation into the hasher. + if allargs: + state[b'args'] = pycompat.byteskwargs(args) + + cacher.adjustcachekeystate(state) + + hasher = hashlib.sha1() + for chunk in cborutil.streamencode(state): + hasher.update(chunk) + + return pycompat.sysbytes(hasher.hexdigest()) + + return cachekeyfn + +def makeresponsecacher(repo, proto, command, args, objencoderfn): + """Construct a cacher for a cacheable command. + + Returns an ``iwireprotocolcommandcacher`` instance. + + Extensions can monkeypatch this function to provide custom caching + backends. + """ + return None + @wireprotocommand('branchmap', permission='pull') def branchmapv2(repo, proto): yield {encoding.fromlocal(k): v @@ -755,7 +909,11 @@ def getfilestore(repo, proto, path): 'example': b'foo.txt', } }, - permission='pull') + permission='pull', + # TODO censoring a file revision won't invalidate the cache. + # Figure out a way to take censoring into account when deriving + # the cache key. + cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True)) def filedata(repo, proto, haveparents, nodes, fields, path): try: # Extensions may wish to access the protocol handler. @@ -893,7 +1051,8 @@ def lookupv2(repo, proto, key): 'example': b'', }, }, - permission='pull') + permission='pull', + cachekeyfn=makecommandcachekeyfn('manifestdata', 1, allargs=True)) def manifestdata(repo, proto, haveparents, nodes, fields, tree): store = repo.manifestlog.getstorage(tree) diff --git a/tests/test-wireproto-caching.t b/tests/test-wireproto-caching.t new file mode 100644 --- /dev/null +++ b/tests/test-wireproto-caching.t @@ -0,0 +1,645 @@ + $ . $TESTDIR/wireprotohelpers.sh + $ cat >> $HGRCPATH << EOF + > [extensions] + > blackbox = + > [blackbox] + > track = simplecache + > EOF + $ hg init server + $ enablehttpv2 server + $ cd server + $ cat >> .hg/hgrc << EOF + > [extensions] + > simplecache = $TESTDIR/wireprotosimplecache.py + > EOF + + $ echo a0 > a + $ echo b0 > b + $ hg -q commit -A -m 'commit 0' + $ echo a1 > a + $ hg commit -m 'commit 1' + $ echo b1 > b + $ hg commit -m 'commit 2' + $ echo a2 > a + $ echo b2 > b + $ hg commit -m 'commit 3' + + $ hg log -G -T '{rev}:{node} {desc}' + @ 3:50590a86f3ff5d1e9a1624a7a6957884565cc8e8 commit 3 + | + o 2:4d01eda50c6ac5f7e89cbe1880143a32f559c302 commit 2 + | + o 1:4432d83626e8a98655f062ec1f2a43b07f7fbbb0 commit 1 + | + o 0:3390ef850073fbc2f0dfff2244342c8e9229013a commit 0 + + + $ hg --debug debugindex -m + rev linkrev nodeid p1 p2 + 0 0 992f4779029a3df8d0666d00bb924f69634e2641 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 + 1 1 a988fb43583e871d1ed5750ee074c6d840bbbfc8 992f4779029a3df8d0666d00bb924f69634e2641 0000000000000000000000000000000000000000 + 2 2 a8853dafacfca6fc807055a660d8b835141a3bb4 a988fb43583e871d1ed5750ee074c6d840bbbfc8 0000000000000000000000000000000000000000 + 3 3 3fe11dfbb13645782b0addafbe75a87c210ffddc a8853dafacfca6fc807055a660d8b835141a3bb4 0000000000000000000000000000000000000000 + + $ hg serve -p $HGPORT -d --pid-file hg.pid -E error.log + $ cat hg.pid > $DAEMON_PIDS + +Performing the same request should result in same result, with 2nd response +coming from cache. + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\x99\x2f\x47\x79\x02\x9a\x3d\xf8\xd0\x66\x6d\x00\xbb\x92\x4f\x69\x63\x4e\x26\x41'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 83\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> K\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x81T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&ADtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> 63\r\n + s> [\x00\x00\x01\x00\x02\x001 + s> \xa1Jtotalitems\x01\xa2DnodeT\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AGparents\x82T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 + s> \r\n + received frame(size=91; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'totalitems': 1 + }, + { + b'node': b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'parents': [ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + } + ] + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\x99\x2f\x47\x79\x02\x9a\x3d\xf8\xd0\x66\x6d\x00\xbb\x92\x4f\x69\x63\x4e\x26\x41'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 83\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> K\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x81T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&ADtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> 63\r\n + s> [\x00\x00\x01\x00\x02\x001 + s> \xa1Jtotalitems\x01\xa2DnodeT\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AGparents\x82T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 + s> \r\n + received frame(size=91; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'totalitems': 1 + }, + { + b'node': b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'parents': [ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + } + ] + +Sending different request doesn't yield cache hit. + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\x99\x2f\x47\x79\x02\x9a\x3d\xf8\xd0\x66\x6d\x00\xbb\x92\x4f\x69\x63\x4e\x26\x41', b'\xa9\x88\xfb\x43\x58\x3e\x87\x1d\x1e\xd5\x75\x0e\xe0\x74\xc6\xd8\x40\xbb\xbf\xc8'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 104\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> `\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x82T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AT\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8Dtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> b1\r\n + s> \xa9\x00\x00\x01\x00\x02\x001 + s> \xa1Jtotalitems\x02\xa2DnodeT\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AGparents\x82T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa2DnodeT\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8Gparents\x82T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 + s> \r\n + received frame(size=169; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'totalitems': 2 + }, + { + b'node': b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'parents': [ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + }, + { + b'node': b'\xa9\x88\xfbCX>\x87\x1d\x1e\xd5u\x0e\xe0t\xc6\xd8@\xbb\xbf\xc8', + b'parents': [ + b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + } + ] + + $ cat .hg/blackbox.log + *> cacher constructed for manifestdata (glob) + *> cache miss for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + *> storing cache entry for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + *> cacher constructed for manifestdata (glob) + *> cache hit for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + *> cacher constructed for manifestdata (glob) + *> cache miss for 6ed2f740a1cdd12c9e99c4f27695543143c26a11 (glob) + *> storing cache entry for 6ed2f740a1cdd12c9e99c4f27695543143c26a11 (glob) + + $ cat error.log + + $ killdaemons.py + $ rm .hg/blackbox.log + +Try with object caching mode + + $ cat >> .hg/hgrc << EOF + > [simplecache] + > cacheobjects = true + > EOF + + $ hg serve -p $HGPORT -d --pid-file hg.pid -E error.log + $ cat hg.pid > $DAEMON_PIDS + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\x99\x2f\x47\x79\x02\x9a\x3d\xf8\xd0\x66\x6d\x00\xbb\x92\x4f\x69\x63\x4e\x26\x41'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 83\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> K\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x81T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&ADtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> 63\r\n + s> [\x00\x00\x01\x00\x02\x001 + s> \xa1Jtotalitems\x01\xa2DnodeT\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AGparents\x82T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 + s> \r\n + received frame(size=91; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'totalitems': 1 + }, + { + b'node': b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'parents': [ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + } + ] + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\x99\x2f\x47\x79\x02\x9a\x3d\xf8\xd0\x66\x6d\x00\xbb\x92\x4f\x69\x63\x4e\x26\x41'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 83\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> K\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x81T\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&ADtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> 63\r\n + s> [\x00\x00\x01\x00\x02\x001 + s> \xa1Jtotalitems\x01\xa2DnodeT\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&AGparents\x82T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00T\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 + s> \r\n + received frame(size=91; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'totalitems': 1 + }, + { + b'node': b'\x99/Gy\x02\x9a=\xf8\xd0fm\x00\xbb\x92OicN&A', + b'parents': [ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + ] + } + ] + + $ cat .hg/blackbox.log + *> cacher constructed for manifestdata (glob) + *> cache miss for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + *> storing cache entry for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + *> cacher constructed for manifestdata (glob) + *> cache hit for c045a581599d58608efd3d93d8129841f2af04a0 (glob) + + $ cat error.log + + $ killdaemons.py + $ rm .hg/blackbox.log + +A non-cacheable command does not instantiate cacher + + $ hg serve -p $HGPORT -d --pid-file hg.pid -E error.log + $ cat hg.pid > $DAEMON_PIDS + $ sendhttpv2peer << EOF + > command capabilities + > EOF + creating http peer for wire protocol version 2 + sending capabilities command + s> POST /api/exp-http-v2-0002/ro/capabilities HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 27\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> \x13\x00\x00\x01\x00\x01\x01\x11\xa1DnameLcapabilities + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 13\r\n + s> \x0b\x00\x00\x01\x00\x02\x011 + s> \xa1FstatusBok + s> \r\n + received frame(size=11; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=continuation) + s> 52b\r\n + s> #\x05\x00\x01\x00\x02\x001 + s> \xa5Hcommands\xaaIbranchmap\xa2Dargs\xa0Kpermissions\x81DpullLcapabilities\xa2Dargs\xa0Kpermissions\x81DpullMchangesetdata\xa2Dargs\xa4Ffields\xa4Gdefault\xd9\x01\x02\x80Hrequired\xf4DtypeCsetKvalidvalues\xd9\x01\x02\x84IbookmarksGparentsEphaseHrevisionInoderange\xa3Gdefault\xf6Hrequired\xf4DtypeDlistEnodes\xa3Gdefault\xf6Hrequired\xf4DtypeDlistJnodesdepth\xa3Gdefault\xf6Hrequired\xf4DtypeCintKpermissions\x81DpullHfiledata\xa2Dargs\xa4Ffields\xa4Gdefault\xd9\x01\x02\x80Hrequired\xf4DtypeCsetKvalidvalues\xd9\x01\x02\x82GparentsHrevisionKhaveparents\xa3Gdefault\xf4Hrequired\xf4DtypeDboolEnodes\xa2Hrequired\xf5DtypeDlistDpath\xa2Hrequired\xf5DtypeEbytesKpermissions\x81DpullEheads\xa2Dargs\xa1Jpubliconly\xa3Gdefault\xf4Hrequired\xf4DtypeDboolKpermissions\x81DpullEknown\xa2Dargs\xa1Enodes\xa3Gdefault\x80Hrequired\xf4DtypeDlistKpermissions\x81DpullHlistkeys\xa2Dargs\xa1Inamespace\xa2Hrequired\xf5DtypeEbytesKpermissions\x81DpullFlookup\xa2Dargs\xa1Ckey\xa2Hrequired\xf5DtypeEbytesKpermissions\x81DpullLmanifestdata\xa2Dargs\xa4Ffields\xa4Gdefault\xd9\x01\x02\x80Hrequired\xf4DtypeCsetKvalidvalues\xd9\x01\x02\x82GparentsHrevisionKhaveparents\xa3Gdefault\xf4Hrequired\xf4DtypeDboolEnodes\xa2Hrequired\xf5DtypeDlistDtree\xa2Hrequired\xf5DtypeEbytesKpermissions\x81DpullGpushkey\xa2Dargs\xa4Ckey\xa2Hrequired\xf5DtypeEbytesInamespace\xa2Hrequired\xf5DtypeEbytesCnew\xa2Hrequired\xf5DtypeEbytesCold\xa2Hrequired\xf5DtypeEbytesKpermissions\x81DpushKcompression\x82\xa1DnameDzstd\xa1DnameDzlibQframingmediatypes\x81X&application/mercurial-exp-framing-0005Rpathfilterprefixes\xd9\x01\x02\x82Epath:Lrootfilesin:Nrawrepoformats\x82LgeneraldeltaHrevlogv1 + s> \r\n + received frame(size=1315; request=1; stream=2; streamflags=; type=command-response; flags=continuation) + s> 8\r\n + s> \x00\x00\x00\x01\x00\x02\x002 + s> \r\n + s> 0\r\n + s> \r\n + received frame(size=0; request=1; stream=2; streamflags=; type=command-response; flags=eos) + response: gen[ + { + b'commands': { + b'branchmap': { + b'args': {}, + b'permissions': [ + b'pull' + ] + }, + b'capabilities': { + b'args': {}, + b'permissions': [ + b'pull' + ] + }, + b'changesetdata': { + b'args': { + b'fields': { + b'default': set([]), + b'required': False, + b'type': b'set', + b'validvalues': set([ + b'bookmarks', + b'parents', + b'phase', + b'revision' + ]) + }, + b'noderange': { + b'default': None, + b'required': False, + b'type': b'list' + }, + b'nodes': { + b'default': None, + b'required': False, + b'type': b'list' + }, + b'nodesdepth': { + b'default': None, + b'required': False, + b'type': b'int' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'filedata': { + b'args': { + b'fields': { + b'default': set([]), + b'required': False, + b'type': b'set', + b'validvalues': set([ + b'parents', + b'revision' + ]) + }, + b'haveparents': { + b'default': False, + b'required': False, + b'type': b'bool' + }, + b'nodes': { + b'required': True, + b'type': b'list' + }, + b'path': { + b'required': True, + b'type': b'bytes' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'heads': { + b'args': { + b'publiconly': { + b'default': False, + b'required': False, + b'type': b'bool' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'known': { + b'args': { + b'nodes': { + b'default': [], + b'required': False, + b'type': b'list' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'listkeys': { + b'args': { + b'namespace': { + b'required': True, + b'type': b'bytes' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'lookup': { + b'args': { + b'key': { + b'required': True, + b'type': b'bytes' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'manifestdata': { + b'args': { + b'fields': { + b'default': set([]), + b'required': False, + b'type': b'set', + b'validvalues': set([ + b'parents', + b'revision' + ]) + }, + b'haveparents': { + b'default': False, + b'required': False, + b'type': b'bool' + }, + b'nodes': { + b'required': True, + b'type': b'list' + }, + b'tree': { + b'required': True, + b'type': b'bytes' + } + }, + b'permissions': [ + b'pull' + ] + }, + b'pushkey': { + b'args': { + b'key': { + b'required': True, + b'type': b'bytes' + }, + b'namespace': { + b'required': True, + b'type': b'bytes' + }, + b'new': { + b'required': True, + b'type': b'bytes' + }, + b'old': { + b'required': True, + b'type': b'bytes' + } + }, + b'permissions': [ + b'push' + ] + } + }, + b'compression': [ + { + b'name': b'zstd' + }, + { + b'name': b'zlib' + } + ], + b'framingmediatypes': [ + b'application/mercurial-exp-framing-0005' + ], + b'pathfilterprefixes': set([ + b'path:', + b'rootfilesin:' + ]), + b'rawrepoformats': [ + b'generaldelta', + b'revlogv1' + ] + } + ] + + $ test -f .hg/blackbox.log + [1] + +An error is not cached + + $ sendhttpv2peer << EOF + > command manifestdata + > nodes eval:[b'\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa'] + > tree eval:b'' + > fields eval:[b'parents'] + > EOF + creating http peer for wire protocol version 2 + sending manifestdata command + s> POST /api/exp-http-v2-0002/ro/manifestdata HTTP/1.1\r\n + s> Accept-Encoding: identity\r\n + s> accept: application/mercurial-exp-framing-0005\r\n + s> content-type: application/mercurial-exp-framing-0005\r\n + s> content-length: 83\r\n + s> host: $LOCALIP:$HGPORT\r\n (glob) + s> user-agent: Mercurial debugwireproto\r\n + s> \r\n + s> K\x00\x00\x01\x00\x01\x01\x11\xa2Dargs\xa3Ffields\x81GparentsEnodes\x81T\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaaDtree@DnameLmanifestdata + s> makefile('rb', None) + s> HTTP/1.1 200 OK\r\n + s> Server: testing stub value\r\n + s> Date: $HTTP_DATE$\r\n + s> Content-Type: application/mercurial-exp-framing-0005\r\n + s> Transfer-Encoding: chunked\r\n + s> \r\n + s> 51\r\n + s> I\x00\x00\x01\x00\x02\x012 + s> \xa2Eerror\xa2Dargs\x81T\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaaGmessagePunknown node: %sFstatusEerror + s> \r\n + received frame(size=73; request=1; stream=2; streamflags=stream-begin; type=command-response; flags=eos) + s> 0\r\n + s> \r\n + abort: unknown node: \xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa! (esc) + [255] + + $ cat .hg/blackbox.log + *> cacher constructed for manifestdata (glob) + *> cache miss for 9d1bb421d99e913d45f2d099aa49728514292dd2 (glob) + *> cacher exiting due to error (glob) + + $ killdaemons.py + $ rm .hg/blackbox.log diff --git a/tests/wireprotosimplecache.py b/tests/wireprotosimplecache.py new file mode 100644 --- /dev/null +++ b/tests/wireprotosimplecache.py @@ -0,0 +1,100 @@ +# wireprotosimplecache.py - Extension providing in-memory wire protocol cache +# +# Copyright 2018 Gregory Szorc +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +from mercurial import ( + extensions, + registrar, + repository, + util, + wireprototypes, + wireprotov2server, +) +from mercurial.utils import ( + interfaceutil, +) + +CACHE = None + +configtable = {} +configitem = registrar.configitem(configtable) + +configitem('simplecache', 'cacheobjects', + default=False) + +@interfaceutil.implementer(repository.iwireprotocolcommandcacher) +class memorycacher(object): + def __init__(self, ui, command, encodefn): + self.ui = ui + self.encodefn = encodefn + self.key = None + self.cacheobjects = ui.configbool('simplecache', 'cacheobjects') + self.buffered = [] + + ui.log('simplecache', 'cacher constructed for %s\n', command) + + def __enter__(self): + return self + + def __exit__(self, exctype, excvalue, exctb): + if exctype: + self.ui.log('simplecache', 'cacher exiting due to error\n') + + def adjustcachekeystate(self, state): + # Needed in order to make tests deterministic. Don't copy this + # pattern for production caches! + del state[b'repo'] + + def setcachekey(self, key): + self.key = key + return True + + def lookup(self): + if self.key not in CACHE: + self.ui.log('simplecache', 'cache miss for %s\n', self.key) + return None + + entry = CACHE[self.key] + self.ui.log('simplecache', 'cache hit for %s\n', self.key) + + if self.cacheobjects: + return { + 'objs': entry, + } + else: + return { + 'objs': [wireprototypes.encodedresponse(entry)], + } + + def onobject(self, obj): + if self.cacheobjects: + self.buffered.append(obj) + else: + self.buffered.extend(self.encodefn(obj)) + + yield obj + + def onfinished(self): + self.ui.log('simplecache', 'storing cache entry for %s\n', self.key) + if self.cacheobjects: + CACHE[self.key] = self.buffered + else: + CACHE[self.key] = b''.join(self.buffered) + + return [] + +def makeresponsecacher(orig, repo, proto, command, args, objencoderfn): + return memorycacher(repo.ui, command, objencoderfn) + +def extsetup(ui): + global CACHE + + CACHE = util.lrucachedict(10000) + + extensions.wrapfunction(wireprotov2server, 'makeresponsecacher', + makeresponsecacher)