##// END OF EJS Templates
util: compression APIs to support revlog compression...
util: compression APIs to support revlog compression As part of "zstd all of the things," we need to teach revlogs to use non-zlib compression formats. Because we're routing all compression via the "compression manager" and "compression engine" APIs, we need to introduction functionality there for performing revlog operations. Ideally, revlog compression and decompression operations would be implemented in terms of simple "compress" and "decompress" primitives. However, there are a few considerations that make us want to have a specialized primitive for handling revlogs: 1) Performance. Revlogs tend to do compression and especially decompression operations in batches. Any overhead for e.g. instantiating a "context" for performing an operation can be noticed. For this reason, our "revlog compressor" primitive is reusable. For zstd, we reuse the same compression "context" for multiple operations. I've measured this to have a performance impact versus constructing new contexts for each operation. 2) Specialization. By having a primitive dedicated to revlog use, we can make revlog-specific choices and leave the door open for more functionality in the future. For example, the zstd revlog compressor may one day make use of dictionary compression. A future patch will introduce a decompress() on the compressor object. The code for the zlib compressor is basically copied from revlog.compress(). Although it doesn't handle the empty input case, the null first byte case, and the 'u' prefix case. These cases will continue to be handled in revlog.py once that code is ported to use this API.

File last commit:

r30764:e75463e3 default
r30794:31e1f0d4 default
Show More
protocol.py
198 lines | 6.5 KiB | text/x-python | PythonLexer
#
# Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import cgi
import struct
from .common import (
HTTP_OK,
)
from .. import (
util,
wireproto,
)
stringio = util.stringio
urlerr = util.urlerr
urlreq = util.urlreq
HGTYPE = 'application/mercurial-0.1'
HGTYPE2 = 'application/mercurial-0.2'
HGERRTYPE = 'application/hg-error'
def decodevaluefromheaders(req, headerprefix):
"""Decode a long value from multiple HTTP request headers."""
chunks = []
i = 1
while True:
v = req.env.get('HTTP_%s_%d' % (
headerprefix.upper().replace('-', '_'), i))
if v is None:
break
chunks.append(v)
i += 1
return ''.join(chunks)
class webproto(wireproto.abstractserverproto):
def __init__(self, req, ui):
self.req = req
self.response = ''
self.ui = ui
self.name = 'http'
def getargs(self, args):
knownargs = self._args()
data = {}
keys = args.split()
for k in keys:
if k == '*':
star = {}
for key in knownargs.keys():
if key != 'cmd' and key not in keys:
star[key] = knownargs[key][0]
data['*'] = star
else:
data[k] = knownargs[k][0]
return [data[k] for k in keys]
def _args(self):
args = self.req.form.copy()
postlen = int(self.req.env.get('HTTP_X_HGARGS_POST', 0))
if postlen:
args.update(cgi.parse_qs(
self.req.read(postlen), keep_blank_values=True))
return args
argvalue = decodevaluefromheaders(self.req, 'X-HgArg')
args.update(cgi.parse_qs(argvalue, keep_blank_values=True))
return args
def getfile(self, fp):
length = int(self.req.env['CONTENT_LENGTH'])
for s in util.filechunkiter(self.req, limit=length):
fp.write(s)
def redirect(self):
self.oldio = self.ui.fout, self.ui.ferr
self.ui.ferr = self.ui.fout = stringio()
def restore(self):
val = self.ui.fout.getvalue()
self.ui.ferr, self.ui.fout = self.oldio
return val
def _client(self):
return 'remote:%s:%s:%s' % (
self.req.env.get('wsgi.url_scheme') or 'http',
urlreq.quote(self.req.env.get('REMOTE_HOST', '')),
urlreq.quote(self.req.env.get('REMOTE_USER', '')))
def responsetype(self, v1compressible=False):
"""Determine the appropriate response type and compression settings.
The ``v1compressible`` argument states whether the response with
application/mercurial-0.1 media types should be zlib compressed.
Returns a tuple of (mediatype, compengine, engineopts).
"""
# For now, if it isn't compressible in the old world, it's never
# compressible. We can change this to send uncompressed 0.2 payloads
# later.
if not v1compressible:
return HGTYPE, None, None
# Determine the response media type and compression engine based
# on the request parameters.
protocaps = decodevaluefromheaders(self.req, 'X-HgProto').split(' ')
if '0.2' in protocaps:
# Default as defined by wire protocol spec.
compformats = ['zlib', 'none']
for cap in protocaps:
if cap.startswith('comp='):
compformats = cap[5:].split(',')
break
# Now find an agreed upon compression format.
for engine in wireproto.supportedcompengines(self.ui, self,
util.SERVERROLE):
if engine.wireprotosupport().name in compformats:
opts = {}
level = self.ui.configint('server',
'%slevel' % engine.name())
if level is not None:
opts['level'] = level
return HGTYPE2, engine, opts
# No mutually supported compression format. Fall back to the
# legacy protocol.
# Don't allow untrusted settings because disabling compression or
# setting a very high compression level could lead to flooding
# the server's network or CPU.
opts = {'level': self.ui.configint('server', 'zliblevel', -1)}
return HGTYPE, util.compengines['zlib'], opts
def iscmd(cmd):
return cmd in wireproto.commands
def call(repo, req, cmd):
p = webproto(req, repo.ui)
def genversion2(gen, compress, engine, engineopts):
# application/mercurial-0.2 always sends a payload header
# identifying the compression engine.
name = engine.wireprotosupport().name
assert 0 < len(name) < 256
yield struct.pack('B', len(name))
yield name
if compress:
for chunk in engine.compressstream(gen, opts=engineopts):
yield chunk
else:
for chunk in gen:
yield chunk
rsp = wireproto.dispatch(repo, p, cmd)
if isinstance(rsp, str):
req.respond(HTTP_OK, HGTYPE, body=rsp)
return []
elif isinstance(rsp, wireproto.streamres):
if rsp.reader:
gen = iter(lambda: rsp.reader.read(32768), '')
else:
gen = rsp.gen
# This code for compression should not be streamres specific. It
# is here because we only compress streamres at the moment.
mediatype, engine, engineopts = p.responsetype(rsp.v1compressible)
if mediatype == HGTYPE and rsp.v1compressible:
gen = engine.compressstream(gen, engineopts)
elif mediatype == HGTYPE2:
gen = genversion2(gen, rsp.v1compressible, engine, engineopts)
req.respond(HTTP_OK, mediatype)
return gen
elif isinstance(rsp, wireproto.pushres):
val = p.restore()
rsp = '%d\n%s' % (rsp.res, val)
req.respond(HTTP_OK, HGTYPE, body=rsp)
return []
elif isinstance(rsp, wireproto.pusherr):
# drain the incoming bundle
req.drain()
p.restore()
rsp = '0\n%s\n' % rsp.res
req.respond(HTTP_OK, HGTYPE, body=rsp)
return []
elif isinstance(rsp, wireproto.ooberror):
rsp = rsp.message
req.respond(HTTP_OK, HGERRTYPE, body=rsp)
return []