# HG changeset patch # User Pierre-Yves David # Date 2021-05-03 19:13:24 # Node ID ff9fd7107d118efc7a1e82f1656a17f3a237f1e2 # Parent eac3591abbf4ee2111d1197f38c73c2e16b17837 revlog: implement a "default compression" mode The revlog docket now stores a default compression engine. When a chunk uses that compression, a dedicated mode is used in the revlog entry and we can directly route it to the right decompressor. We should probably make PLAIN and DEFAULT the only available modes for revlogv2, but this is something for later. Differential Revision: https://phab.mercurial-scm.org/D10652 diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -35,6 +35,7 @@ from .i18n import _ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, + COMP_MODE_DEFAULT, COMP_MODE_INLINE, COMP_MODE_PLAIN, FEATURES_BY_VERSION, @@ -708,6 +709,15 @@ class revlog(object): engine = util.compengines[self._compengine] return engine.revlogcompressor(self._compengineopts) + @util.propertycache + def _decompressor(self): + """the default decompressor""" + if self._docket is None: + return None + t = self._docket.default_compression_header + c = self._get_decompressor(t) + return c.decompress + def _indexfp(self): """file object for the revlog's index file""" return self.opener(self._indexfile, mode=b"r") @@ -1776,6 +1786,8 @@ class revlog(object): data = self._getsegmentforrevs(rev, rev, df=df)[1] if compression_mode == COMP_MODE_PLAIN: return data + elif compression_mode == COMP_MODE_DEFAULT: + return self._decompressor(data) elif compression_mode == COMP_MODE_INLINE: return self.decompress(data) else: @@ -1829,6 +1841,8 @@ class revlog(object): return [self._chunk(rev, df=df) for rev in revschunk] decomp = self.decompress + # self._decompressor might be None, but will not be used in that case + def_decomp = self._decompressor for rev in revschunk: chunkstart = start(rev) if inline: @@ -1840,6 +1854,8 @@ class 
revlog(object): ladd(c) elif comp_mode == COMP_MODE_INLINE: ladd(decomp(c)) + elif comp_mode == COMP_MODE_DEFAULT: + ladd(def_decomp(c)) else: msg = 'unknown compression mode %d' msg %= comp_mode @@ -2489,8 +2505,12 @@ class revlog(object): if not h and not d: # not data to store at all... declare them uncompressed compression_mode = COMP_MODE_PLAIN - elif not h and d[0:1] == b'\0': - compression_mode = COMP_MODE_PLAIN + elif not h: + t = d[0:1] + if t == b'\0': + compression_mode = COMP_MODE_PLAIN + elif t == self._docket.default_compression_header: + compression_mode = COMP_MODE_DEFAULT elif h == b'u': # we have a more efficient way to declare uncompressed h = b'' diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -123,6 +123,16 @@ REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_I # chunk value. Without any header information prefixed. COMP_MODE_PLAIN = 0 +# Chunk use the "default compression" for the revlog (usually defined in the +# revlog docket). A header is still used. +# +# XXX: keeping a header is probably not useful and we should probably drop it. +# +# XXX: The value of allow mixed type of compression in the revlog is unclear +# and we should consider making PLAIN/DEFAULT the only available mode for +# revlog v2, disallowing INLINE mode. +COMP_MODE_DEFAULT = 1 + # Chunk use a compression mode stored "inline" at the start of the chunk # itself. This is the mode always used for revlog version "0" and "1" COMP_MODE_INLINE = 2 diff --git a/mercurial/revlogutils/docket.py b/mercurial/revlogutils/docket.py --- a/mercurial/revlogutils/docket.py +++ b/mercurial/revlogutils/docket.py @@ -21,6 +21,7 @@ import struct from .. import ( error, + util, ) from . import ( @@ -36,7 +37,8 @@ from . 
import ( # * 8 bytes: pending size of index-data # * 8 bytes: size of data # * 8 bytes: pending size of data -S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL') +# * 1 byte: default compression header +S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc') class RevlogDocket(object): @@ -51,6 +53,7 @@ class RevlogDocket(object): pending_index_end=0, data_end=0, pending_data_end=0, + default_compression_header=None, ): self._version_header = version_header self._read_only = bool(use_pending) @@ -71,6 +74,7 @@ class RevlogDocket(object): else: self._index_end = self._initial_index_end self._data_end = self._initial_data_end + self.default_compression_header = default_compression_header def index_filepath(self): """file path to the current index file associated to this docket""" @@ -134,6 +138,7 @@ class RevlogDocket(object): self._index_end, official_data_end, self._data_end, + self.default_compression_header, ) return S_HEADER.pack(*data) @@ -142,7 +147,12 @@ def default_docket(revlog, version_heade """given a revlog version a new docket object for the given revlog""" if (version_header & 0xFFFF) != constants.REVLOGV2: return None - docket = RevlogDocket(revlog, version_header=version_header) + comp = util.compengines[revlog._compengine].revlogheader() + docket = RevlogDocket( + revlog, + version_header=version_header, + default_compression_header=comp, + ) docket._dirty = True return docket @@ -155,6 +165,7 @@ def parse_docket(revlog, data, use_pendi pending_index_size = header[2] data_size = header[3] pending_data_size = header[4] + default_compression_header = header[5] docket = RevlogDocket( revlog, use_pending=use_pending, @@ -163,5 +174,6 @@ def parse_docket(revlog, data, use_pendi pending_index_end=pending_index_size, data_end=data_size, pending_data_end=pending_data_size, + default_compression_header=default_compression_header, ) return docket