# HG changeset patch # User Pierre-Yves David # Date 2021-05-03 17:46:25 # Node ID b876f0bf73662a72e49cdc4ecaa174ab9f6e3b7a # Parent e340b556a13e8d7f11c501207746395c90bd1cdb revlog: introduce a plain compression mode That mode is simple: it means the chunk contains uncompressed data and can be used directly. Differential Revision: https://phab.mercurial-scm.org/D10650 diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -36,6 +36,7 @@ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, COMP_MODE_INLINE, + COMP_MODE_PLAIN, FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, @@ -1757,7 +1758,16 @@ class revlog(object): Returns a str holding uncompressed data for the requested revision. """ - return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1]) + compression_mode = self.index[rev][10] + data = self._getsegmentforrevs(rev, rev, df=df)[1] + if compression_mode == COMP_MODE_PLAIN: + return data + elif compression_mode == COMP_MODE_INLINE: + return self.decompress(data) + else: + msg = 'unknown compression mode %d' + msg %= compression_mode + raise error.RevlogError(msg) def _chunks(self, revs, df=None, targetsize=None): """Obtain decompressed chunks for the specified revisions. @@ -1810,8 +1820,16 @@ class revlog(object): if inline: chunkstart += (rev + 1) * iosize chunklength = length(rev) + comp_mode = self.index[rev][10] c = buffer(data, chunkstart - offset, chunklength) - ladd(decomp(c)) + if comp_mode == COMP_MODE_PLAIN: + ladd(c) + elif comp_mode == COMP_MODE_INLINE: + ladd(decomp(c)) + else: + msg = 'unknown compression mode %d' + msg %= comp_mode + raise error.RevlogError(msg) return l @@ -2461,6 +2479,20 @@ class revlog(object): deltainfo = deltacomputer.finddeltainfo(revinfo, fh) + compression_mode = COMP_MODE_INLINE + if self._docket is not None: + h, d = deltainfo.data + if not h and not d: + # no data to store at all... 
declare them uncompressed + compression_mode = COMP_MODE_PLAIN + elif not h and d[0:1] == b'\0': + compression_mode = COMP_MODE_PLAIN + elif h == b'u': + # we have a more efficient way to declare uncompressed + h = b'' + compression_mode = COMP_MODE_PLAIN + deltainfo = deltautil.drop_u_compression(deltainfo) + if sidedata and self.hassidedata: serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) sidedata_offset = offset + deltainfo.deltalen @@ -2482,7 +2514,7 @@ class revlog(object): node, sidedata_offset, len(serialized_sidedata), - COMP_MODE_INLINE, + compression_mode, ) self.index.append(e) diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -119,6 +119,10 @@ REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_I # These constants are used in revlog version >=2 to denote the compression used # for a chunk. +# Chunk uses no compression: the data stored on disk can be directly used as +# the chunk value, without any header information prefixed. +COMP_MODE_PLAIN = 0 + # Chunk use a compression mode stored "inline" at the start of the chunk # itself. This is the mode always used for revlog version "0" and "1" COMP_MODE_INLINE = 2 diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -553,6 +553,24 @@ class _deltainfo(object): snapshotdepth = attr.ib() +def drop_u_compression(delta): + """turn a "u" (no-compression) delta into no-compression without header + + This is useful for revlog formats that have a better compression method. 
+ """ + assert delta.data[0] == b'u', delta.data[0] + return _deltainfo( + delta.distance, + delta.deltalen - 1, + (b'', delta.data[1]), + delta.base, + delta.chainbase, + delta.chainlen, + delta.compresseddeltalen, + delta.snapshotdepth, + ) + + def isgooddeltainfo(revlog, deltainfo, revinfo): """Returns True if the given delta is good. Good means that it is within the disk span, disk size, and chain length bounds that we know to be