# HG changeset patch # User Pierre-Yves David # Date 2021-05-03 19:34:02 # Node ID 2b69555e4875b924c33899e03943d65a4bd58378 # Parent ff9fd7107d118efc7a1e82f1656a17f3a237f1e2 revlog: introduce a compression mode for sidedata in the revlog index We will use this for compression for the sidedata payload. Differential Revision: https://phab.mercurial-scm.org/D10653 diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py +++ b/mercurial/bundlerepo.py @@ -106,6 +106,7 @@ class bundlerevlog(revlog.revlog): 0, 0, revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -118,9 +118,9 @@ static Py_ssize_t inline_scan(indexObjec static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB"); +static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB"); #else -static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB"); +static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB"); #endif /* A RevlogNG v1 index entry is 64 bytes long. */ @@ -296,7 +296,7 @@ static PyObject *index_get(indexObject * uint64_t offset_flags, sidedata_offset; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, sidedata_comp_len; - char data_comp_mode; + char data_comp_mode, sidedata_comp_mode; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -339,16 +339,18 @@ static PyObject *index_get(indexObject * sidedata_offset = 0; sidedata_comp_len = 0; data_comp_mode = comp_mode_inline; + sidedata_comp_mode = comp_mode_inline; } else { sidedata_offset = getbe64(data + 64); sidedata_comp_len = getbe32(data + 72); - data_comp_mode = data[76]; + data_comp_mode = data[76] & 3; + sidedata_comp_mode = ((data[76] >> 2) & 3); } return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, self->nodelen, sidedata_offset, sidedata_comp_len, - data_comp_mode); + data_comp_mode, sidedata_comp_mode); } /* * Pack header information in binary @@ -449,16 +451,17 @@ static PyObject *index_append(indexObjec { uint64_t offset_flags, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; - char data_comp_mode; + char data_comp_mode, sidedata_comp_mode; Py_ssize_t c_node_id_len, sidedata_comp_len; const char *c_node_id; + char comp_field; char *data; if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2, &c_node_id, &c_node_id_len, &sidedata_offset, &sidedata_comp_len, - &data_comp_mode)) { + &data_comp_mode, &sidedata_comp_mode)) { PyErr_SetString(PyExc_TypeError, "11-tuple required"); return NULL; } @@ -467,12 +470,20 @@ static PyObject *index_append(indexObjec PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; } - if (self->format_version == format_v1 && - data_comp_mode != comp_mode_inline) { - PyErr_Format(PyExc_ValueError, - "invalid data compression mode: %i", - data_comp_mode); - return NULL; + if (self->format_version == format_v1) { + + if (data_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid data compression mode: %i", + data_comp_mode); + return NULL; + } + if (sidedata_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid sidedata compression mode: %i", + sidedata_comp_mode); + return NULL; + } } if (self->new_length == self->added_length) { @@ -501,7 +512,9 @@ static PyObject *index_append(indexObjec if (self->format_version == format_v2) { putbe64(sidedata_offset, data + 64); putbe32(sidedata_comp_len, data + 72); - data[76] = (char)data_comp_mode; + comp_field = data_comp_mode & 3; + comp_field = comp_field | (sidedata_comp_mode & 3) << 2; + data[76] = comp_field; /* Padding for 96 bytes alignment */ memset(data + 77, 0, self->entry_size - 77); } @@ -2777,9 +2790,9 @@ static int index_init(indexObject *self, self->entry_size = v1_entry_size; } - self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0, - 0, 0, -1, -1, -1, -1, nullid, - self->nodelen, 0, 0, comp_mode_inline); + self->nullentry = Py_BuildValue( + PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1, + nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline); if (!self->nullentry) return -1; diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -66,6 +66,7 @@ class BaseIndexObject(object): 0, 0, revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) @util.propertycache @@ -147,7 +148,12 @@ class BaseIndexObject(object): def _unpack_entry(self, data): r = self.index_format.unpack(data) - r = r + (0, 0, revlog_constants.COMP_MODE_INLINE) + r = r + ( + 0, + 0, + revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, + ) return r def pack_header(self, header): @@ -315,10 +321,19 @@ class Index2Mixin(object): self._extra[rev - self._lgt] = new def _unpack_entry(self, data): - return self.index_format.unpack(data) + data = self.index_format.unpack(data) + entry = data[:10] + data_comp = data[10] & 3 + sidedata_comp = (data[10] & (3 << 2)) >> 2 + return entry + (data_comp, sidedata_comp) def _pack_entry(self, entry): - return self.index_format.pack(*entry[:11]) + data = entry[:10] + data_comp = entry[10] & 3 + sidedata_comp = (entry[11] & 3) << 2 + data += (data_comp | sidedata_comp,) + + return self.index_format.pack(*data) def entry_binary(self, rev): """return the raw binary string representing a revision""" diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -345,6 +345,9 @@ class revlog(object): (see "COMP_MODE_*" constants for details). For revlog version 0 and 1 this will always be COMP_MODE_INLINE. + [11] side-data compression mode: + two bits that detail the way the sidedata chunk is compressed on disk. + (see "COMP_MODE_*" constants for details) """ _flagserrorclass = error.RevlogError @@ -2517,7 +2520,9 @@ class revlog(object): compression_mode = COMP_MODE_PLAIN deltainfo = deltautil.drop_u_compression(deltainfo) + sidedata_compression_mode = COMP_MODE_INLINE if sidedata and self.hassidedata: + sidedata_compression_mode = COMP_MODE_PLAIN serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) sidedata_offset = offset + deltainfo.deltalen else: @@ -2539,6 +2544,7 @@ class revlog(object): sidedata_offset, len(serialized_sidedata), compression_mode, + sidedata_compression_mode, ) self.index.append(e) diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py --- a/mercurial/revlogutils/revlogv0.py +++ b/mercurial/revlogutils/revlogv0.py @@ -55,6 +55,7 @@ class revlogoldindex(list): 0, 0, COMP_MODE_INLINE, + COMP_MODE_INLINE, ) @property diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py --- a/mercurial/unionrepo.py +++ b/mercurial/unionrepo.py @@ -70,6 +70,7 @@ class unionrevlog(revlog.revlog): _sdo, _sds, _dcm, + _sdcm, ) = rev flags = _start & 0xFFFF @@ -105,6 +106,7 @@ class unionrevlog(revlog.revlog): 0, # sidedata offset 0, # sidedata size revlog_constants.COMP_MODE_INLINE, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py --- a/tests/test-parseindex2.py +++ b/tests/test-parseindex2.py @@ -52,7 +52,12 @@ def py_parseindex(data, inline): cache = (0, data) while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0, constants.COMP_MODE_INLINE) + e = e + ( + 0, + 0, + constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, + ) nodemap[e[7]] = n append(e) n += 1 @@ -62,7 +67,12 @@ def py_parseindex(data, inline): else: while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0, constants.COMP_MODE_INLINE) + e = e + ( + 0, + 0, + constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, + ) nodemap[e[7]] = n append(e) n += 1 @@ -257,6 +267,7 @@ class parseindex2tests(unittest.TestCase 0, 0, constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, ) index, junk = parsers.parse_index2(data_inlined, True) got = index[-1] @@ -291,6 +302,7 @@ class parseindex2tests(unittest.TestCase 0, 0, constants.COMP_MODE_INLINE, + constants.COMP_MODE_INLINE, ) index.append(e)