# HG changeset patch # User Pierre-Yves David # Date 2021-05-03 16:19:16 # Node ID 130c9f7ed9147092a0132af57d095363f49bba31 # Parent 013c645dd28c3ba57e1524a2b0cbb0f43aaaf8db revlog: add a "data compression mode" entry in the index tuple That will make it possible to keep track of compression information in the revlog index, opening the way to more efficient revision restoration (in native code, but the Python usage is already defeating performance work). We start with adding a new entry to the index tuple, using a value matching the current behavior. We will introduce storage and other values in later changesets. Differential Revision: https://phab.mercurial-scm.org/D10646 diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py +++ b/mercurial/bundlerepo.py @@ -105,6 +105,7 @@ class bundlerevlog(revlog.revlog): node, 0, 0, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c +++ b/mercurial/cext/parsers.c @@ -668,7 +668,7 @@ void dirs_module_init(PyObject *mod); void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 18; +static const int version = 19; static void module_init(PyObject *mod) { diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -118,9 +118,9 @@ static Py_ssize_t inline_scan(indexObjec static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki"); +static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB"); #else -static const char *const tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki"); +static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB"); #endif /* A RevlogNG v1 index entry is 64 bytes long. 
*/ @@ -132,6 +132,8 @@ static const long v2_entry_size = 96; static const long format_v1 = 1; /* Internal only, could be any number */ static const long format_v2 = 2; /* Internal only, could be any number */ +static const char comp_mode_inline = 2; + static void raise_revlog_error(void) { PyObject *mod = NULL, *dict = NULL, *errclass = NULL; @@ -294,6 +296,7 @@ static PyObject *index_get(indexObject * uint64_t offset_flags, sidedata_offset; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, sidedata_comp_len; + char data_comp_mode; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -340,9 +343,11 @@ static PyObject *index_get(indexObject * sidedata_comp_len = getbe32(data + 72); } + data_comp_mode = comp_mode_inline; return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, - self->nodelen, sidedata_offset, sidedata_comp_len); + self->nodelen, sidedata_offset, sidedata_comp_len, + data_comp_mode); } /* * Pack header information in binary @@ -443,6 +448,7 @@ static PyObject *index_append(indexObjec { uint64_t offset_flags, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + char data_comp_mode; Py_ssize_t c_node_id_len, sidedata_comp_len; const char *c_node_id; char *data; @@ -450,8 +456,9 @@ static PyObject *index_append(indexObjec if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2, &c_node_id, &c_node_id_len, - &sidedata_offset, &sidedata_comp_len)) { - PyErr_SetString(PyExc_TypeError, "10-tuple required"); + &sidedata_offset, &sidedata_comp_len, + &data_comp_mode)) { + PyErr_SetString(PyExc_TypeError, "11-tuple required"); return NULL; } @@ -459,6 +466,12 @@ static PyObject *index_append(indexObjec PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; } + if (data_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + 
"invalid data compression mode: %i", + data_comp_mode); + return NULL; + } if (self->new_length == self->added_length) { size_t new_added_length = @@ -2761,9 +2774,9 @@ static int index_init(indexObject *self, self->entry_size = v1_entry_size; } - self->nullentry = - Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1, - -1, -1, nullid, self->nodelen, 0, 0); + self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0, + 0, 0, -1, -1, -1, -1, nullid, + self->nodelen, 0, 0, comp_mode_inline); if (!self->nullentry) return -1; diff --git a/mercurial/policy.py b/mercurial/policy.py --- a/mercurial/policy.py +++ b/mercurial/policy.py @@ -80,7 +80,7 @@ def _importfrom(pkgname, modname): ('cext', 'bdiff'): 3, ('cext', 'mpatch'): 1, ('cext', 'osutil'): 4, - ('cext', 'parsers'): 18, + ('cext', 'parsers'): 19, } # map import request to other package or module diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -54,7 +54,19 @@ class BaseIndexObject(object): # Size of a C long int, platform independent int_size = struct.calcsize(b'>i') # An empty index entry, used as a default value to be overridden, or nullrev - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + revlog_constants.COMP_MODE_INLINE, + ) @util.propertycache def entry_size(self): @@ -135,7 +147,7 @@ class BaseIndexObject(object): def _unpack_entry(self, data): r = self.index_format.unpack(data) - r = r + (0, 0) + r = r + (0, 0, revlog_constants.COMP_MODE_INLINE) return r def pack_header(self, header): @@ -303,16 +315,17 @@ class Index2Mixin(object): self._extra[rev - self._lgt] = new def _unpack_entry(self, data): - return self.index_format.unpack(data) + return self.index_format.unpack(data) + ( + revlog_constants.COMP_MODE_INLINE, + ) def _pack_entry(self, entry): - return 
self.index_format.pack(*entry) + return self.index_format.pack(*entry[:10]) def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] - p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) - return p + return self._pack_entry(entry) def pack_header(self, header): """pack header information as binary""" diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -35,6 +35,7 @@ from .i18n import _ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, + COMP_MODE_INLINE, FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, @@ -336,6 +337,12 @@ class revlog(object): [9] sidedata chunk length: The size, in bytes, of the revision's side-data chunk. + + [10] data compression mode: + two bits that detail the way the data chunk is compressed on disk. + (see "COMP_MODE_*" constants for details). For revlog version 0 and + 1 this will always be COMP_MODE_INLINE. + """ _flagserrorclass = error.RevlogError @@ -2474,6 +2481,7 @@ class revlog(object): node, sidedata_offset, len(serialized_sidedata), + COMP_MODE_INLINE, ) self.index.append(e) diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py +++ b/mercurial/revlogutils/constants.py @@ -1,4 +1,4 @@ -# revlogdeltas.py - constant used for revlog logic +# revlogdeltas.py - constant used for revlog logic. # # Copyright 2005-2007 Olivia Mackall # Copyright 2018 Octobus @@ -114,6 +114,14 @@ REVIDX_FLAGS_ORDER = [ # bitmark for flags that could cause rawdata content change REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED +## chunk compression mode constants: +# These constants are used in revlog version >=2 to denote the compression used +# for a chunk. + +# Chunk use a compression mode stored "inline" at the start of the chunk +# itself. 
This is the mode always used for revlog version "0" and "1" +COMP_MODE_INLINE = 2 + SUPPORTED_FLAGS = { REVLOGV0: REVLOGV0_FLAGS, REVLOGV1: REVLOGV1_FLAGS, @@ -152,4 +160,5 @@ FEATURES_BY_VERSION = { }, } + SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py --- a/mercurial/revlogutils/revlogv0.py +++ b/mercurial/revlogutils/revlogv0.py @@ -9,6 +9,7 @@ from __future__ import absolute_import from ..node import sha1nodeconstants from .constants import ( + COMP_MODE_INLINE, INDEX_ENTRY_V0, ) from ..i18n import _ @@ -42,7 +43,19 @@ def offset_type(offset, type): class revlogoldindex(list): entry_size = INDEX_ENTRY_V0.size - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + COMP_MODE_INLINE, + ) @property def nodemap(self): @@ -138,6 +151,7 @@ def parse_index_v0(data, inline): e[6], 0, # no side data support 0, # no side data support + COMP_MODE_INLINE, ) index.append(e2) nodemap[e[6]] = n diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py --- a/mercurial/unionrepo.py +++ b/mercurial/unionrepo.py @@ -31,6 +31,10 @@ from . 
import ( vfs as vfsmod, ) +from .revlogutils import ( + constants as revlog_constants, +) + class unionrevlog(revlog.revlog): def __init__(self, opener, radix, revlog2, linkmapper): @@ -65,6 +69,7 @@ class unionrevlog(revlog.revlog): node, _sdo, _sds, + _dcm, ) = rev flags = _start & 0xFFFF @@ -99,6 +104,7 @@ class unionrevlog(revlog.revlog): node, 0, # sidedata offset 0, # sidedata size + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py --- a/tests/test-parseindex2.py +++ b/tests/test-parseindex2.py @@ -21,6 +21,9 @@ from mercurial import ( policy, pycompat, ) +from mercurial.revlogutils import ( + constants, +) parsers = policy.importmod('parsers') @@ -49,7 +52,7 @@ def py_parseindex(data, inline): cache = (0, data) while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -59,7 +62,7 @@ def py_parseindex(data, inline): else: while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -242,7 +245,19 @@ class parseindex2tests(unittest.TestCase break def testminusone(self): - want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + want = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + constants.COMP_MODE_INLINE, + ) index, junk = parsers.parse_index2(data_inlined, True) got = index[-1] self.assertEqual(want, got) # inline data @@ -264,7 +279,20 @@ class parseindex2tests(unittest.TestCase # node won't matter for this test, let's just make sure # they don't collide. Other data don't matter either. node = hexrev(p1) + hexrev(p2) + b'.' 
* 12 - index.append((0, 0, 12, 1, 34, p1, p2, node, 0, 0)) + e = ( + 0, + 0, + 12, + 1, + 34, + p1, + p2, + node, + 0, + 0, + constants.COMP_MODE_INLINE, + ) + index.append(e) appendrev(4) appendrev(5)