# HG changeset patch # User Pierre-Yves David # Date 2021-04-01 09:31:54 # Node ID d57386e5c80e7f08b7b111c4777a8575011779e4 # Parent 5e64c93d5f9407d0d29909c14bb3d39fb9817aca revlog: have an explicit "pack_header" method Having to pass the version header when retrieving the binary version of every single entry is a bit silly. So we extract that special logic into its own method. This also prepares the move to a newer revlog format, not storing the header within an actual entry… Differential Revision: https://phab.mercurial-scm.org/D10510 diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -343,18 +343,28 @@ static PyObject *index_get(indexObject * } } /* + * Pack header information in binary + */ +static PyObject *index_pack_header(indexObject *self, PyObject *args) +{ + int header; + char out[4]; + if (!PyArg_ParseTuple(args, "I", &header)) { + return NULL; + } + putbe32(header, out); + return PyBytes_FromStringAndSize(out, 4); +} +/* * Return the raw binary string representing a revision */ -static PyObject *index_entry_binary(indexObject *self, PyObject *args) +static PyObject *index_entry_binary(indexObject *self, PyObject *value) { long rev; - int header; const char *data; - char entry[v2_hdrsize]; - Py_ssize_t length = index_length(self); - if (!PyArg_ParseTuple(args, "lI", &rev, &header)) { + if (!pylong_to_long(value, &rev)) { return NULL; } if (rev < 0 || rev >= length) { @@ -367,10 +377,8 @@ static PyObject *index_entry_binary(inde if (data == NULL) return NULL; if (rev == 0) { - /* put the header at the start of the first entry */ - memcpy(entry, data, self->hdrsize); - putbe32(header, entry); - return PyBytes_FromStringAndSize(entry, self->hdrsize); + /* the header is eating the start of the first entry */ + return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4); } return PyBytes_FromStringAndSize(data, self->hdrsize); } @@ -2891,8 +2899,10 @@ static PyMethodDef index_methods[] = 
{ {"shortest", (PyCFunction)index_shortest, METH_VARARGS, "find length of shortest hex nodeid of a binary ID"}, {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"}, - {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS, + {"entry_binary", (PyCFunction)index_entry_binary, METH_O, "return an entry in binary form"}, + {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS, + "pack the revlog header information into binary"}, {NULL} /* Sentinel */ }; diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py +++ b/mercurial/pure/parsers.py @@ -127,14 +127,17 @@ class BaseIndexObject(object): r = (offset_type(0, gettype(r[0])),) + r[1:] return r - def entry_binary(self, rev, header): + def pack_header(self, header): + """pack header information as binary""" + v_fmt = revlog_constants.INDEX_HEADER + return v_fmt.pack(header) + + def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] p = revlog_constants.INDEX_ENTRY_V1.pack(*entry) if rev == 0: - v_fmt = revlog_constants.INDEX_HEADER - v_bin = v_fmt.pack(header) - p = v_bin + p[v_fmt.size :] + p = p[revlog_constants.INDEX_HEADER.size :] return p @@ -286,14 +289,12 @@ class Index2Mixin(object): msg = b"cannot rewrite entries outside of this transaction" raise KeyError(msg) - def entry_binary(self, rev, header): + def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) if rev == 0: - v_fmt = revlog_constants.INDEX_HEADER - v_bin = v_fmt.pack(header) - p = v_bin + p[v_fmt.size :] + p = p[revlog_constants.INDEX_HEADER.size :] return p diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -266,7 +266,7 @@ class revlogoldindex(list): return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid) return list.__getitem__(self, i) - def entry_binary(self, 
rev, header): + def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] if gettype(entry[0]): @@ -284,6 +284,10 @@ class revlogoldindex(list): ) return INDEX_ENTRY_V0.pack(*e2) + def pack_header(self, header): + """Pack header information in binary""" + return b'' + def parse_index_v0(data, inline): s = INDEX_ENTRY_V0.size @@ -2041,7 +2045,10 @@ class revlog(object): self.version &= ~FLAG_INLINE_DATA self._inline = False for i in self: - e = self.index.entry_binary(i, self.version) + e = self.index.entry_binary(i) + if i == 0: + header = self.index.pack_header(self.version) + e = header + e fp.write(e) # the temp file replace the real index when we exit the context @@ -2363,7 +2370,10 @@ class revlog(object): e = e[:8] self.index.append(e) - entry = self.index.entry_binary(curr, self.version) + entry = self.index.entry_binary(curr) + if curr == 0: + header = self.index.pack_header(self.version) + entry = header + entry self._writeentry( transaction, ifh, @@ -3216,5 +3226,8 @@ class revlog(object): for i, entry in enumerate(new_entries): rev = startrev + i self.index.replace_sidedata_info(rev, entry[8], entry[9]) - packed = self.index.entry_binary(rev, self.version) + packed = self.index.entry_binary(rev) + if rev == 0: + header = self.index.pack_header(self.version) + packed = header + packed fp.write(packed) diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs +++ b/rust/hg-cpython/src/revlog.rs @@ -177,6 +177,11 @@ py_class!(pub class MixedIndex |py| { self.call_cindex(py, "entry_binary", args, kw) } + /// return a binary packed version of the header + def pack_header(&self, *args, **kw) -> PyResult { + self.call_cindex(py, "pack_header", args, kw) + } + /// get an index entry def get(&self, *args, **kw) -> PyResult { self.call_cindex(py, "get", args, kw)