# HG changeset patch # User Jun Wu # Date 2016-12-06 11:44:49 # Node ID 6146d5acee69f6e52650a239b442b299eeac2675 # Parent 541949a10a68f4c2a415f73b961c1e92af95c293 parsers: use buffer to store revlog index Previously, the revlog index passed to parse_index2 must be a "string", which means we have to read the whole revlog index into memory. This patch makes the code accept a generic Py_buffer, to be more flexible - it could be a "string", or anything that implements the buffer interface, like a mmap-ed region. Note: ideally we want to remove the "data" field. However, it is still used in parse_index2: if (idx->inlined) { cache = Py_BuildValue("iO", 0, idx->data); .... } .... tuple = Py_BuildValue("NN", idx, cache); .... return tuple; Its only users are revlogio.parseindex and revlog.__init__: # revlogio.parseindex index, cache = parsers.parse_index2(data, inline) return index, getattr(index, 'nodemap', None), cache # revlog.__init__ d = self._io.parseindex(indexdata, self._inline) self.index, nodemap, self._chunkcache = d Maybe we could move the logic (testing inline and returning "data" object) to revlog.py. But that should be a separate patch. diff --git a/mercurial/parsers.c b/mercurial/parsers.c --- a/mercurial/parsers.c +++ b/mercurial/parsers.c @@ -753,6 +753,7 @@ typedef struct { PyObject_HEAD /* Type-specific fields go here. 
*/ PyObject *data; /* raw bytes of index */ + Py_buffer buf; /* buffer of data */ PyObject **cache; /* cached tuples */ const char **offsets; /* populated on demand */ Py_ssize_t raw_length; /* original number of elements */ @@ -808,7 +809,7 @@ static const char *index_deref(indexObje return self->offsets[pos]; } - return PyBytes_AS_STRING(self->data) + pos * v1_hdrsize; + return (const char *)(self->buf.buf) + pos * v1_hdrsize; } static inline int index_get_parents(indexObject *self, Py_ssize_t rev, @@ -2389,9 +2390,9 @@ static int index_assign_subscript(indexO */ static Py_ssize_t inline_scan(indexObject *self, const char **offsets) { - const char *data = PyBytes_AS_STRING(self->data); + const char *data = (const char *)self->buf.buf; Py_ssize_t pos = 0; - Py_ssize_t end = PyBytes_GET_SIZE(self->data); + Py_ssize_t end = self->buf.len; long incr = v1_hdrsize; Py_ssize_t len = 0; @@ -2425,6 +2426,7 @@ static int index_init(indexObject *self, self->added = NULL; self->cache = NULL; self->data = NULL; + memset(&self->buf, 0, sizeof(self->buf)); self->headrevs = NULL; self->filteredrevs = Py_None; Py_INCREF(Py_None); @@ -2433,11 +2435,15 @@ static int index_init(indexObject *self, if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) return -1; - if (!PyBytes_Check(data_obj)) { - PyErr_SetString(PyExc_TypeError, "data is not a string"); + if (!PyObject_CheckBuffer(data_obj)) { + PyErr_SetString(PyExc_TypeError, + "data does not support buffer interface"); return -1; } - size = PyBytes_GET_SIZE(data_obj); + + if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1) + return -1; + size = self->buf.len; self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); self->data = data_obj; @@ -2478,6 +2484,10 @@ static void index_dealloc(indexObject *s { _index_clearcaches(self); Py_XDECREF(self->filteredrevs); + if (self->buf.buf) { + PyBuffer_Release(&self->buf); + memset(&self->buf, 0, sizeof(self->buf)); + } Py_XDECREF(self->data); 
Py_XDECREF(self->added); PyObject_Del(self); @@ -2577,7 +2587,8 @@ static PyTypeObject indexType = { * follows: * * index: an index object that lazily parses RevlogNG records - * cache: if data is inlined, a tuple (index_file_content, 0), else None + * cache: if data is inlined, a tuple (0, index_file_content), else None + * index_file_content could be a string, or a buffer * * added complications are for backwards compatibility */