##// END OF EJS Templates
revlog: store new index entries as binary...
Joerg Sonnenberger -
r46548:0ce15a8c default
parent child Browse files
Show More
@@ -82,9 +82,10 b' struct indexObjectStruct {'
82 PyObject *data; /* raw bytes of index */
82 PyObject *data; /* raw bytes of index */
83 Py_buffer buf; /* buffer of data */
83 Py_buffer buf; /* buffer of data */
84 const char **offsets; /* populated on demand */
84 const char **offsets; /* populated on demand */
85 Py_ssize_t raw_length; /* original number of elements */
85 Py_ssize_t length; /* current on-disk number of elements */
86 Py_ssize_t length; /* current number of elements */
86 unsigned new_length; /* number of added elements */
87 PyObject *added; /* populated on demand */
87 unsigned added_length; /* space reserved for added elements */
88 char *added; /* populated on demand */
88 PyObject *headrevs; /* cache, invalidated on changes */
89 PyObject *headrevs; /* cache, invalidated on changes */
89 PyObject *filteredrevs; /* filtered revs set */
90 PyObject *filteredrevs; /* filtered revs set */
90 nodetree nt; /* base-16 trie */
91 nodetree nt; /* base-16 trie */
@@ -97,9 +98,7 b' struct indexObjectStruct {'
97
98
98 static Py_ssize_t index_length(const indexObject *self)
99 static Py_ssize_t index_length(const indexObject *self)
99 {
100 {
100 if (self->added == NULL)
101 return self->length + self->new_length;
101 return self->length;
102 return self->length + PyList_GET_SIZE(self->added);
103 }
102 }
104
103
105 static PyObject *nullentry = NULL;
104 static PyObject *nullentry = NULL;
@@ -155,11 +154,14 b' cleanup:'
155 */
154 */
156 static const char *index_deref(indexObject *self, Py_ssize_t pos)
155 static const char *index_deref(indexObject *self, Py_ssize_t pos)
157 {
156 {
157 if (pos >= self->length)
158 return self->added + (pos - self->length) * v1_hdrsize;
159
158 if (self->inlined && pos > 0) {
160 if (self->inlined && pos > 0) {
159 if (self->offsets == NULL) {
161 if (self->offsets == NULL) {
160 Py_ssize_t ret;
162 Py_ssize_t ret;
161 self->offsets = PyMem_Malloc(self->raw_length *
163 self->offsets =
162 sizeof(*self->offsets));
164 PyMem_Malloc(self->length * sizeof(*self->offsets));
163 if (self->offsets == NULL)
165 if (self->offsets == NULL)
164 return (const char *)PyErr_NoMemory();
166 return (const char *)PyErr_NoMemory();
165 ret = inline_scan(self, self->offsets);
167 ret = inline_scan(self, self->offsets);
@@ -182,23 +184,11 b' static const char *index_deref(indexObje'
182 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
184 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
183 int maxrev)
185 int maxrev)
184 {
186 {
185 if (rev >= self->length) {
186 long tmp;
187 PyObject *tuple =
188 PyList_GET_ITEM(self->added, rev - self->length);
189 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
190 return -1;
191 }
192 ps[0] = (int)tmp;
193 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
194 return -1;
195 }
196 ps[1] = (int)tmp;
197 } else {
198 const char *data = index_deref(self, rev);
187 const char *data = index_deref(self, rev);
188
199 ps[0] = getbe32(data + 24);
189 ps[0] = getbe32(data + 24);
200 ps[1] = getbe32(data + 28);
190 ps[1] = getbe32(data + 28);
201 }
191
202 /* If index file is corrupted, ps[] may point to invalid revisions. So
192 /* If index file is corrupted, ps[] may point to invalid revisions. So
203 * there is a risk of buffer overflow to trust them unconditionally. */
193 * there is a risk of buffer overflow to trust them unconditionally. */
204 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
194 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
@@ -237,29 +227,13 b' static int HgRevlogIndex_GetParents(PyOb'
237
227
238 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
228 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
239 {
229 {
230 const char *data;
240 uint64_t offset;
231 uint64_t offset;
241 if (rev == nullrev) {
232
233 if (rev == nullrev)
242 return 0;
234 return 0;
243 }
235
244 if (rev >= self->length) {
236 data = index_deref(self, rev);
245 PyObject *tuple;
246 PyObject *pylong;
247 PY_LONG_LONG tmp;
248 tuple = PyList_GET_ITEM(self->added, rev - self->length);
249 pylong = PyTuple_GET_ITEM(tuple, 0);
250 tmp = PyLong_AsLongLong(pylong);
251 if (tmp == -1 && PyErr_Occurred()) {
252 return -1;
253 }
254 if (tmp < 0) {
255 PyErr_Format(PyExc_OverflowError,
256 "revlog entry size out of bound (%lld)",
257 (long long)tmp);
258 return -1;
259 }
260 offset = (uint64_t)tmp;
261 } else {
262 const char *data = index_deref(self, rev);
263 offset = getbe32(data + 4);
237 offset = getbe32(data + 4);
264 if (rev == 0) {
238 if (rev == 0) {
265 /* mask out version number for the first entry */
239 /* mask out version number for the first entry */
@@ -268,44 +242,27 b' static inline int64_t index_get_start(in'
268 uint32_t offset_high = getbe32(data);
242 uint32_t offset_high = getbe32(data);
269 offset |= ((uint64_t)offset_high) << 32;
243 offset |= ((uint64_t)offset_high) << 32;
270 }
244 }
271 }
272 return (int64_t)(offset >> 16);
245 return (int64_t)(offset >> 16);
273 }
246 }
274
247
275 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
248 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
276 {
249 {
277 if (rev == nullrev) {
250 const char *data;
251 int tmp;
252
253 if (rev == nullrev)
278 return 0;
254 return 0;
279 }
255
280 if (rev >= self->length) {
256 data = index_deref(self, rev);
281 PyObject *tuple;
257
282 PyObject *pylong;
258 tmp = (int)getbe32(data + 8);
283 long ret;
284 tuple = PyList_GET_ITEM(self->added, rev - self->length);
285 pylong = PyTuple_GET_ITEM(tuple, 1);
286 ret = PyInt_AsLong(pylong);
287 if (ret == -1 && PyErr_Occurred()) {
288 return -1;
289 }
290 if (ret < 0 || ret > (long)INT_MAX) {
291 PyErr_Format(PyExc_OverflowError,
292 "revlog entry size out of bound (%ld)",
293 ret);
294 return -1;
295 }
296 return (int)ret;
297 } else {
298 const char *data = index_deref(self, rev);
299 int tmp = (int)getbe32(data + 8);
300 if (tmp < 0) {
259 if (tmp < 0) {
301 PyErr_Format(PyExc_OverflowError,
260 PyErr_Format(PyExc_OverflowError,
302 "revlog entry size out of bound (%d)",
261 "revlog entry size out of bound (%d)", tmp);
303 tmp);
304 return -1;
262 return -1;
305 }
263 }
306 return tmp;
264 return tmp;
307 }
265 }
308 }
309
266
310 /*
267 /*
311 * RevlogNG format (all in big endian, data may be inlined):
268 * RevlogNG format (all in big endian, data may be inlined):
@@ -337,19 +294,16 b' static PyObject *index_get(indexObject *'
337 return NULL;
294 return NULL;
338 }
295 }
339
296
340 if (pos >= self->length) {
341 PyObject *obj;
342 obj = PyList_GET_ITEM(self->added, pos - self->length);
343 Py_INCREF(obj);
344 return obj;
345 }
346
347 data = index_deref(self, pos);
297 data = index_deref(self, pos);
348 if (data == NULL)
298 if (data == NULL)
349 return NULL;
299 return NULL;
350
300
351 offset_flags = getbe32(data + 4);
301 offset_flags = getbe32(data + 4);
352 if (pos == 0) /* mask out version number for the first entry */
302 /*
303 * The first entry on-disk needs the version number masked out,
304 * but this doesn't apply if entries are added to an empty index.
305 */
306 if (self->length && pos == 0)
353 offset_flags &= 0xFFFF;
307 offset_flags &= 0xFFFF;
354 else {
308 else {
355 uint32_t offset_high = getbe32(data);
309 uint32_t offset_high = getbe32(data);
@@ -383,13 +337,6 b' static const char *index_node(indexObjec'
383 if (pos >= length)
337 if (pos >= length)
384 return NULL;
338 return NULL;
385
339
386 if (pos >= self->length) {
387 PyObject *tuple, *str;
388 tuple = PyList_GET_ITEM(self->added, pos - self->length);
389 str = PyTuple_GetItem(tuple, 7);
390 return str ? PyBytes_AS_STRING(str) : NULL;
391 }
392
393 data = index_deref(self, pos);
340 data = index_deref(self, pos);
394 return data ? data + 32 : NULL;
341 return data ? data + 32 : NULL;
395 }
342 }
@@ -423,30 +370,48 b' static int node_check(PyObject *obj, cha'
423
370
424 static PyObject *index_append(indexObject *self, PyObject *obj)
371 static PyObject *index_append(indexObject *self, PyObject *obj)
425 {
372 {
426 char *node;
373 unsigned long offset_flags;
427 Py_ssize_t len;
374 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
375 Py_ssize_t c_node_id_len;
376 const char *c_node_id;
377 char *data;
428
378
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
379 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
380 &uncomp_len, &base_rev, &link_rev, &parent_1,
381 &parent_2, &c_node_id, &c_node_id_len)) {
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
382 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 return NULL;
383 return NULL;
432 }
384 }
385 if (c_node_id_len != 20 && c_node_id_len != 32) {
386 PyErr_SetString(PyExc_TypeError, "invalid node");
387 return NULL;
388 }
433
389
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
390 if (self->new_length == self->added_length) {
435 return NULL;
391 size_t new_added_length =
436
392 self->added_length ? self->added_length * 2 : 4096;
437 len = index_length(self);
393 void *new_added =
438
394 PyMem_Realloc(self->added, new_added_length * v1_hdrsize);
439 if (self->added == NULL) {
395 if (!new_added)
440 self->added = PyList_New(0);
396 return PyErr_NoMemory();
441 if (self->added == NULL)
397 self->added = new_added;
442 return NULL;
398 self->added_length = new_added_length;
443 }
399 }
444
400 rev = self->length + self->new_length;
445 if (PyList_Append(self->added, obj) == -1)
401 data = self->added + v1_hdrsize * self->new_length++;
446 return NULL;
402 putbe32(offset_flags >> 32, data);
403 putbe32(offset_flags & 0xffffffffU, data + 4);
404 putbe32(comp_len, data + 8);
405 putbe32(uncomp_len, data + 12);
406 putbe32(base_rev, data + 16);
407 putbe32(link_rev, data + 20);
408 putbe32(parent_1, data + 24);
409 putbe32(parent_2, data + 28);
410 memcpy(data + 32, c_node_id, c_node_id_len);
411 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
447
412
448 if (self->ntinitialized)
413 if (self->ntinitialized)
449 nt_insert(&self->nt, node, (int)len);
414 nt_insert(&self->nt, c_node_id, rev);
450
415
451 Py_CLEAR(self->headrevs);
416 Py_CLEAR(self->headrevs);
452 Py_RETURN_NONE;
417 Py_RETURN_NONE;
@@ -473,20 +438,8 b' static PyObject *index_stats(indexObject'
473 Py_CLEAR(t); \
438 Py_CLEAR(t); \
474 } while (0)
439 } while (0)
475
440
476 if (self->added) {
441 if (self->added_length)
477 Py_ssize_t len = PyList_GET_SIZE(self->added);
442 istat(new_length, "index entries added");
478 s = PyBytes_FromString("index entries added");
479 t = PyInt_FromSsize_t(len);
480 if (!s || !t)
481 goto bail;
482 if (PyDict_SetItem(obj, s, t) == -1)
483 goto bail;
484 Py_CLEAR(s);
485 Py_CLEAR(t);
486 }
487
488 if (self->raw_length != self->length)
489 istat(raw_length, "revs on disk");
490 istat(length, "revs in memory");
443 istat(length, "revs in memory");
491 istat(ntlookups, "node trie lookups");
444 istat(ntlookups, "node trie lookups");
492 istat(ntmisses, "node trie misses");
445 istat(ntmisses, "node trie misses");
@@ -998,22 +951,11 b' static inline int index_baserev(indexObj'
998 const char *data;
951 const char *data;
999 int result;
952 int result;
1000
953
1001 if (rev >= self->length) {
954 data = index_deref(self, rev);
1002 PyObject *tuple =
955 if (data == NULL)
1003 PyList_GET_ITEM(self->added, rev - self->length);
1004 long ret;
1005 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
1006 return -2;
956 return -2;
1007 }
957 result = getbe32(data + 16);
1008 result = (int)ret;
1009 } else {
1010 data = index_deref(self, rev);
1011 if (data == NULL) {
1012 return -2;
1013 }
1014
958
1015 result = getbe32(data + 16);
1016 }
1017 if (result > rev) {
959 if (result > rev) {
1018 PyErr_Format(
960 PyErr_Format(
1019 PyExc_ValueError,
961 PyExc_ValueError,
@@ -1854,7 +1796,7 b' static PyTypeObject nodetreeType = {'
1854 static int index_init_nt(indexObject *self)
1796 static int index_init_nt(indexObject *self)
1855 {
1797 {
1856 if (!self->ntinitialized) {
1798 if (!self->ntinitialized) {
1857 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1799 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1858 nt_dealloc(&self->nt);
1800 nt_dealloc(&self->nt);
1859 return -1;
1801 return -1;
1860 }
1802 }
@@ -2479,17 +2421,17 b' static PyObject *index_ancestors(indexOb'
2479 */
2421 */
2480 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2422 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2481 {
2423 {
2482 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2424 Py_ssize_t i, len;
2483
2484 for (i = start; i < len; i++) {
2485 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2486 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2487
2425
2488 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2426 len = self->length + self->new_length;
2489 }
2427 i = start - self->length;
2428 if (i < 0)
2429 return;
2490
2430
2491 if (start == 0)
2431 for (i = start; i < len; i++)
2492 Py_CLEAR(self->added);
2432 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2433
2434 self->new_length = start - self->length;
2493 }
2435 }
2494
2436
2495 /*
2437 /*
@@ -2547,28 +2489,25 b' static int index_slice_del(indexObject *'
2547
2489
2548 nt_delete_node(&self->nt, node);
2490 nt_delete_node(&self->nt, node);
2549 }
2491 }
2550 if (self->added)
2492 if (self->new_length)
2551 index_invalidate_added(self, 0);
2493 index_invalidate_added(self, self->length);
2552 if (self->ntrev > start)
2494 if (self->ntrev > start)
2553 self->ntrev = (int)start;
2495 self->ntrev = (int)start;
2554 } else if (self->added) {
2496 } else if (self->new_length) {
2555 Py_CLEAR(self->added);
2497 self->new_length = 0;
2556 }
2498 }
2557
2499
2558 self->length = start;
2500 self->length = start;
2559 if (start < self->raw_length)
2560 self->raw_length = start;
2561 goto done;
2501 goto done;
2562 }
2502 }
2563
2503
2564 if (self->ntinitialized) {
2504 if (self->ntinitialized) {
2565 index_invalidate_added(self, start - self->length);
2505 index_invalidate_added(self, start);
2566 if (self->ntrev > start)
2506 if (self->ntrev > start)
2567 self->ntrev = (int)start;
2507 self->ntrev = (int)start;
2508 } else {
2509 self->new_length = start - self->length;
2568 }
2510 }
2569 if (self->added)
2570 ret = PyList_SetSlice(self->added, start - self->length,
2571 PyList_GET_SIZE(self->added), NULL);
2572 done:
2511 done:
2573 Py_CLEAR(self->headrevs);
2512 Py_CLEAR(self->headrevs);
2574 return ret;
2513 return ret;
@@ -2647,8 +2586,9 b' static int index_init(indexObject *self,'
2647
2586
2648 /* Initialize before argument-checking to avoid index_dealloc() crash.
2587 /* Initialize before argument-checking to avoid index_dealloc() crash.
2649 */
2588 */
2650 self->raw_length = 0;
2651 self->added = NULL;
2589 self->added = NULL;
2590 self->new_length = 0;
2591 self->added_length = 0;
2652 self->data = NULL;
2592 self->data = NULL;
2653 memset(&self->buf, 0, sizeof(self->buf));
2593 memset(&self->buf, 0, sizeof(self->buf));
2654 self->headrevs = NULL;
2594 self->headrevs = NULL;
@@ -2680,15 +2620,13 b' static int index_init(indexObject *self,'
2680 Py_ssize_t len = inline_scan(self, NULL);
2620 Py_ssize_t len = inline_scan(self, NULL);
2681 if (len == -1)
2621 if (len == -1)
2682 goto bail;
2622 goto bail;
2683 self->raw_length = len;
2684 self->length = len;
2623 self->length = len;
2685 } else {
2624 } else {
2686 if (size % v1_hdrsize) {
2625 if (size % v1_hdrsize) {
2687 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2626 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2688 goto bail;
2627 goto bail;
2689 }
2628 }
2690 self->raw_length = size / v1_hdrsize;
2629 self->length = size / v1_hdrsize;
2691 self->length = self->raw_length;
2692 }
2630 }
2693
2631
2694 return 0;
2632 return 0;
@@ -2732,7 +2670,7 b' static void index_dealloc(indexObject *s'
2732 memset(&self->buf, 0, sizeof(self->buf));
2670 memset(&self->buf, 0, sizeof(self->buf));
2733 }
2671 }
2734 Py_XDECREF(self->data);
2672 Py_XDECREF(self->data);
2735 Py_XDECREF(self->added);
2673 PyMem_Free(self->added);
2736 PyObject_Del(self);
2674 PyObject_Del(self);
2737 }
2675 }
2738
2676
@@ -94,7 +94,8 b' class BaseIndexObject(object):'
94 def append(self, tup):
94 def append(self, tup):
95 if '_nodemap' in vars(self):
95 if '_nodemap' in vars(self):
96 self._nodemap[tup[7]] = len(self)
96 self._nodemap[tup[7]] = len(self)
97 self._extra.append(tup)
97 data = _pack(indexformatng, *tup)
98 self._extra.append(data)
98
99
99 def _check_index(self, i):
100 def _check_index(self, i):
100 if not isinstance(i, int):
101 if not isinstance(i, int):
@@ -107,14 +108,13 b' class BaseIndexObject(object):'
107 return nullitem
108 return nullitem
108 self._check_index(i)
109 self._check_index(i)
109 if i >= self._lgt:
110 if i >= self._lgt:
110 return self._extra[i - self._lgt]
111 data = self._extra[i - self._lgt]
112 else:
111 index = self._calculate_index(i)
113 index = self._calculate_index(i)
112 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
114 data = self._data[index : index + indexsize]
113 if i == 0:
115 r = _unpack(indexformatng, data)
114 e = list(r)
116 if self._lgt and i == 0:
115 type = gettype(e[0])
117 r = (offset_type(0, gettype(r[0])),) + r[1:]
116 e[0] = offset_type(0, type)
117 return tuple(e)
118 return r
118 return r
119
119
120
120
General Comments 0
You need to be logged in to leave comments. Login now