##// END OF EJS Templates
revlog: store new index entries as binary...
Joerg Sonnenberger -
r46548:0ce15a8c default
parent child Browse files
Show More
@@ -82,9 +82,10 b' struct indexObjectStruct {'
82 82 PyObject *data; /* raw bytes of index */
83 83 Py_buffer buf; /* buffer of data */
84 84 const char **offsets; /* populated on demand */
85 Py_ssize_t raw_length; /* original number of elements */
86 Py_ssize_t length; /* current number of elements */
87 PyObject *added; /* populated on demand */
85 Py_ssize_t length; /* current on-disk number of elements */
86 unsigned new_length; /* number of added elements */
87 unsigned added_length; /* space reserved for added elements */
88 char *added; /* populated on demand */
88 89 PyObject *headrevs; /* cache, invalidated on changes */
89 90 PyObject *filteredrevs; /* filtered revs set */
90 91 nodetree nt; /* base-16 trie */
@@ -97,9 +98,7 b' struct indexObjectStruct {'
97 98
98 99 static Py_ssize_t index_length(const indexObject *self)
99 100 {
100 if (self->added == NULL)
101 return self->length;
102 return self->length + PyList_GET_SIZE(self->added);
101 return self->length + self->new_length;
103 102 }
104 103
105 104 static PyObject *nullentry = NULL;
@@ -155,11 +154,14 b' cleanup:'
155 154 */
156 155 static const char *index_deref(indexObject *self, Py_ssize_t pos)
157 156 {
157 if (pos >= self->length)
158 return self->added + (pos - self->length) * v1_hdrsize;
159
158 160 if (self->inlined && pos > 0) {
159 161 if (self->offsets == NULL) {
160 162 Py_ssize_t ret;
161 self->offsets = PyMem_Malloc(self->raw_length *
162 sizeof(*self->offsets));
163 self->offsets =
164 PyMem_Malloc(self->length * sizeof(*self->offsets));
163 165 if (self->offsets == NULL)
164 166 return (const char *)PyErr_NoMemory();
165 167 ret = inline_scan(self, self->offsets);
@@ -182,23 +184,11 b' static const char *index_deref(indexObje'
182 184 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
183 185 int maxrev)
184 186 {
185 if (rev >= self->length) {
186 long tmp;
187 PyObject *tuple =
188 PyList_GET_ITEM(self->added, rev - self->length);
189 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
190 return -1;
191 }
192 ps[0] = (int)tmp;
193 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
194 return -1;
195 }
196 ps[1] = (int)tmp;
197 } else {
198 const char *data = index_deref(self, rev);
199 ps[0] = getbe32(data + 24);
200 ps[1] = getbe32(data + 28);
201 }
187 const char *data = index_deref(self, rev);
188
189 ps[0] = getbe32(data + 24);
190 ps[1] = getbe32(data + 28);
191
202 192 /* If index file is corrupted, ps[] may point to invalid revisions. So
203 193 * there is a risk of buffer overflow to trust them unconditionally. */
204 194 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
@@ -237,74 +227,41 b' static int HgRevlogIndex_GetParents(PyOb'
237 227
238 228 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
239 229 {
230 const char *data;
240 231 uint64_t offset;
241 if (rev == nullrev) {
232
233 if (rev == nullrev)
242 234 return 0;
243 }
244 if (rev >= self->length) {
245 PyObject *tuple;
246 PyObject *pylong;
247 PY_LONG_LONG tmp;
248 tuple = PyList_GET_ITEM(self->added, rev - self->length);
249 pylong = PyTuple_GET_ITEM(tuple, 0);
250 tmp = PyLong_AsLongLong(pylong);
251 if (tmp == -1 && PyErr_Occurred()) {
252 return -1;
253 }
254 if (tmp < 0) {
255 PyErr_Format(PyExc_OverflowError,
256 "revlog entry size out of bound (%lld)",
257 (long long)tmp);
258 return -1;
259 }
260 offset = (uint64_t)tmp;
235
236 data = index_deref(self, rev);
237 offset = getbe32(data + 4);
238 if (rev == 0) {
239 /* mask out version number for the first entry */
240 offset &= 0xFFFF;
261 241 } else {
262 const char *data = index_deref(self, rev);
263 offset = getbe32(data + 4);
264 if (rev == 0) {
265 /* mask out version number for the first entry */
266 offset &= 0xFFFF;
267 } else {
268 uint32_t offset_high = getbe32(data);
269 offset |= ((uint64_t)offset_high) << 32;
270 }
242 uint32_t offset_high = getbe32(data);
243 offset |= ((uint64_t)offset_high) << 32;
271 244 }
272 245 return (int64_t)(offset >> 16);
273 246 }
274 247
275 248 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
276 249 {
277 if (rev == nullrev) {
250 const char *data;
251 int tmp;
252
253 if (rev == nullrev)
278 254 return 0;
255
256 data = index_deref(self, rev);
257
258 tmp = (int)getbe32(data + 8);
259 if (tmp < 0) {
260 PyErr_Format(PyExc_OverflowError,
261 "revlog entry size out of bound (%d)", tmp);
262 return -1;
279 263 }
280 if (rev >= self->length) {
281 PyObject *tuple;
282 PyObject *pylong;
283 long ret;
284 tuple = PyList_GET_ITEM(self->added, rev - self->length);
285 pylong = PyTuple_GET_ITEM(tuple, 1);
286 ret = PyInt_AsLong(pylong);
287 if (ret == -1 && PyErr_Occurred()) {
288 return -1;
289 }
290 if (ret < 0 || ret > (long)INT_MAX) {
291 PyErr_Format(PyExc_OverflowError,
292 "revlog entry size out of bound (%ld)",
293 ret);
294 return -1;
295 }
296 return (int)ret;
297 } else {
298 const char *data = index_deref(self, rev);
299 int tmp = (int)getbe32(data + 8);
300 if (tmp < 0) {
301 PyErr_Format(PyExc_OverflowError,
302 "revlog entry size out of bound (%d)",
303 tmp);
304 return -1;
305 }
306 return tmp;
307 }
264 return tmp;
308 265 }
309 266
310 267 /*
@@ -337,19 +294,16 b' static PyObject *index_get(indexObject *'
337 294 return NULL;
338 295 }
339 296
340 if (pos >= self->length) {
341 PyObject *obj;
342 obj = PyList_GET_ITEM(self->added, pos - self->length);
343 Py_INCREF(obj);
344 return obj;
345 }
346
347 297 data = index_deref(self, pos);
348 298 if (data == NULL)
349 299 return NULL;
350 300
351 301 offset_flags = getbe32(data + 4);
352 if (pos == 0) /* mask out version number for the first entry */
302 /*
303 * The first entry on-disk needs the version number masked out,
304 * but this doesn't apply if entries are added to an empty index.
305 */
306 if (self->length && pos == 0)
353 307 offset_flags &= 0xFFFF;
354 308 else {
355 309 uint32_t offset_high = getbe32(data);
@@ -383,13 +337,6 b' static const char *index_node(indexObjec'
383 337 if (pos >= length)
384 338 return NULL;
385 339
386 if (pos >= self->length) {
387 PyObject *tuple, *str;
388 tuple = PyList_GET_ITEM(self->added, pos - self->length);
389 str = PyTuple_GetItem(tuple, 7);
390 return str ? PyBytes_AS_STRING(str) : NULL;
391 }
392
393 340 data = index_deref(self, pos);
394 341 return data ? data + 32 : NULL;
395 342 }
@@ -423,30 +370,48 b' static int node_check(PyObject *obj, cha'
423 370
424 371 static PyObject *index_append(indexObject *self, PyObject *obj)
425 372 {
426 char *node;
427 Py_ssize_t len;
373 unsigned long offset_flags;
374 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
375 Py_ssize_t c_node_id_len;
376 const char *c_node_id;
377 char *data;
428 378
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
379 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
380 &uncomp_len, &base_rev, &link_rev, &parent_1,
381 &parent_2, &c_node_id, &c_node_id_len)) {
430 382 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 383 return NULL;
432 384 }
385 if (c_node_id_len != 20 && c_node_id_len != 32) {
386 PyErr_SetString(PyExc_TypeError, "invalid node");
387 return NULL;
388 }
433 389
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
435 return NULL;
436
437 len = index_length(self);
438
439 if (self->added == NULL) {
440 self->added = PyList_New(0);
441 if (self->added == NULL)
442 return NULL;
390 if (self->new_length == self->added_length) {
391 size_t new_added_length =
392 self->added_length ? self->added_length * 2 : 4096;
393 void *new_added =
394 PyMem_Realloc(self->added, new_added_length * v1_hdrsize);
395 if (!new_added)
396 return PyErr_NoMemory();
397 self->added = new_added;
398 self->added_length = new_added_length;
443 399 }
444
445 if (PyList_Append(self->added, obj) == -1)
446 return NULL;
400 rev = self->length + self->new_length;
401 data = self->added + v1_hdrsize * self->new_length++;
402 putbe32(offset_flags >> 32, data);
403 putbe32(offset_flags & 0xffffffffU, data + 4);
404 putbe32(comp_len, data + 8);
405 putbe32(uncomp_len, data + 12);
406 putbe32(base_rev, data + 16);
407 putbe32(link_rev, data + 20);
408 putbe32(parent_1, data + 24);
409 putbe32(parent_2, data + 28);
410 memcpy(data + 32, c_node_id, c_node_id_len);
411 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
447 412
448 413 if (self->ntinitialized)
449 nt_insert(&self->nt, node, (int)len);
414 nt_insert(&self->nt, c_node_id, rev);
450 415
451 416 Py_CLEAR(self->headrevs);
452 417 Py_RETURN_NONE;
@@ -473,20 +438,8 b' static PyObject *index_stats(indexObject'
473 438 Py_CLEAR(t); \
474 439 } while (0)
475 440
476 if (self->added) {
477 Py_ssize_t len = PyList_GET_SIZE(self->added);
478 s = PyBytes_FromString("index entries added");
479 t = PyInt_FromSsize_t(len);
480 if (!s || !t)
481 goto bail;
482 if (PyDict_SetItem(obj, s, t) == -1)
483 goto bail;
484 Py_CLEAR(s);
485 Py_CLEAR(t);
486 }
487
488 if (self->raw_length != self->length)
489 istat(raw_length, "revs on disk");
441 if (self->added_length)
442 istat(new_length, "index entries added");
490 443 istat(length, "revs in memory");
491 444 istat(ntlookups, "node trie lookups");
492 445 istat(ntmisses, "node trie misses");
@@ -998,22 +951,11 b' static inline int index_baserev(indexObj'
998 951 const char *data;
999 952 int result;
1000 953
1001 if (rev >= self->length) {
1002 PyObject *tuple =
1003 PyList_GET_ITEM(self->added, rev - self->length);
1004 long ret;
1005 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
1006 return -2;
1007 }
1008 result = (int)ret;
1009 } else {
1010 data = index_deref(self, rev);
1011 if (data == NULL) {
1012 return -2;
1013 }
954 data = index_deref(self, rev);
955 if (data == NULL)
956 return -2;
957 result = getbe32(data + 16);
1014 958
1015 result = getbe32(data + 16);
1016 }
1017 959 if (result > rev) {
1018 960 PyErr_Format(
1019 961 PyExc_ValueError,
@@ -1854,7 +1796,7 b' static PyTypeObject nodetreeType = {'
1854 1796 static int index_init_nt(indexObject *self)
1855 1797 {
1856 1798 if (!self->ntinitialized) {
1857 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1799 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1858 1800 nt_dealloc(&self->nt);
1859 1801 return -1;
1860 1802 }
@@ -2479,17 +2421,17 b' static PyObject *index_ancestors(indexOb'
2479 2421 */
2480 2422 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2481 2423 {
2482 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2483
2484 for (i = start; i < len; i++) {
2485 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2486 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2424 Py_ssize_t i, len;
2487 2425
2488 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2489 }
2426 len = self->length + self->new_length;
2427 i = start - self->length;
2428 if (i < 0)
2429 return;
2490 2430
2491 if (start == 0)
2492 Py_CLEAR(self->added);
2431 for (i = start; i < len; i++)
2432 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2433
2434 self->new_length = start - self->length;
2493 2435 }
2494 2436
2495 2437 /*
@@ -2547,28 +2489,25 b' static int index_slice_del(indexObject *'
2547 2489
2548 2490 nt_delete_node(&self->nt, node);
2549 2491 }
2550 if (self->added)
2551 index_invalidate_added(self, 0);
2492 if (self->new_length)
2493 index_invalidate_added(self, self->length);
2552 2494 if (self->ntrev > start)
2553 2495 self->ntrev = (int)start;
2554 } else if (self->added) {
2555 Py_CLEAR(self->added);
2496 } else if (self->new_length) {
2497 self->new_length = 0;
2556 2498 }
2557 2499
2558 2500 self->length = start;
2559 if (start < self->raw_length)
2560 self->raw_length = start;
2561 2501 goto done;
2562 2502 }
2563 2503
2564 2504 if (self->ntinitialized) {
2565 index_invalidate_added(self, start - self->length);
2505 index_invalidate_added(self, start);
2566 2506 if (self->ntrev > start)
2567 2507 self->ntrev = (int)start;
2508 } else {
2509 self->new_length = start - self->length;
2568 2510 }
2569 if (self->added)
2570 ret = PyList_SetSlice(self->added, start - self->length,
2571 PyList_GET_SIZE(self->added), NULL);
2572 2511 done:
2573 2512 Py_CLEAR(self->headrevs);
2574 2513 return ret;
@@ -2647,8 +2586,9 b' static int index_init(indexObject *self,'
2647 2586
2648 2587 /* Initialize before argument-checking to avoid index_dealloc() crash.
2649 2588 */
2650 self->raw_length = 0;
2651 2589 self->added = NULL;
2590 self->new_length = 0;
2591 self->added_length = 0;
2652 2592 self->data = NULL;
2653 2593 memset(&self->buf, 0, sizeof(self->buf));
2654 2594 self->headrevs = NULL;
@@ -2680,15 +2620,13 b' static int index_init(indexObject *self,'
2680 2620 Py_ssize_t len = inline_scan(self, NULL);
2681 2621 if (len == -1)
2682 2622 goto bail;
2683 self->raw_length = len;
2684 2623 self->length = len;
2685 2624 } else {
2686 2625 if (size % v1_hdrsize) {
2687 2626 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2688 2627 goto bail;
2689 2628 }
2690 self->raw_length = size / v1_hdrsize;
2691 self->length = self->raw_length;
2629 self->length = size / v1_hdrsize;
2692 2630 }
2693 2631
2694 2632 return 0;
@@ -2732,7 +2670,7 b' static void index_dealloc(indexObject *s'
2732 2670 memset(&self->buf, 0, sizeof(self->buf));
2733 2671 }
2734 2672 Py_XDECREF(self->data);
2735 Py_XDECREF(self->added);
2673 PyMem_Free(self->added);
2736 2674 PyObject_Del(self);
2737 2675 }
2738 2676
@@ -94,7 +94,8 b' class BaseIndexObject(object):'
94 94 def append(self, tup):
95 95 if '_nodemap' in vars(self):
96 96 self._nodemap[tup[7]] = len(self)
97 self._extra.append(tup)
97 data = _pack(indexformatng, *tup)
98 self._extra.append(data)
98 99
99 100 def _check_index(self, i):
100 101 if not isinstance(i, int):
@@ -107,14 +108,13 b' class BaseIndexObject(object):'
107 108 return nullitem
108 109 self._check_index(i)
109 110 if i >= self._lgt:
110 return self._extra[i - self._lgt]
111 index = self._calculate_index(i)
112 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
113 if i == 0:
114 e = list(r)
115 type = gettype(e[0])
116 e[0] = offset_type(0, type)
117 return tuple(e)
111 data = self._extra[i - self._lgt]
112 else:
113 index = self._calculate_index(i)
114 data = self._data[index : index + indexsize]
115 r = _unpack(indexformatng, data)
116 if self._lgt and i == 0:
117 r = (offset_type(0, gettype(r[0])),) + r[1:]
118 118 return r
119 119
120 120
General Comments 0
You need to be logged in to leave comments. Log in now