##// END OF EJS Templates
revlog: signal which revlog indexes are compatible with Rust...
marmoute -
r48042:9d1a8829 default
parent child Browse files
Show More
@@ -1,763 +1,763
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 33 {
34 34 Py_ssize_t expected_size;
35 35
36 36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 37 return NULL;
38 38 }
39 39
40 40 return _dict_new_presized(expected_size);
41 41 }
42 42
43 43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 44 int size, int mtime)
45 45 {
46 46 dirstateTupleObject *t =
47 47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 48 if (!t) {
49 49 return NULL;
50 50 }
51 51 t->state = state;
52 52 t->mode = mode;
53 53 t->size = size;
54 54 t->mtime = mtime;
55 55 return t;
56 56 }
57 57
58 58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 59 PyObject *kwds)
60 60 {
61 61 /* We do all the initialization here and not a tp_init function because
62 62 * dirstate_tuple is immutable. */
63 63 dirstateTupleObject *t;
64 64 char state;
65 65 int size, mode, mtime;
66 66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 67 return NULL;
68 68 }
69 69
70 70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 71 if (!t) {
72 72 return NULL;
73 73 }
74 74 t->state = state;
75 75 t->mode = mode;
76 76 t->size = size;
77 77 t->mtime = mtime;
78 78
79 79 return (PyObject *)t;
80 80 }
81 81
/* tp_dealloc: the tuple holds no Python references, so a plain free
 * of the object itself is sufficient. */
static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}
86 86
/* sq_length: a dirstate tuple always exposes exactly 4 items
 * (state, mode, size, mtime). */
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	return 4;
}
91 91
92 92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 93 {
94 94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 95 switch (i) {
96 96 case 0:
97 97 return PyBytes_FromStringAndSize(&t->state, 1);
98 98 case 1:
99 99 return PyInt_FromLong(t->mode);
100 100 case 2:
101 101 return PyInt_FromLong(t->size);
102 102 case 3:
103 103 return PyInt_FromLong(t->mtime);
104 104 default:
105 105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 106 return NULL;
107 107 }
108 108 }
109 109
/* Sequence protocol table: dirstate_tuple supports only len() and
 * read-only indexing; every mutating/concatenating slot is unset. */
static PySequenceMethods dirstate_tuple_sq = {
    dirstate_tuple_length, /* sq_length */
    0,                     /* sq_concat */
    0,                     /* sq_repeat */
    dirstate_tuple_item,   /* sq_item */
    0,                     /* sq_ass_item */
    0,                     /* sq_contains */
    0,                     /* sq_inplace_concat */
    0                      /* sq_inplace_repeat */
};
120 120
/* Type object for dirstate_tuple: an immutable, sequence-like record
 * of (state, mode, size, mtime), constructed via dirstate_tuple_new. */
PyTypeObject dirstateTupleType = {
    PyVarObject_HEAD_INIT(NULL, 0)      /* header */
    "dirstate_tuple",                   /* tp_name */
    sizeof(dirstateTupleObject),        /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    &dirstate_tuple_sq,                 /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    "dirstate tuple",                   /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    0,                                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    0,                                  /* tp_init */
    0,                                  /* tp_alloc */
    dirstate_tuple_new,                 /* tp_new */
};
161 161
/*
 * Parse the on-disk dirstate: 40 bytes of parent hashes (p1, p2)
 * followed by records of a 17-byte header (state, mode, size, mtime,
 * filename length) plus the filename, optionally suffixed with
 * "\0copysource".
 *
 * Fills `dmap` (name -> dirstate tuple) and `cmap` (name -> copy
 * source) in place; returns the (p1, p2) pair, or NULL with an
 * exception set on error.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, pos = 40;
	Py_ssize_t len = 40;
	Py_ssize_t readlen;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
	        &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
		goto quit;
	}

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(PyExc_ValueError,
		                "too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
	                        str + 20, (Py_ssize_t)20);
	if (!parents) {
		goto quit;
	}

	/* read filenames */
	while (pos >= 40 && pos < len) {
		/* record header must fit in the remaining data */
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}

		/* NOTE(review): entry is not NULL-checked before the
		 * PyDict_SetItem calls below; on allocation failure this
		 * would pass NULL as a value — confirm upstream intent. */
		entry =
		    (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
		/* an embedded NUL splits filename from its copy source */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(
			    cpos + 1, flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1) {
				goto quit;
			}
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1) {
				goto quit;
			}
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
250 250
/*
 * Build a set of non-normal and other parent entries from the dirstate dmap.
 *
 * Returns a 2-tuple (nonnormal, otherparent) of sets of filenames, or
 * NULL with an exception set on error.
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
{
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
	                      &dmap)) {
		goto bail;
	}

	nonnset = PySet_New(NULL);
	if (nonnset == NULL) {
		goto bail;
	}

	otherpset = PySet_New(NULL);
	if (otherpset == NULL) {
		goto bail;
	}

	pos = 0;
	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		t = (dirstateTupleObject *)v;

		/* a 'n' entry with size -2 is recorded as coming from the
		 * other parent */
		if (t->state == 'n' && t->size == -2) {
			if (PySet_Add(otherpset, fname) == -1) {
				goto bail;
			}
		}

		/* normal entries with a valid mtime are skipped; everything
		 * else is collected as non-normal */
		if (t->state == 'n' && t->mtime != -1) {
			continue;
		}
		if (PySet_Add(nonnset, fname) == -1) {
			goto bail;
		}
	}

	result = Py_BuildValue("(OO)", nonnset, otherpset);
	if (result == NULL) {
		goto bail;
	}
	Py_DECREF(nonnset);
	Py_DECREF(otherpset);
	return result;
bail:
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	Py_XDECREF(result);
	return NULL;
}
312 312
/*
 * Efficiently pack a dirstate object into its on-disk format.
 *
 * Layout produced: two 20-byte parent hashes, then one record per map
 * entry: 1 state byte, three big-endian 32-bit fields (mode, size,
 * mtime), a 32-bit name length, the filename, and optionally
 * "\0copysource". Entries whose mtime equals `now` are rewritten in
 * `map` with mtime -1 to avoid race-prone timestamps.
 */
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
	PyObject *packobj = NULL;
	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
	Py_ssize_t nbytes, pos, l;
	PyObject *k, *v = NULL, *pn;
	char *p, *s;
	int now;

	if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
	                      &PyDict_Type, &copymap, &PyTuple_Type, &pl,
	                      &now)) {
		return NULL;
	}

	if (PyTuple_Size(pl) != 2) {
		PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
		return NULL;
	}

	/* Figure out how much we need to allocate. */
	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		PyObject *c;
		if (!PyBytes_Check(k)) {
			PyErr_SetString(PyExc_TypeError, "expected string key");
			goto bail;
		}
		/* 17 = state byte + four 4-byte header fields */
		nbytes += PyBytes_GET_SIZE(k) + 17;
		c = PyDict_GetItem(copymap, k);
		if (c) {
			if (!PyBytes_Check(c)) {
				PyErr_SetString(PyExc_TypeError,
				                "expected string key");
				goto bail;
			}
			/* +1 for the NUL separating name and copy source */
			nbytes += PyBytes_GET_SIZE(c) + 1;
		}
	}

	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
	if (packobj == NULL) {
		goto bail;
	}

	p = PyBytes_AS_STRING(packobj);

	/* write the two 20-byte parent hashes first */
	pn = PyTuple_GET_ITEM(pl, 0);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;
	pn = PyTuple_GET_ITEM(pl, 1);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;

	for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		dirstateTupleObject *tuple;
		char state;
		int mode, size, mtime;
		Py_ssize_t len, l;
		PyObject *o;
		char *t;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		tuple = (dirstateTupleObject *)v;

		state = tuple->state;
		mode = tuple->mode;
		size = tuple->size;
		mtime = tuple->mtime;
		if (state == 'n' && mtime == now) {
			/* See pure/parsers.py:pack_dirstate for why we do
			 * this. */
			mtime = -1;
			mtime_unset = (PyObject *)make_dirstate_tuple(
			    state, mode, size, mtime);
			if (!mtime_unset) {
				goto bail;
			}
			if (PyDict_SetItem(map, k, mtime_unset) == -1) {
				goto bail;
			}
			Py_DECREF(mtime_unset);
			mtime_unset = NULL;
		}
		*p++ = state;
		putbe32((uint32_t)mode, p);
		putbe32((uint32_t)size, p + 4);
		putbe32((uint32_t)mtime, p + 8);
		/* remember where the length field lives; it is filled in
		 * below once we know whether a copy source follows */
		t = p + 12;
		p += 16;
		len = PyBytes_GET_SIZE(k);
		memcpy(p, PyBytes_AS_STRING(k), len);
		p += len;
		o = PyDict_GetItem(copymap, k);
		if (o) {
			*p++ = '\0';
			l = PyBytes_GET_SIZE(o);
			memcpy(p, PyBytes_AS_STRING(o), l);
			p += l;
			len += l + 1;
		}
		putbe32((uint32_t)len, t);
	}

	/* sanity check: we must have written exactly what we allocated */
	pos = p - PyBytes_AS_STRING(packobj);
	if (pos != nbytes) {
		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
		             (long)pos, (long)nbytes);
		goto bail;
	}

	return packobj;
bail:
	Py_XDECREF(mtime_unset);
	Py_XDECREF(packobj);
	/* NOTE(review): v is a borrowed reference from PyDict_Next;
	 * decrefing it here looks suspicious — confirm against upstream. */
	Py_XDECREF(v);
	return NULL;
}
445 445
446 446 #define BUMPED_FIX 1
447 447 #define USING_SHA_256 2
448 448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449 449
450 450 static PyObject *readshas(const char *source, unsigned char num,
451 451 Py_ssize_t hashwidth)
452 452 {
453 453 int i;
454 454 PyObject *list = PyTuple_New(num);
455 455 if (list == NULL) {
456 456 return NULL;
457 457 }
458 458 for (i = 0; i < num; i++) {
459 459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 460 if (hash == NULL) {
461 461 Py_DECREF(list);
462 462 return NULL;
463 463 }
464 464 PyTuple_SET_ITEM(list, i, hash);
465 465 source += hashwidth;
466 466 }
467 467 return list;
468 468 }
469 469
/*
 * Decode one v1 ("fm1") obsolescence marker starting at databegin.
 *
 * On success returns the tuple
 *   (prec, succs, flags, metadata, (mtime, tz * 60), parents)
 * and stores the marker's total size in *msize so the caller can step
 * to the next marker. Returns NULL with ValueError set on truncated
 * input, or with a propagated error on allocation failure.
 */
static PyObject *fm1readmarker(const char *databegin, const char *dataend,
                               uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	/* fixed-size header: total size, mtime, timezone, flags */
	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	/* only 1 or 2 recorded parents are meaningful; any other count
	 * maps to None */
	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		parents = Py_None;
		Py_INCREF(parents);
	}

	/* metadata: nmetadata (keysize, valuesize) byte pairs, followed
	 * by the concatenated key/value payloads */
	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
	                    (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	/* shared exit: on success ret is set; on failure it is NULL */
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	Py_XDECREF(parents);
	return ret;
}
587 587
/*
 * Python entry point: parse every v1 obsolete marker found in
 * data[offset:stop] and return them as a list of marker tuples
 * (see fm1readmarker).
 */
static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
{
	const char *data, *dataend;
	Py_ssize_t datalen, offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
	                      &offset, &stop)) {
		return NULL;
	}
	if (offset < 0) {
		PyErr_SetString(PyExc_ValueError,
		                "invalid negative offset in fm1readmarkers");
		return NULL;
	}
	if (stop > datalen) {
		PyErr_SetString(
		    PyExc_ValueError,
		    "stop longer than data length in fm1readmarkers");
		return NULL;
	}
	dataend = data + datalen;
	data += offset;
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		Py_DECREF(record);
		if (error) {
			goto bail;
		}
		/* advance by the size the marker reported for itself */
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}
635 635
636 636 static char parsers_doc[] = "Efficient content parsing.";
637 637
638 638 PyObject *encodedir(PyObject *self, PyObject *args);
639 639 PyObject *pathencode(PyObject *self, PyObject *args);
640 640 PyObject *lowerencode(PyObject *self, PyObject *args);
641 641 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
642 642
/* Module method table: maps Python-visible names onto the C
 * implementations above (plus a few defined in sibling files). */
static PyMethodDef methods[] = {
    {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
    {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
     "create a set containing non-normal and other parent entries of given "
     "dirstate\n"},
    {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
    {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
     "parse a revlog index\n"},
    {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
    {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
    {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
    {"dict_new_presized", dict_new_presized, METH_VARARGS,
     "construct a dict with an expected size\n"},
    {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
     "make file foldmap\n"},
    {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
     "escape a UTF-8 byte string to JSON (fast path)\n"},
    {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
    {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
    {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
    {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
     "parse v1 obsolete markers\n"},
    {NULL, NULL}};
666 666
667 667 void dirs_module_init(PyObject *mod);
668 668 void manifest_module_init(PyObject *mod);
669 669 void revlog_module_init(PyObject *mod);
670 670
/* ABI/feature version of this C extension; the Python side checks this
 * constant to detect stale compiled modules. The flattened diff residue
 * left both the old (19) and new (20) definitions, which would redefine
 * `version`; only the post-change value is kept. */
static const int version = 20;
672 672
/*
 * Populate the module: version constants, submodule initializers, the
 * make_dirstate_tuple capsule, and the dirstatetuple type object.
 */
static void module_init(PyObject *mod)
{
	PyObject *capsule = NULL;
	PyModule_AddIntConstant(mod, "version", version);

	/* This module constant has two purposes. First, it lets us unit test
	 * the ImportError raised without hard-coding any error text. This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile. Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile. Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);
	revlog_module_init(mod);

	/* export make_dirstate_tuple to other extension modules through a
	 * named capsule */
	capsule = PyCapsule_New(
	    make_dirstate_tuple,
	    "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
	if (capsule != NULL)
		PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);

	if (PyType_Ready(&dirstateTupleType) < 0) {
		return;
	}
	Py_INCREF(&dirstateTupleType);
	PyModule_AddObject(mod, "dirstatetuple",
	                   (PyObject *)&dirstateTupleType);
}
705 705
/*
 * Verify the interpreter loading this module matches the major/minor
 * Python version it was compiled against. Returns 0 when compatible,
 * -1 with ImportError (or a propagated error) set otherwise.
 */
static int check_python_version(void)
{
	PyObject *sys = PyImport_ImportModule("sys"), *ver;
	long hexversion;
	if (!sys) {
		return -1;
	}
	ver = PyObject_GetAttrString(sys, "hexversion");
	Py_DECREF(sys);
	if (!ver) {
		return -1;
	}
	hexversion = PyInt_AsLong(ver);
	Py_DECREF(ver);
	/* sys.hexversion is a 32-bit number by default, so the -1 case
	 * should only occur in unusual circumstances (e.g. if sys.hexversion
	 * is manually set to an invalid value). */
	/* shifting by 16 compares only the major/minor fields */
	if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
		PyErr_Format(PyExc_ImportError,
		             "%s: The Mercurial extension "
		             "modules were compiled with Python " PY_VERSION
		             ", but "
		             "Mercurial is currently using Python with "
		             "sys.hexversion=%ld: "
		             "Python %s\n at: %s",
		             versionerrortext, hexversion, Py_GetVersion(),
		             Py_GetProgramFullPath());
		return -1;
	}
	return 0;
}
737 737
#ifdef IS_PY3K
static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
                                            parsers_doc, -1, methods};

/* Python 3 entry point: refuse to load under a mismatched interpreter. */
PyMODINIT_FUNC PyInit_parsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return NULL;
	mod = PyModule_Create(&parsers_module);
	module_init(mod);
	return mod;
}
#else
/* Python 2 entry point. */
PyMODINIT_FUNC initparsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1) {
		return;
	}
	mod = Py_InitModule3("parsers", methods, parsers_doc);
	module_init(mod);
}
#endif
@@ -1,3055 +1,3060
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 #ifdef IS_PY3K
27 27 /* The mapping of Python types is meant to be temporary to get Python
28 28 * 3 to compile. We should remove this once Python 3 support is fully
29 29 * supported and proper types are used in the extensions themselves. */
30 30 #define PyInt_Check PyLong_Check
31 31 #define PyInt_FromLong PyLong_FromLong
32 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 33 #define PyInt_AsLong PyLong_AsLong
34 34 #endif
35 35
/* Forward declaration; the full definition follows below. */
typedef struct indexObjectStruct indexObject;

/* One trie node: 16 child slots, one per hex nibble (see nodetree). */
typedef struct {
	int children[16];
} nodetreenode;

/* Function table exported through a capsule so other extension
 * modules can call into this index implementation; abi_version guards
 * against mismatched builds. */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;
	nodetreenode *nodes;
	Py_ssize_t nodelen;
	size_t length;   /* # nodes in use */
	size_t capacity; /* # nodes allocated */
	int depth;       /* maximum depth of tree */
	int splits;      /* # splits performed */
} nodetree;

/* Python object wrapper around a standalone nodetree. */
typedef struct {
	PyObject_HEAD /* ; */
	    nodetree nt;
} nodetreeObject;
70 70
71 71 /*
72 72 * This class has two behaviors.
73 73 *
74 74 * When used in a list-like way (with integer keys), we decode an
75 75 * entry in a RevlogNG index file on demand. We have limited support for
76 76 * integer-keyed insert and delete, only at elements right before the
77 77 * end.
78 78 *
79 79 * With string keys, we lazily perform a reverse mapping from node to
80 80 * rev, using a base-16 trie.
81 81 */
82 82 struct indexObjectStruct {
83 83 PyObject_HEAD
84 84 /* Type-specific fields go here. */
85 85 PyObject *data; /* raw bytes of index */
86 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 87 PyObject *nullentry; /* fast path for references to null */
88 88 Py_buffer buf; /* buffer of data */
89 89 const char **offsets; /* populated on demand */
90 90 Py_ssize_t length; /* current on-disk number of elements */
91 91 unsigned new_length; /* number of added elements */
92 92 unsigned added_length; /* space reserved for added elements */
93 93 char *added; /* populated on demand */
94 94 PyObject *headrevs; /* cache, invalidated on changes */
95 95 PyObject *filteredrevs; /* filtered revs set */
96 96 nodetree nt; /* base-16 trie */
97 97 int ntinitialized; /* 0 or 1 */
98 98 int ntrev; /* last rev scanned */
99 99 int ntlookups; /* # lookups */
100 100 int ntmisses; /* # lookups that miss the cache */
101 101 int inlined;
102 102 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
103 103 */
104 char format_version; /* size of index headers. Differs in v1 v.s. v2
105 format */
104 long rust_ext_compat; /* compatibility with being used in rust
105 extensions */
106 char format_version; /* size of index headers. Differs in v1 v.s. v2
107 format */
106 108 };
107 109
/* Total number of index entries: on-disk entries plus pending
 * in-memory additions. */
static Py_ssize_t index_length(const indexObject *self)
{
	return self->length + self->new_length;
}
112 114
113 115 static const char nullid[32] = {0};
114 116 static const Py_ssize_t nullrev = -1;
115 117
116 118 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
117 119
118 120 static int index_find_node(indexObject *self, const char *node);
119 121
120 122 #if LONG_MAX == 0x7fffffffL
121 123 static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
122 124 #else
123 125 static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
124 126 #endif
125 127
126 128 /* A RevlogNG v1 index entry is 64 bytes long. */
127 129 static const long v1_entry_size = 64;
128 130
129 131 /* A Revlogv2 index entry is 96 bytes long. */
130 132 static const long v2_entry_size = 96;
131 133
132 134 static const long format_v1 = 1; /* Internal only, could be any number */
133 135 static const long format_v2 = 2; /* Internal only, could be any number */
134 136
135 137 static const char comp_mode_inline = 2;
136 138
137 139 static void raise_revlog_error(void)
138 140 {
139 141 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
140 142
141 143 mod = PyImport_ImportModule("mercurial.error");
142 144 if (mod == NULL) {
143 145 goto cleanup;
144 146 }
145 147
146 148 dict = PyModule_GetDict(mod);
147 149 if (dict == NULL) {
148 150 goto cleanup;
149 151 }
150 152 Py_INCREF(dict);
151 153
152 154 errclass = PyDict_GetItemString(dict, "RevlogError");
153 155 if (errclass == NULL) {
154 156 PyErr_SetString(PyExc_SystemError,
155 157 "could not find RevlogError");
156 158 goto cleanup;
157 159 }
158 160
159 161 /* value of exception is ignored by callers */
160 162 PyErr_SetString(errclass, "RevlogError");
161 163
162 164 cleanup:
163 165 Py_XDECREF(dict);
164 166 Py_XDECREF(mod);
165 167 }
166 168
/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	/* entries beyond the on-disk portion live in the `added` buffer */
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	if (self->inlined && pos > 0) {
		/* inlined indexes interleave revision data with entries, so
		 * offsets are irregular; build the offset table lazily on
		 * first access */
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	/* non-inlined: fixed-size records directly inside the buffer */
	return (const char *)(self->buf.buf) + pos * self->entry_size;
}
192 194
/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	/* NOTE(review): index_deref() can return NULL (OOM / failed
	 * inline scan); this relies on rev being pre-validated — confirm. */
	const char *data = index_deref(self, rev);

	/* parent revisions sit at byte offsets 24 and 28 of the record */
	ps[0] = getbe32(data + 24);
	ps[1] = getbe32(data + 28);

	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}
215 217
216 218 /*
217 219 * Get parents of the given rev.
218 220 *
219 221 * If the specified rev is out of range, IndexError will be raised. If the
220 222 * revlog entry is corrupted, ValueError may be raised.
221 223 *
222 224 * Returns 0 on success or -1 on failure.
223 225 */
224 226 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
225 227 {
226 228 int tiprev;
227 229 if (!op || !HgRevlogIndex_Check(op) || !ps) {
228 230 PyErr_BadInternalCall();
229 231 return -1;
230 232 }
231 233 tiprev = (int)index_length((indexObject *)op) - 1;
232 234 if (rev < -1 || rev > tiprev) {
233 235 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
234 236 return -1;
235 237 } else if (rev == -1) {
236 238 ps[0] = ps[1] = -1;
237 239 return 0;
238 240 } else {
239 241 return index_get_parents((indexObject *)op, rev, ps, tiprev);
240 242 }
241 243 }
242 244
/* Return the byte offset in the data file at which `rev`'s chunk
 * starts (the 48-bit offset packed with the 16-bit flags field). */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);
	offset = getbe32(data + 4);
	if (rev == 0) {
		/* mask out version number for the first entry */
		offset &= 0xFFFF;
	} else {
		/* high 32 bits of the 6-byte offset live in the first field */
		uint32_t offset_high = getbe32(data);
		offset |= ((uint64_t)offset_high) << 32;
	}
	/* drop the low 16 bits, which hold the flags, not the offset */
	return (int64_t)(offset >> 16);
}
262 264
/*
 * Return the compressed data length of the given rev, or -1 (with an
 * OverflowError set) if the stored 32-bit value does not fit in a
 * signed int.
 */
static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	int tmp;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	/* compressed length is stored at byte offset 8 of the entry */
	tmp = (int)getbe32(data + 8);
	if (tmp < 0) {
		PyErr_Format(PyExc_OverflowError,
		             "revlog entry size out of bound (%d)", tmp);
		return -1;
	}
	return tmp;
}
281 283
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 */
/* Return the index entry at `pos` as a Python tuple (built with
 * tuple_format). nullrev returns the shared nullentry singleton. */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	/*
	 * The first entry on-disk needs the version number masked out,
	 * but this doesn't apply if entries are added to an empty index.
	 */
	if (self->length && pos == 0)
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	if (self->format_version == format_v1) {
		/* v1 entries carry no sidedata; report inline defaults */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else {
		sidedata_offset = getbe64(data + 64);
		sidedata_comp_len = getbe32(data + 72);
		/* byte 76 packs both compression modes, 2 bits each */
		data_comp_mode = data[76] & 3;
		sidedata_comp_mode = ((data[76] >> 2) & 3);
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode);
}
/*
 * Pack header information in binary
 *
 * Only valid for v1 indexes; later formats store the version in the
 * docket instead, in which case RuntimeError is raised.
 */
static PyObject *index_pack_header(indexObject *self, PyObject *args)
{
	/* "I" parses an unsigned int; the variable must match that type so
	 * the PyErr_Format varargs below read a correctly-typed value. */
	unsigned int header;
	char out[4];
	if (!PyArg_ParseTuple(args, "I", &header)) {
		return NULL;
	}
	if (self->format_version != format_v1) {
		/* %u matches unsigned int (the old %lu/int pair was a
		 * varargs type mismatch) */
		PyErr_Format(PyExc_RuntimeError,
		             "version header should go in the docket, not the "
		             "index: %u",
		             header);
		return NULL;
	}
	putbe32(header, out);
	return PyBytes_FromStringAndSize(out, 4);
}
/*
 * Return the raw binary string representing a revision
 */
static PyObject *index_entry_binary(indexObject *self, PyObject *value)
{
	long rev;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < 0 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};

	data = index_deref(self, rev);
	if (data == NULL)
		return NULL;
	if (rev == 0 && self->format_version == format_v1) {
		/* the header is eating the start of the first entry */
		return PyBytes_FromStringAndSize(data + 4,
		                                 self->entry_size - 4);
	}
	return PyBytes_FromStringAndSize(data, self->entry_size);
}
403 405
/*
 * Return the hash of node corresponding to the given rev, nullid for
 * nullrev, or NULL when the rev is out of range or unreadable.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	const char *entry;

	if (pos == nullrev)
		return nullid;

	if (pos >= index_length(self))
		return NULL;

	entry = index_deref(self, pos);
	if (entry == NULL)
		return NULL;
	/* the node id starts at byte 32 of the entry */
	return entry + 32;
}
421 423
/*
 * Return the hash of the node corresponding to the given rev. The
 * rev is assumed to be existing. If not, an exception is set.
 */
static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
{
	const char *node;

	node = index_node(self, pos);
	if (node != NULL)
		return node;

	PyErr_Format(PyExc_IndexError, "could not access rev %d", (int)pos);
	return NULL;
}
435 437
436 438 static int nt_insert(nodetree *self, const char *node, int rev);
437 439
/*
 * Extract the byte pointer of a node from a Python bytes object and
 * check it has the expected length. Returns 0 on success, -1 (with an
 * exception set) otherwise.
 */
static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
{
	Py_ssize_t actual;

	if (PyBytes_AsStringAndSize(obj, node, &actual) == -1)
		return -1;
	if (actual != nodelen) {
		PyErr_Format(PyExc_ValueError,
		             "node len %zd != expected node len %zd", actual,
		             nodelen);
		return -1;
	}
	return 0;
}
449 451
/*
 * Append one revision entry (given as the 12-tuple produced by
 * index_get) to the in-memory "added" area of the index, growing the
 * buffer as needed and keeping the node tree in sync.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	char data_comp_mode, sidedata_comp_mode;
	Py_ssize_t c_node_id_len, sidedata_comp_len;
	const char *c_node_id;
	char comp_field;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode, &sidedata_comp_mode)) {
		/* tuple_format consumes 12 tuple items (the node and its
		 * length come from a single bytes object) */
		PyErr_SetString(PyExc_TypeError, "12-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	if (self->format_version == format_v1) {
		/* v1 cannot represent non-inline compression modes */
		if (data_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid data compression mode: %i",
			             data_comp_mode);
			return NULL;
		}
		if (sidedata_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid sidedata compression mode: %i",
			             sidedata_comp_mode);
			return NULL;
		}
	}

	/* grow the "added" buffer geometrically when full */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;
	putbe32(offset_flags >> 32, data);
	putbe32(offset_flags & 0xffffffffU, data + 4);
	putbe32(comp_len, data + 8);
	putbe32(uncomp_len, data + 12);
	putbe32(base_rev, data + 16);
	putbe32(link_rev, data + 20);
	putbe32(parent_1, data + 24);
	putbe32(parent_2, data + 28);
	memcpy(data + 32, c_node_id, c_node_id_len);
	/* Padding since SHA-1 is only 20 bytes for now */
	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
	if (self->format_version == format_v2) {
		putbe64(sidedata_offset, data + 64);
		putbe32(sidedata_comp_len, data + 72);
		/* pack both 2-bit compression modes into byte 76 */
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[76] = comp_field;
		/* Padding for 96 bytes alignment */
		memset(data + 77, 0, self->entry_size - 77);
	}

	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	/* cached head list is invalidated by the new revision */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
528 530
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	int rev;
	char comp_mode;
	Py_ssize_t sidedata_comp_len;
	char *data;
	/* pick the ParseTuple format matching the platform's long size */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = PY23("nKiKB", "nKiKB");
#else
	const char *const sidedata_format = PY23("nkikB", "nkikB");
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
		 inline revlog to a separate index and data file. Turn it off until
		 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
		 See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* revisions already written to disk are immutable here */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	putbe64(offset_flags, data);
	putbe64(sidedata_offset, data + 64);
	putbe32(sidedata_comp_len, data + 72);
	/* only replace the sidedata compression bits (bits 2-3) of byte 76 */
	data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);

	Py_RETURN_NONE;
}
581 583
/*
 * Return a dict of diagnostic counters (bytes keys -> ints) describing
 * the index and, when initialized, its node trie.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* insert self-><field> into obj under the description __d; jumps to
 * bail on any allocation or insertion failure */
#define istat(__n, __d) \
	do { \
		s = PyBytes_FromString(__d); \
		t = PyInt_FromSsize_t(self->__n); \
		if (!s || !t) \
			goto bail; \
		if (PyDict_SetItem(obj, s, t) == -1) \
			goto bail; \
		Py_CLEAR(s); \
		Py_CLEAR(t); \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
626 628
/*
 * When we cache a list, we want to be sure the caller can't mutate
 * the cached copy.
 */
static PyObject *list_copy(PyObject *list)
{
	Py_ssize_t i, len;
	PyObject *copy;

	len = PyList_GET_SIZE(list);
	copy = PyList_New(len);
	if (copy == NULL)
		return NULL;

	for (i = 0; i < len; i++) {
		/* PyList_SET_ITEM steals a reference, so take one first */
		PyObject *item = PyList_GET_ITEM(list, i);
		Py_INCREF(item);
		PyList_SET_ITEM(copy, i, item);
	}

	return copy;
}
648 650
/*
 * Call the Python callable `filter` with `arg` and report whether the
 * result is truthy: 1 if filtered, 0 if not (or when no filter is set),
 * -1 on error.
 */
static int check_filter(PyObject *filter, Py_ssize_t arg)
{
	PyObject *arglist, *result;
	int isfiltered;

	if (!filter)
		return 0;

	arglist = Py_BuildValue("(n)", arg);
	if (arglist == NULL)
		return -1;

	result = PyObject_Call(filter, arglist, NULL);
	Py_DECREF(arglist);
	if (result == NULL)
		return -1;

	/* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
	 * same as this function, so we can just return it directly.*/
	isfiltered = PyObject_IsTrue(result);
	Py_DECREF(result);
	return isfiltered;
}
675 677
/*
 * Raise phases[i] to the maximum of its parents' phases: a child can
 * never have a "lower" phase than either of its parents.
 */
static inline void set_phase_from_parents(char *phases, int parent_1,
                                          int parent_2, Py_ssize_t i)
{
	int parents[2];
	int j;

	parents[0] = parent_1;
	parents[1] = parent_2;
	for (j = 0; j < 2; j++) {
		if (parents[j] >= 0 && phases[parents[j]] > phases[i])
			phases[i] = phases[parents[j]];
	}
}
684 686
/*
 * Compute which of `roots` are reachable from `heads` by walking parent
 * links, ignoring revisions below `minroot`. When `includepath` is true,
 * also return every revision on a path between a reachable root and a
 * head. Args: (minroot, heads list, roots list, includepath bool).
 * Returns a list of revision numbers.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the roots; revstates is indexed by revnum+1 so nullrev (-1)
	 * maps to slot 0 */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath the walk can stop at a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward sweep: a rev is reachable if either parent is */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
843 845
/*
 * Mark every revision whose node appears in the set `roots` with `phase`
 * in the phases array. Returns the smallest such revision, -1 when the
 * set is empty, or -2 on error (bad argument type, unknown node, ...).
 */
static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
                             char phase)
{
	Py_ssize_t len = index_length(self);
	PyObject *item;
	PyObject *iterator;
	int rev, minrev = -1;
	char *node;

	if (!PySet_Check(roots)) {
		PyErr_SetString(PyExc_TypeError,
		                "roots must be a set of nodes");
		return -2;
	}
	iterator = PyObject_GetIter(roots);
	if (iterator == NULL)
		return -2;
	while ((item = PyIter_Next(iterator))) {
		if (node_check(self->nodelen, item, &node) == -1)
			goto failed;
		rev = index_find_node(self, node);
		/* null is implicitly public, so negative is invalid */
		if (rev < 0 || rev >= len)
			goto failed;
		phases[rev] = phase;
		if (minrev == -1 || minrev > rev)
			minrev = rev;
		Py_DECREF(item);
	}
	Py_DECREF(iterator);
	return minrev;
failed:
	Py_DECREF(iterator);
	Py_DECREF(item);
	return -2;
}
880 882
/*
 * Compute the phase of every revision from a dict of {phase: root-node
 * set}. Returns a tuple (len, {phase: set(revs)}) covering the tracked
 * non-public phases. Phases propagate from roots to descendants via
 * set_phase_from_parents.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
	   96: internal */
	static const char trackedphases[] = {1, 2, 32, 96};
	PyObject *roots = Py_None;
	PyObject *phasesetsdict = NULL;
	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
	Py_ssize_t len = index_length(self);
	char *phases = NULL;
	int minphaserev = -1, rev, i;
	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));

	if (!PyArg_ParseTuple(args, "O", &roots))
		return NULL;
	if (roots == NULL || !PyDict_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
		return NULL;
	}

	/* one phase byte per revision, all initially public (0) */
	phases = calloc(len, 1);
	if (phases == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	/* seed the phase array from each tracked phase's root set and find
	 * the lowest revision at which any phase starts */
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
		PyObject *phaseroots = NULL;
		if (pyphase == NULL)
			goto release;
		phaseroots = PyDict_GetItem(roots, pyphase);
		Py_DECREF(pyphase);
		if (phaseroots == NULL)
			continue;
		rev = add_roots_get_min(self, phaseroots, phases,
		                        trackedphases[i]);
		if (rev == -2)
			goto release;
		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
			minphaserev = rev;
	}

	for (i = 0; i < numphases; ++i) {
		phasesets[i] = PySet_New(NULL);
		if (phasesets[i] == NULL)
			goto release;
	}

	if (minphaserev == -1)
		minphaserev = len;
	for (rev = minphaserev; rev < len; ++rev) {
		PyObject *pyphase = NULL;
		PyObject *pyrev = NULL;
		int parents[2];
		/*
		 * The parent lookup could be skipped for phaseroots, but
		 * phase --force would historically not recompute them
		 * correctly, leaving descendents with a lower phase around.
		 * As such, unconditionally recompute the phase.
		 */
		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
			goto release;
		set_phase_from_parents(phases, parents[0], parents[1], rev);
		switch (phases[rev]) {
		case 0:
			continue;
		case 1:
			pyphase = phasesets[0];
			break;
		case 2:
			pyphase = phasesets[1];
			break;
		case 32:
			pyphase = phasesets[2];
			break;
		case 96:
			pyphase = phasesets[3];
			break;
		default:
			/* this should never happen since the phase number is
			 * specified by this function. */
			PyErr_SetString(PyExc_SystemError,
			                "bad phase number in internal list");
			goto release;
		}
		pyrev = PyInt_FromLong(rev);
		if (pyrev == NULL)
			goto release;
		if (PySet_Add(pyphase, pyrev) == -1) {
			Py_DECREF(pyrev);
			goto release;
		}
		Py_DECREF(pyrev);
	}

	phasesetsdict = _dict_new_presized(numphases);
	if (phasesetsdict == NULL)
		goto release;
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
		if (pyphase == NULL)
			goto release;
		if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
		    -1) {
			Py_DECREF(pyphase);
			goto release;
		}
		/* PyDict_SetItem does not steal references: drop ours to the
		 * key (was leaked before) and to the set now owned by dict */
		Py_DECREF(pyphase);
		Py_DECREF(phasesets[i]);
		phasesets[i] = NULL;
	}

	/* free the scratch phase array on success too (was leaked before) */
	free(phases);
	return Py_BuildValue("nN", len, phasesetsdict);

release:
	for (i = 0; i < numphases; ++i)
		Py_XDECREF(phasesets[i]);
	Py_XDECREF(phasesetsdict);

	free(phases);
	return NULL;
}
1003 1005
/*
 * Return the list of head revisions (revisions with no unfiltered
 * children), honoring an optional `filteredrevs` container. The result
 * is cached on self->headrevs keyed by the filteredrevs object; a copy
 * is returned so callers cannot mutate the cache.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last computation */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		/* empty repo: nullrev is the only head */
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk from tip down, marking parents of unfiltered revs */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* collect every rev not marked as a non-head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1105 1107
/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 *
 * Returns the base revision, or -2 with a Python error set when the
 * entry cannot be read or the stored base indicates corruption.
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;
	int result;

	data = index_deref(self, rev);
	if (data == NULL)
		return -2;
	/* base revision is stored at byte offset 16 of the entry */
	result = getbe32(data + 16);

	if (result > rev) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base above revision: %d, %d",
		    rev, result);
		return -2;
	}
	if (result < -1) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base out of range: %d, %d", rev,
		    result);
		return -2;
	}
	return result;
}
1137 1139
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if the revision is a snapshot, 0 if it is a regular delta,
 * -1 on error (with a Python exception set). Walks the delta chain: a
 * revision is a snapshot when the chain reaches a full text without any
 * link being a delta against one of its own parents.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* self-referencing base marks a full text */
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		if (base == ps[0] || base == ps[1]) {
			/* delta against a parent: not a snapshot */
			return 0;
		}
		rev = base;
	}
	return rev == -1;
}
1171 1173
/*
 * Python-level wrapper around index_issnapshotrev: return True/False for
 * the given revision, or raise on invalid input or corrupted data.
 */
static PyObject *index_issnapshot(indexObject *self, PyObject *value)
{
	long rev;
	int result;

	if (!pylong_to_long(value, &rev))
		return NULL;

	if (rev < -1 || rev >= index_length(self)) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	}

	result = index_issnapshotrev(self, (Py_ssize_t)rev);
	if (result < 0)
		return NULL;
	return PyBool_FromLong((long)result);
}
1192 1194
/*
 * Populate `cache` (a dict) with {base rev: [snapshot revs]} for every
 * snapshot revision >= start_rev. Full snapshots are grouped under -1.
 */
static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
{
	Py_ssize_t start_rev;
	PyObject *cache;
	Py_ssize_t base;
	Py_ssize_t rev;
	PyObject *key = NULL;
	PyObject *value = NULL;
	const Py_ssize_t length = index_length(self);
	if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
		return NULL;
	}
	for (rev = start_rev; rev < length; rev++) {
		int issnap;
		PyObject *allvalues = NULL;
		issnap = index_issnapshotrev(self, rev);
		if (issnap < 0) {
			goto bail;
		}
		if (issnap == 0) {
			continue;
		}
		base = (Py_ssize_t)index_baserev(self, rev);
		if (base == rev) {
			/* self-referencing base means a full snapshot */
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			goto bail;
		}
		key = PyInt_FromSsize_t(base);
		/* creation can fail (e.g. out of memory) */
		if (key == NULL) {
			goto bail;
		}
		allvalues = PyDict_GetItem(cache, key);
		if (allvalues == NULL && PyErr_Occurred()) {
			goto bail;
		}
		if (allvalues == NULL) {
			int r;
			allvalues = PyList_New(0);
			if (!allvalues) {
				goto bail;
			}
			r = PyDict_SetItem(cache, key, allvalues);
			Py_DECREF(allvalues);
			if (r < 0) {
				goto bail;
			}
		}
		value = PyInt_FromSsize_t(rev);
		/* check before use: PyList_Append(list, NULL) is an error */
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(allvalues, value)) {
			goto bail;
		}
		Py_CLEAR(key);
		Py_CLEAR(value);
	}
	Py_RETURN_NONE;
bail:
	Py_XDECREF(key);
	Py_XDECREF(value);
	return NULL;
}
1253 1255
/*
 * Return ([revs in delta chain of rev], stopped) walking from `rev`
 * down to its chain base (generaldelta) or predecessor (non-general),
 * stopping early at `stoprev` when given. `stopped` reports whether the
 * walk ended at stoprev rather than at the chain base.
 */
static PyObject *index_deltachain(indexObject *self, PyObject *args)
{
	int rev, generaldelta;
	PyObject *stoparg;
	int stoprev, iterrev, baserev = -1;
	int stopped;
	PyObject *chain = NULL, *result = NULL;
	const Py_ssize_t length = index_length(self);

	if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
		return NULL;
	}

	if (PyInt_Check(stoparg)) {
		stoprev = (int)PyInt_AsLong(stoparg);
		if (stoprev == -1 && PyErr_Occurred()) {
			return NULL;
		}
	} else if (stoparg == Py_None) {
		/* -2 can never match a revision, so the walk never stops
		 * early */
		stoprev = -2;
	} else {
		PyErr_SetString(PyExc_ValueError,
		                "stoprev must be integer or None");
		return NULL;
	}

	if (rev < 0 || rev >= length) {
		PyErr_SetString(PyExc_ValueError, "revlog index out of range");
		return NULL;
	}

	chain = PyList_New(0);
	if (chain == NULL) {
		return NULL;
	}

	baserev = index_baserev(self, rev);

	/* This should never happen. */
	if (baserev <= -2) {
		/* Error should be set by index_deref() */
		assert(PyErr_Occurred());
		goto bail;
	}

	iterrev = rev;

	while (iterrev != baserev && iterrev != stoprev) {
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		if (generaldelta) {
			iterrev = baserev;
		} else {
			iterrev--;
		}

		if (iterrev < 0) {
			break;
		}

		if (iterrev >= length) {
			PyErr_SetString(PyExc_IndexError,
			                "revision outside index");
			/* goto bail (not return NULL) so `chain` is not
			 * leaked */
			goto bail;
		}

		baserev = index_baserev(self, iterrev);

		/* This should never happen. */
		if (baserev <= -2) {
			/* Error should be set by index_deref() */
			assert(PyErr_Occurred());
			goto bail;
		}
	}

	if (iterrev == stoprev) {
		stopped = 1;
	} else {
		/* include the chain base itself in the result */
		PyObject *value = PyInt_FromLong(iterrev);
		if (value == NULL) {
			goto bail;
		}
		if (PyList_Append(chain, value)) {
			Py_DECREF(value);
			goto bail;
		}
		Py_DECREF(value);

		stopped = 0;
	}

	if (PyList_Reverse(chain)) {
		goto bail;
	}

	result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
	Py_DECREF(chain);
	return result;

bail:
	Py_DECREF(chain);
	return NULL;
}
1366 1368
/*
 * Number of bytes spanned on disk by the revision range
 * [start_rev, end_rev]: from the first byte of start_rev's data to the
 * last byte of end_rev's data.  Returns -1 on error (exception set by
 * the index helpers, or ValueError on inconsistent offsets).
 */
static inline int64_t
index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
{
	int64_t start_offset;
	int64_t end_offset;
	int end_size;
	start_offset = index_get_start(self, start_rev);
	if (start_offset < 0) {
		return -1;
	}
	end_offset = index_get_start(self, end_rev);
	if (end_offset < 0) {
		return -1;
	}
	end_size = index_get_length(self, end_rev);
	if (end_size < 0) {
		return -1;
	}
	/* offsets must be monotonic for an ordered rev range */
	if (end_offset < start_offset) {
		PyErr_Format(PyExc_ValueError,
		             "corrupted revlog index: inconsistent offset "
		             "between revisions (%zd) and (%zd)",
		             start_rev, end_rev);
		return -1;
	}
	return (end_offset - start_offset) + (int64_t)end_size;
}
1394 1396
1395 1397 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1396 1398 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1397 1399 Py_ssize_t startidx, Py_ssize_t endidx)
1398 1400 {
1399 1401 int length;
1400 1402 while (endidx > 1 && endidx > startidx) {
1401 1403 length = index_get_length(self, revs[endidx - 1]);
1402 1404 if (length < 0) {
1403 1405 return -1;
1404 1406 }
1405 1407 if (length != 0) {
1406 1408 break;
1407 1409 }
1408 1410 endidx -= 1;
1409 1411 }
1410 1412 return endidx;
1411 1413 }
1412 1414
/* A hole between two consecutive on-disk chunks of revision data. */
struct Gap {
	int64_t size;   /* gap size in bytes */
	Py_ssize_t idx; /* position (in the rev array) just after the gap */
};
1417 1419
1418 1420 static int gap_compare(const void *left, const void *right)
1419 1421 {
1420 1422 const struct Gap *l_left = ((const struct Gap *)left);
1421 1423 const struct Gap *l_right = ((const struct Gap *)right);
1422 1424 if (l_left->size < l_right->size) {
1423 1425 return -1;
1424 1426 } else if (l_left->size > l_right->size) {
1425 1427 return 1;
1426 1428 }
1427 1429 return 0;
1428 1430 }
1429 1431 static int Py_ssize_t_compare(const void *left, const void *right)
1430 1432 {
1431 1433 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1432 1434 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1433 1435 if (l_left < l_right) {
1434 1436 return -1;
1435 1437 } else if (l_left > l_right) {
1436 1438 return 1;
1437 1439 }
1438 1440 return 0;
1439 1441 }
1440 1442
/*
 * slicechunktodensity(revs, targetdensity, mingapsize)
 *
 * Slice the delta chain `revs` (a list of revision numbers, assumed
 * in on-disk order) into contiguous chunks so that reading each chunk
 * has a payload/read-bytes ratio of at least `targetdensity`, ignoring
 * gaps of at most `mingapsize` bytes.  Fast paths return the original
 * list wrapped in a 1-tuple when no slicing is needed; otherwise a
 * list of revision-list slices is returned.
 */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	/* whole span no bigger than an ignorable gap: no slicing needed */
	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	/* already dense enough: keep the chain as a single slice */
	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions do not create or extend gaps */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* cut points must be applied in ascending index order */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel cut point so the final slice reaches the end */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1636 1638
1637 1639 static inline int nt_level(const char *node, Py_ssize_t level)
1638 1640 {
1639 1641 int v = node[level >> 1];
1640 1642 if (!(level & 1))
1641 1643 v >>= 4;
1642 1644 return v & 0xf;
1643 1645 }
1644 1646
/*
 * Look up a (possibly partial) node in the radix tree.  `node` is
 * binary when hex == 0 and a hex string when hex == 1; `nodelen` is its
 * length in that encoding.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* If the input is binary, do a fast check for the nullid first. */
	if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
	    node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
		return -1;

	/* maxlevel = number of nybbles of the query we can compare */
	if (hex)
		maxlevel = nodelen;
	else
		maxlevel = 2 * nodelen;
	if (maxlevel > 2 * self->nodelen)
		maxlevel = 2 * self->nodelen;

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* negative child slot: leaf storing rev -(v + 2) */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			/* confirm the remaining query nybbles match the leaf */
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* positive child slot: descend into inner node */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1695 1697
/*
 * Allocate a fresh nodetree node, doubling the backing array when it
 * is full.  Returns the index of the new node, or -1 with an exception
 * set on overflow or out-of-memory.
 */
static int nt_new(nodetree *self)
{
	if (self->length == self->capacity) {
		size_t newcapacity;
		nodetreenode *newnodes;
		newcapacity = self->capacity * 2;
		if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
			PyErr_SetString(PyExc_MemoryError,
			                "overflow in nt_new");
			return -1;
		}
		newnodes =
		    realloc(self->nodes, newcapacity * sizeof(nodetreenode));
		if (newnodes == NULL) {
			PyErr_SetString(PyExc_MemoryError, "out of memory");
			return -1;
		}
		self->capacity = newcapacity;
		self->nodes = newnodes;
		/* zero only the newly added tail of the array */
		memset(&self->nodes[self->length], 0,
		       sizeof(nodetreenode) * (self->capacity - self->length));
	}
	return self->length++;
}
1720 1722
/*
 * Insert binary `node` into the radix tree, mapping it to `rev`.
 * Child slots encode: 0 = empty, > 0 = index of an inner node,
 * < 0 = leaf holding rev -(v + 2).  Returns 0 on success, -1 on error
 * (exception set by a helper).
 */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 2 * self->nodelen) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store rev directly as a leaf */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot holds another leaf: split or update it */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, self->nodelen)) {
				/* same node: just remap to the new rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			/* push the old leaf one level down, then loop to
			 * retry inserting the new node at that level */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* inner node: descend one level */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1768 1770
1769 1771 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1770 1772 {
1771 1773 Py_ssize_t rev;
1772 1774 const char *node;
1773 1775 Py_ssize_t length;
1774 1776 if (!PyArg_ParseTuple(args, "n", &rev))
1775 1777 return NULL;
1776 1778 length = index_length(self->nt.index);
1777 1779 if (rev < 0 || rev >= length) {
1778 1780 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1779 1781 return NULL;
1780 1782 }
1781 1783 node = index_node_existing(self->nt.index, rev);
1782 1784 if (nt_insert(&self->nt, node, (int)rev) == -1)
1783 1785 return NULL;
1784 1786 Py_RETURN_NONE;
1785 1787 }
1786 1788
/* Remove `node` from the tree by re-inserting it with rev == -2. */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1793 1795
/*
 * Initialize a nodetree over `index`.  `capacity` is a hint expressed
 * in revisions.  Returns 0 on success, -1 with an exception set on
 * overflow or out-of-memory.  Does not take a reference on `index`;
 * the caller owns that (see ntobj_init()).
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* node 0 is the root, so one node is in use from the start */
	self->length = 1;
	return 0;
}
1818 1820
/* __init__ for the Python-level nodetree type: nodetree(index, capacity). */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	/* keep a strong reference to the index; released in ntobj_dealloc */
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
1829 1831
/* Look up a hexadecimal prefix; same return convention as nt_find(). */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1834 1836
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* hit a leaf: the prefix is unique at this depth */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1880 1882
1881 1883 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1882 1884 {
1883 1885 PyObject *val;
1884 1886 char *node;
1885 1887 int length;
1886 1888
1887 1889 if (!PyArg_ParseTuple(args, "O", &val))
1888 1890 return NULL;
1889 1891 if (node_check(self->nt.nodelen, val, &node) == -1)
1890 1892 return NULL;
1891 1893
1892 1894 length = nt_shortest(&self->nt, node);
1893 1895 if (length == -3)
1894 1896 return NULL;
1895 1897 if (length == -2) {
1896 1898 raise_revlog_error();
1897 1899 return NULL;
1898 1900 }
1899 1901 return PyInt_FromLong(length);
1900 1902 }
1901 1903
/* Release the node array; safe to call repeatedly (nodes reset to NULL). */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1907 1909
/* Destructor for the Python nodetree wrapper: drops the index reference
 * taken in ntobj_init(), then frees the native tree and the object. */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1914 1916
/* Methods exposed on the Python-level parsers.nodetree object. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1922 1924
/* Type object for parsers.nodetree (most slots unused, hence 0). */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1962 1964
/*
 * Lazily create the radix-tree cache for this index and seed it with
 * the nullid.  Idempotent; returns 0 on success, -1 on error.
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* revisions >= ntrev have not been inserted in the tree yet */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1981 1983
/*
 * Resolve a full binary node to its revision, lazily filling the
 * radix-tree cache as the index is scanned.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* record how far the cache got before failing */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* everything down to `rev` is now cached in the tree */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2041 2043
/*
 * index[value]: an integer returns the index entry for that rev; a
 * binary node returns its rev.  Raises RevlogError when a node is not
 * found.
 */
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	int rev;

	if (PyInt_Check(value)) {
		long idx;
		if (!pylong_to_long(value, &idx)) {
			return NULL;
		}
		return index_get(self, idx);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	/* rev == -3: exception already set by index_find_node() */
	return NULL;
}
2064 2066
/*
 * Fully populate the radix tree.
 */
static int index_populate_nt(indexObject *self)
{
	int rev;
	if (self->ntrev > 0) {
		/* insert every revision not yet cached, newest first */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -1;
			if (nt_insert(&self->nt, n, rev) == -1)
				return -1;
		}
		/* ntrev == -1 marks the tree as fully populated */
		self->ntrev = -1;
	}
	return 0;
}
2083 2085
/*
 * partialmatch(hexprefix) -> binary node or None.
 *
 * Resolve a hexadecimal node prefix: returns None when the prefix has
 * non-hex characters or matches no revision, the full binary node on a
 * unique match, and raises RevlogError on an ambiguous prefix.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix search needs the whole tree, not just cached nodes */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2134 2136
/*
 * shortest(binary node) -> length of the shortest unique hex prefix.
 * Raises RevlogError when the node is not in the index.
 */
static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;

	self->ntlookups++;
	/* uniqueness requires every node to be present in the tree */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}
2160 2162
2161 2163 static PyObject *index_m_get(indexObject *self, PyObject *args)
2162 2164 {
2163 2165 PyObject *val;
2164 2166 char *node;
2165 2167 int rev;
2166 2168
2167 2169 if (!PyArg_ParseTuple(args, "O", &val))
2168 2170 return NULL;
2169 2171 if (node_check(self->nodelen, val, &node) == -1)
2170 2172 return NULL;
2171 2173 rev = index_find_node(self, node);
2172 2174 if (rev == -3)
2173 2175 return NULL;
2174 2176 if (rev == -2)
2175 2177 Py_RETURN_NONE;
2176 2178 return PyInt_FromLong(rev);
2177 2179 }
2178 2180
/*
 * `value in index`: integers are valid when in [-1, len(index)); binary
 * nodes are looked up in the tree.  Returns 1/0, or -1 on error.
 */
static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		/* -1 is nullrev, always considered present */
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return -1;

	switch (index_find_node(self, node)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}
2203 2205
2204 2206 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2205 2207 {
2206 2208 int ret = index_contains(self, args);
2207 2209 if (ret < 0)
2208 2210 return NULL;
2209 2211 return PyBool_FromLong((long)ret);
2210 2212 }
2211 2213
/* rev(binary node) -> rev; raises RevlogError when not found. */
static PyObject *index_m_rev(indexObject *self, PyObject *val)
{
	char *node;
	int rev;

	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	/* rev == -3: exception already set */
	return NULL;
}
2226 2228
/* per-revision reachability bitmask used by the GCA computation below */
typedef uint64_t bitmask;
2228 2230
2229 2231 /*
2230 2232 * Given a disjoint set of revs, return all candidates for the
2231 2233 * greatest common ancestor. In revset notation, this is the set
2232 2234 * "heads(::a and ::b and ...)"
2233 2235 */
2234 2236 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2235 2237 int revcount)
2236 2238 {
2237 2239 const bitmask allseen = (1ull << revcount) - 1;
2238 2240 const bitmask poison = 1ull << revcount;
2239 2241 PyObject *gca = PyList_New(0);
2240 2242 int i, v, interesting;
2241 2243 int maxrev = -1;
2242 2244 bitmask sp;
2243 2245 bitmask *seen;
2244 2246
2245 2247 if (gca == NULL)
2246 2248 return PyErr_NoMemory();
2247 2249
2248 2250 for (i = 0; i < revcount; i++) {
2249 2251 if (revs[i] > maxrev)
2250 2252 maxrev = revs[i];
2251 2253 }
2252 2254
2253 2255 seen = calloc(sizeof(*seen), maxrev + 1);
2254 2256 if (seen == NULL) {
2255 2257 Py_DECREF(gca);
2256 2258 return PyErr_NoMemory();
2257 2259 }
2258 2260
2259 2261 for (i = 0; i < revcount; i++)
2260 2262 seen[revs[i]] = 1ull << i;
2261 2263
2262 2264 interesting = revcount;
2263 2265
2264 2266 for (v = maxrev; v >= 0 && interesting; v--) {
2265 2267 bitmask sv = seen[v];
2266 2268 int parents[2];
2267 2269
2268 2270 if (!sv)
2269 2271 continue;
2270 2272
2271 2273 if (sv < poison) {
2272 2274 interesting -= 1;
2273 2275 if (sv == allseen) {
2274 2276 PyObject *obj = PyInt_FromLong(v);
2275 2277 if (obj == NULL)
2276 2278 goto bail;
2277 2279 if (PyList_Append(gca, obj) == -1) {
2278 2280 Py_DECREF(obj);
2279 2281 goto bail;
2280 2282 }
2281 2283 sv |= poison;
2282 2284 for (i = 0; i < revcount; i++) {
2283 2285 if (revs[i] == v)
2284 2286 goto done;
2285 2287 }
2286 2288 }
2287 2289 }
2288 2290 if (index_get_parents(self, v, parents, maxrev) < 0)
2289 2291 goto bail;
2290 2292
2291 2293 for (i = 0; i < 2; i++) {
2292 2294 int p = parents[i];
2293 2295 if (p == -1)
2294 2296 continue;
2295 2297 sp = seen[p];
2296 2298 if (sv < poison) {
2297 2299 if (sp == 0) {
2298 2300 seen[p] = sv;
2299 2301 interesting++;
2300 2302 } else if (sp != sv)
2301 2303 seen[p] |= sv;
2302 2304 } else {
2303 2305 if (sp && sp < poison)
2304 2306 interesting--;
2305 2307 seen[p] = sv;
2306 2308 }
2307 2309 }
2308 2310 }
2309 2311
2310 2312 done:
2311 2313 free(seen);
2312 2314 return gca;
2313 2315 bail:
2314 2316 free(seen);
2315 2317 Py_XDECREF(gca);
2316 2318 return NULL;
2317 2319 }
2318 2320
2319 2321 /*
2320 2322 * Given a disjoint set of revs, return the subset with the longest
2321 2323 * path to the root.
2322 2324 */
2323 2325 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2324 2326 {
2325 2327 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2326 2328 static const Py_ssize_t capacity = 24;
2327 2329 int *depth, *interesting = NULL;
2328 2330 int i, j, v, ninteresting;
2329 2331 PyObject *dict = NULL, *keys = NULL;
2330 2332 long *seen = NULL;
2331 2333 int maxrev = -1;
2332 2334 long final;
2333 2335
2334 2336 if (revcount > capacity) {
2335 2337 PyErr_Format(PyExc_OverflowError,
2336 2338 "bitset size (%ld) > capacity (%ld)",
2337 2339 (long)revcount, (long)capacity);
2338 2340 return NULL;
2339 2341 }
2340 2342
2341 2343 for (i = 0; i < revcount; i++) {
2342 2344 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2343 2345 if (n > maxrev)
2344 2346 maxrev = n;
2345 2347 }
2346 2348
2347 2349 depth = calloc(sizeof(*depth), maxrev + 1);
2348 2350 if (depth == NULL)
2349 2351 return PyErr_NoMemory();
2350 2352
2351 2353 seen = calloc(sizeof(*seen), maxrev + 1);
2352 2354 if (seen == NULL) {
2353 2355 PyErr_NoMemory();
2354 2356 goto bail;
2355 2357 }
2356 2358
2357 2359 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2358 2360 if (interesting == NULL) {
2359 2361 PyErr_NoMemory();
2360 2362 goto bail;
2361 2363 }
2362 2364
2363 2365 if (PyList_Sort(revs) == -1)
2364 2366 goto bail;
2365 2367
2366 2368 for (i = 0; i < revcount; i++) {
2367 2369 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2368 2370 long b = 1l << i;
2369 2371 depth[n] = 1;
2370 2372 seen[n] = b;
2371 2373 interesting[b] = 1;
2372 2374 }
2373 2375
2374 2376 /* invariant: ninteresting is the number of non-zero entries in
2375 2377 * interesting. */
2376 2378 ninteresting = (int)revcount;
2377 2379
2378 2380 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2379 2381 int dv = depth[v];
2380 2382 int parents[2];
2381 2383 long sv;
2382 2384
2383 2385 if (dv == 0)
2384 2386 continue;
2385 2387
2386 2388 sv = seen[v];
2387 2389 if (index_get_parents(self, v, parents, maxrev) < 0)
2388 2390 goto bail;
2389 2391
2390 2392 for (i = 0; i < 2; i++) {
2391 2393 int p = parents[i];
2392 2394 long sp;
2393 2395 int dp;
2394 2396
2395 2397 if (p == -1)
2396 2398 continue;
2397 2399
2398 2400 dp = depth[p];
2399 2401 sp = seen[p];
2400 2402 if (dp <= dv) {
2401 2403 depth[p] = dv + 1;
2402 2404 if (sp != sv) {
2403 2405 interesting[sv] += 1;
2404 2406 seen[p] = sv;
2405 2407 if (sp) {
2406 2408 interesting[sp] -= 1;
2407 2409 if (interesting[sp] == 0)
2408 2410 ninteresting -= 1;
2409 2411 }
2410 2412 }
2411 2413 } else if (dv == dp - 1) {
2412 2414 long nsp = sp | sv;
2413 2415 if (nsp == sp)
2414 2416 continue;
2415 2417 seen[p] = nsp;
2416 2418 interesting[sp] -= 1;
2417 2419 if (interesting[sp] == 0)
2418 2420 ninteresting -= 1;
2419 2421 if (interesting[nsp] == 0)
2420 2422 ninteresting += 1;
2421 2423 interesting[nsp] += 1;
2422 2424 }
2423 2425 }
2424 2426 interesting[sv] -= 1;
2425 2427 if (interesting[sv] == 0)
2426 2428 ninteresting -= 1;
2427 2429 }
2428 2430
2429 2431 final = 0;
2430 2432 j = ninteresting;
2431 2433 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2432 2434 if (interesting[i] == 0)
2433 2435 continue;
2434 2436 final |= i;
2435 2437 j -= 1;
2436 2438 }
2437 2439 if (final == 0) {
2438 2440 keys = PyList_New(0);
2439 2441 goto bail;
2440 2442 }
2441 2443
2442 2444 dict = PyDict_New();
2443 2445 if (dict == NULL)
2444 2446 goto bail;
2445 2447
2446 2448 for (i = 0; i < revcount; i++) {
2447 2449 PyObject *key;
2448 2450
2449 2451 if ((final & (1 << i)) == 0)
2450 2452 continue;
2451 2453
2452 2454 key = PyList_GET_ITEM(revs, i);
2453 2455 Py_INCREF(key);
2454 2456 Py_INCREF(Py_None);
2455 2457 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2456 2458 Py_DECREF(key);
2457 2459 Py_DECREF(Py_None);
2458 2460 goto bail;
2459 2461 }
2460 2462 }
2461 2463
2462 2464 keys = PyDict_Keys(dict);
2463 2465
2464 2466 bail:
2465 2467 free(depth);
2466 2468 free(seen);
2467 2469 free(interesting);
2468 2470 Py_XDECREF(dict);
2469 2471
2470 2472 return keys;
2471 2473 }
2472 2474
2473 2475 /*
2474 2476 * Given a (possibly overlapping) set of revs, return all the
2475 2477 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2476 2478 */
2477 2479 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2478 2480 {
2479 2481 PyObject *ret = NULL;
2480 2482 Py_ssize_t argcount, i, len;
2481 2483 bitmask repeat = 0;
2482 2484 int revcount = 0;
2483 2485 int *revs;
2484 2486
2485 2487 argcount = PySequence_Length(args);
2486 2488 revs = PyMem_Malloc(argcount * sizeof(*revs));
2487 2489 if (argcount > 0 && revs == NULL)
2488 2490 return PyErr_NoMemory();
2489 2491 len = index_length(self);
2490 2492
2491 2493 for (i = 0; i < argcount; i++) {
2492 2494 static const int capacity = 24;
2493 2495 PyObject *obj = PySequence_GetItem(args, i);
2494 2496 bitmask x;
2495 2497 long val;
2496 2498
2497 2499 if (!PyInt_Check(obj)) {
2498 2500 PyErr_SetString(PyExc_TypeError,
2499 2501 "arguments must all be ints");
2500 2502 Py_DECREF(obj);
2501 2503 goto bail;
2502 2504 }
2503 2505 val = PyInt_AsLong(obj);
2504 2506 Py_DECREF(obj);
2505 2507 if (val == -1) {
2506 2508 ret = PyList_New(0);
2507 2509 goto done;
2508 2510 }
2509 2511 if (val < 0 || val >= len) {
2510 2512 PyErr_SetString(PyExc_IndexError, "index out of range");
2511 2513 goto bail;
2512 2514 }
2513 2515 /* this cheesy bloom filter lets us avoid some more
2514 2516 * expensive duplicate checks in the common set-is-disjoint
2515 2517 * case */
2516 2518 x = 1ull << (val & 0x3f);
2517 2519 if (repeat & x) {
2518 2520 int k;
2519 2521 for (k = 0; k < revcount; k++) {
2520 2522 if (val == revs[k])
2521 2523 goto duplicate;
2522 2524 }
2523 2525 } else
2524 2526 repeat |= x;
2525 2527 if (revcount >= capacity) {
2526 2528 PyErr_Format(PyExc_OverflowError,
2527 2529 "bitset size (%d) > capacity (%d)",
2528 2530 revcount, capacity);
2529 2531 goto bail;
2530 2532 }
2531 2533 revs[revcount++] = (int)val;
2532 2534 duplicate:;
2533 2535 }
2534 2536
2535 2537 if (revcount == 0) {
2536 2538 ret = PyList_New(0);
2537 2539 goto done;
2538 2540 }
2539 2541 if (revcount == 1) {
2540 2542 PyObject *obj;
2541 2543 ret = PyList_New(1);
2542 2544 if (ret == NULL)
2543 2545 goto bail;
2544 2546 obj = PyInt_FromLong(revs[0]);
2545 2547 if (obj == NULL)
2546 2548 goto bail;
2547 2549 PyList_SET_ITEM(ret, 0, obj);
2548 2550 goto done;
2549 2551 }
2550 2552
2551 2553 ret = find_gca_candidates(self, revs, revcount);
2552 2554 if (ret == NULL)
2553 2555 goto bail;
2554 2556
2555 2557 done:
2556 2558 PyMem_Free(revs);
2557 2559 return ret;
2558 2560
2559 2561 bail:
2560 2562 PyMem_Free(revs);
2561 2563 Py_XDECREF(ret);
2562 2564 return NULL;
2563 2565 }
2564 2566
2565 2567 /*
2566 2568 * Given a (possibly overlapping) set of revs, return the greatest
2567 2569 * common ancestors: those with the longest path to the root.
2568 2570 */
2569 2571 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2570 2572 {
2571 2573 PyObject *ret;
2572 2574 PyObject *gca = index_commonancestorsheads(self, args);
2573 2575 if (gca == NULL)
2574 2576 return NULL;
2575 2577
2576 2578 if (PyList_GET_SIZE(gca) <= 1) {
2577 2579 return gca;
2578 2580 }
2579 2581
2580 2582 ret = find_deepest(self, gca);
2581 2583 Py_DECREF(gca);
2582 2584 return ret;
2583 2585 }
2584 2586
2585 2587 /*
2586 2588 * Invalidate any trie entries introduced by added revs.
2587 2589 */
2588 2590 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2589 2591 {
2590 2592 Py_ssize_t i, len;
2591 2593
2592 2594 len = self->length + self->new_length;
2593 2595 i = start - self->length;
2594 2596 if (i < 0)
2595 2597 return;
2596 2598
2597 2599 for (i = start; i < len; i++)
2598 2600 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2599 2601
2600 2602 self->new_length = start - self->length;
2601 2603 }
2602 2604
2603 2605 /*
2604 2606 * Delete a numeric range of revs, which must be at the end of the
2605 2607 * range.
2606 2608 */
2607 2609 static int index_slice_del(indexObject *self, PyObject *item)
2608 2610 {
2609 2611 Py_ssize_t start, stop, step, slicelength;
2610 2612 Py_ssize_t length = index_length(self) + 1;
2611 2613 int ret = 0;
2612 2614
2613 2615 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2614 2616 #ifdef IS_PY3K
2615 2617 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2616 2618 &slicelength) < 0)
2617 2619 #else
2618 2620 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2619 2621 &step, &slicelength) < 0)
2620 2622 #endif
2621 2623 return -1;
2622 2624
2623 2625 if (slicelength <= 0)
2624 2626 return 0;
2625 2627
2626 2628 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2627 2629 stop = start;
2628 2630
2629 2631 if (step < 0) {
2630 2632 stop = start + 1;
2631 2633 start = stop + step * (slicelength - 1) - 1;
2632 2634 step = -step;
2633 2635 }
2634 2636
2635 2637 if (step != 1) {
2636 2638 PyErr_SetString(PyExc_ValueError,
2637 2639 "revlog index delete requires step size of 1");
2638 2640 return -1;
2639 2641 }
2640 2642
2641 2643 if (stop != length - 1) {
2642 2644 PyErr_SetString(PyExc_IndexError,
2643 2645 "revlog index deletion indices are invalid");
2644 2646 return -1;
2645 2647 }
2646 2648
2647 2649 if (start < self->length) {
2648 2650 if (self->ntinitialized) {
2649 2651 Py_ssize_t i;
2650 2652
2651 2653 for (i = start; i < self->length; i++) {
2652 2654 const char *node = index_node_existing(self, i);
2653 2655 if (node == NULL)
2654 2656 return -1;
2655 2657
2656 2658 nt_delete_node(&self->nt, node);
2657 2659 }
2658 2660 if (self->new_length)
2659 2661 index_invalidate_added(self, self->length);
2660 2662 if (self->ntrev > start)
2661 2663 self->ntrev = (int)start;
2662 2664 } else if (self->new_length) {
2663 2665 self->new_length = 0;
2664 2666 }
2665 2667
2666 2668 self->length = start;
2667 2669 goto done;
2668 2670 }
2669 2671
2670 2672 if (self->ntinitialized) {
2671 2673 index_invalidate_added(self, start);
2672 2674 if (self->ntrev > start)
2673 2675 self->ntrev = (int)start;
2674 2676 } else {
2675 2677 self->new_length = start - self->length;
2676 2678 }
2677 2679 done:
2678 2680 Py_CLEAR(self->headrevs);
2679 2681 return ret;
2680 2682 }
2681 2683
2682 2684 /*
2683 2685 * Supported ops:
2684 2686 *
2685 2687 * slice deletion
2686 2688 * string assignment (extend node->rev mapping)
2687 2689 * string deletion (shrink node->rev mapping)
2688 2690 */
2689 2691 static int index_assign_subscript(indexObject *self, PyObject *item,
2690 2692 PyObject *value)
2691 2693 {
2692 2694 char *node;
2693 2695 long rev;
2694 2696
2695 2697 if (PySlice_Check(item) && value == NULL)
2696 2698 return index_slice_del(self, item);
2697 2699
2698 2700 if (node_check(self->nodelen, item, &node) == -1)
2699 2701 return -1;
2700 2702
2701 2703 if (value == NULL)
2702 2704 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2703 2705 : 0;
2704 2706 rev = PyInt_AsLong(value);
2705 2707 if (rev > INT_MAX || rev < 0) {
2706 2708 if (!PyErr_Occurred())
2707 2709 PyErr_SetString(PyExc_ValueError, "rev out of range");
2708 2710 return -1;
2709 2711 }
2710 2712
2711 2713 if (index_init_nt(self) == -1)
2712 2714 return -1;
2713 2715 return nt_insert(&self->nt, node, (int)rev);
2714 2716 }
2715 2717
2716 2718 /*
2717 2719 * Find all RevlogNG entries in an index that has inline data. Update
2718 2720 * the optional "offsets" table with those entries.
2719 2721 */
2720 2722 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2721 2723 {
2722 2724 const char *data = (const char *)self->buf.buf;
2723 2725 Py_ssize_t pos = 0;
2724 2726 Py_ssize_t end = self->buf.len;
2725 2727 long incr = self->entry_size;
2726 2728 Py_ssize_t len = 0;
2727 2729
2728 2730 while (pos + self->entry_size <= end && pos >= 0) {
2729 2731 uint32_t comp_len, sidedata_comp_len = 0;
2730 2732 /* 3rd element of header is length of compressed inline data */
2731 2733 comp_len = getbe32(data + pos + 8);
2732 2734 if (self->entry_size == v2_entry_size) {
2733 2735 sidedata_comp_len = getbe32(data + pos + 72);
2734 2736 }
2735 2737 incr = self->entry_size + comp_len + sidedata_comp_len;
2736 2738 if (offsets)
2737 2739 offsets[len] = data + pos;
2738 2740 len++;
2739 2741 pos += incr;
2740 2742 }
2741 2743
2742 2744 if (pos != end) {
2743 2745 if (!PyErr_Occurred())
2744 2746 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2745 2747 return -1;
2746 2748 }
2747 2749
2748 2750 return len;
2749 2751 }
2750 2752
2751 2753 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2752 2754 {
2753 2755 PyObject *data_obj, *inlined_obj, *revlogv2;
2754 2756 Py_ssize_t size;
2755 2757
2756 2758 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2757 2759
2758 2760 /* Initialize before argument-checking to avoid index_dealloc() crash.
2759 2761 */
2760 2762 self->added = NULL;
2761 2763 self->new_length = 0;
2762 2764 self->added_length = 0;
2763 2765 self->data = NULL;
2764 2766 memset(&self->buf, 0, sizeof(self->buf));
2765 2767 self->headrevs = NULL;
2766 2768 self->filteredrevs = Py_None;
2767 2769 Py_INCREF(Py_None);
2768 2770 self->ntinitialized = 0;
2769 2771 self->offsets = NULL;
2770 2772 self->nodelen = 20;
2771 2773 self->nullentry = NULL;
2774 self->rust_ext_compat = 1;
2772 2775
2773 2776 revlogv2 = NULL;
2774 2777 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2775 2778 &data_obj, &inlined_obj, &revlogv2))
2776 2779 return -1;
2777 2780 if (!PyObject_CheckBuffer(data_obj)) {
2778 2781 PyErr_SetString(PyExc_TypeError,
2779 2782 "data does not support buffer interface");
2780 2783 return -1;
2781 2784 }
2782 2785 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2783 2786 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2784 2787 return -1;
2785 2788 }
2786 2789
2787 2790 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2788 2791 self->format_version = format_v2;
2789 2792 self->entry_size = v2_entry_size;
2790 2793 } else {
2791 2794 self->format_version = format_v1;
2792 2795 self->entry_size = v1_entry_size;
2793 2796 }
2794 2797
2795 2798 self->nullentry = Py_BuildValue(
2796 2799 PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
2797 2800 nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);
2798 2801
2799 2802 if (!self->nullentry)
2800 2803 return -1;
2801 2804 PyObject_GC_UnTrack(self->nullentry);
2802 2805
2803 2806 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2804 2807 return -1;
2805 2808 size = self->buf.len;
2806 2809
2807 2810 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2808 2811 self->data = data_obj;
2809 2812
2810 2813 self->ntlookups = self->ntmisses = 0;
2811 2814 self->ntrev = -1;
2812 2815 Py_INCREF(self->data);
2813 2816
2814 2817 if (self->inlined) {
2815 2818 Py_ssize_t len = inline_scan(self, NULL);
2816 2819 if (len == -1)
2817 2820 goto bail;
2818 2821 self->length = len;
2819 2822 } else {
2820 2823 if (size % self->entry_size) {
2821 2824 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2822 2825 goto bail;
2823 2826 }
2824 2827 self->length = size / self->entry_size;
2825 2828 }
2826 2829
2827 2830 return 0;
2828 2831 bail:
2829 2832 return -1;
2830 2833 }
2831 2834
2832 2835 static PyObject *index_nodemap(indexObject *self)
2833 2836 {
2834 2837 Py_INCREF(self);
2835 2838 return (PyObject *)self;
2836 2839 }
2837 2840
2838 2841 static void _index_clearcaches(indexObject *self)
2839 2842 {
2840 2843 if (self->offsets) {
2841 2844 PyMem_Free((void *)self->offsets);
2842 2845 self->offsets = NULL;
2843 2846 }
2844 2847 if (self->ntinitialized) {
2845 2848 nt_dealloc(&self->nt);
2846 2849 }
2847 2850 self->ntinitialized = 0;
2848 2851 Py_CLEAR(self->headrevs);
2849 2852 }
2850 2853
2851 2854 static PyObject *index_clearcaches(indexObject *self)
2852 2855 {
2853 2856 _index_clearcaches(self);
2854 2857 self->ntrev = -1;
2855 2858 self->ntlookups = self->ntmisses = 0;
2856 2859 Py_RETURN_NONE;
2857 2860 }
2858 2861
2859 2862 static void index_dealloc(indexObject *self)
2860 2863 {
2861 2864 _index_clearcaches(self);
2862 2865 Py_XDECREF(self->filteredrevs);
2863 2866 if (self->buf.buf) {
2864 2867 PyBuffer_Release(&self->buf);
2865 2868 memset(&self->buf, 0, sizeof(self->buf));
2866 2869 }
2867 2870 Py_XDECREF(self->data);
2868 2871 PyMem_Free(self->added);
2869 2872 Py_XDECREF(self->nullentry);
2870 2873 PyObject_Del(self);
2871 2874 }
2872 2875
2873 2876 static PySequenceMethods index_sequence_methods = {
2874 2877 (lenfunc)index_length, /* sq_length */
2875 2878 0, /* sq_concat */
2876 2879 0, /* sq_repeat */
2877 2880 (ssizeargfunc)index_get, /* sq_item */
2878 2881 0, /* sq_slice */
2879 2882 0, /* sq_ass_item */
2880 2883 0, /* sq_ass_slice */
2881 2884 (objobjproc)index_contains, /* sq_contains */
2882 2885 };
2883 2886
2884 2887 static PyMappingMethods index_mapping_methods = {
2885 2888 (lenfunc)index_length, /* mp_length */
2886 2889 (binaryfunc)index_getitem, /* mp_subscript */
2887 2890 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2888 2891 };
2889 2892
2890 2893 static PyMethodDef index_methods[] = {
2891 2894 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2892 2895 "return the gca set of the given revs"},
2893 2896 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2894 2897 METH_VARARGS,
2895 2898 "return the heads of the common ancestors of the given revs"},
2896 2899 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2897 2900 "clear the index caches"},
2898 2901 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2899 2902 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2900 2903 "return `rev` associated with a node or None"},
2901 2904 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2902 2905 "return True if the node exist in the index"},
2903 2906 {"rev", (PyCFunction)index_m_rev, METH_O,
2904 2907 "return `rev` associated with a node or raise RevlogError"},
2905 2908 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2906 2909 "compute phases"},
2907 2910 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2908 2911 "reachableroots"},
2909 2912 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2910 2913 METH_VARARGS, "replace an existing index entry with a new value"},
2911 2914 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2912 2915 "get head revisions"}, /* Can do filtering since 3.2 */
2913 2916 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2914 2917 "get filtered head revisions"}, /* Can always do filtering */
2915 2918 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2916 2919 "True if the object is a snapshot"},
2917 2920 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2918 2921 "Gather snapshot data in a cache dict"},
2919 2922 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2920 2923 "determine revisions with deltas to reconstruct fulltext"},
2921 2924 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2922 2925 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2923 2926 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2924 2927 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2925 2928 "match a potentially ambiguous node ID"},
2926 2929 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2927 2930 "find length of shortest hex nodeid of a binary ID"},
2928 2931 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2929 2932 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2930 2933 "return an entry in binary form"},
2931 2934 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2932 2935 "pack the revlog header information into binary"},
2933 2936 {NULL} /* Sentinel */
2934 2937 };
2935 2938
2936 2939 static PyGetSetDef index_getset[] = {
2937 2940 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2938 2941 {NULL} /* Sentinel */
2939 2942 };
2940 2943
2941 2944 static PyMemberDef index_members[] = {
2942 2945 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2943 2946 "size of an index entry"},
2947 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
2948 "size of an index entry"},
2944 2949 {NULL} /* Sentinel */
2945 2950 };
2946 2951
2947 2952 PyTypeObject HgRevlogIndex_Type = {
2948 2953 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2949 2954 "parsers.index", /* tp_name */
2950 2955 sizeof(indexObject), /* tp_basicsize */
2951 2956 0, /* tp_itemsize */
2952 2957 (destructor)index_dealloc, /* tp_dealloc */
2953 2958 0, /* tp_print */
2954 2959 0, /* tp_getattr */
2955 2960 0, /* tp_setattr */
2956 2961 0, /* tp_compare */
2957 2962 0, /* tp_repr */
2958 2963 0, /* tp_as_number */
2959 2964 &index_sequence_methods, /* tp_as_sequence */
2960 2965 &index_mapping_methods, /* tp_as_mapping */
2961 2966 0, /* tp_hash */
2962 2967 0, /* tp_call */
2963 2968 0, /* tp_str */
2964 2969 0, /* tp_getattro */
2965 2970 0, /* tp_setattro */
2966 2971 0, /* tp_as_buffer */
2967 2972 Py_TPFLAGS_DEFAULT, /* tp_flags */
2968 2973 "revlog index", /* tp_doc */
2969 2974 0, /* tp_traverse */
2970 2975 0, /* tp_clear */
2971 2976 0, /* tp_richcompare */
2972 2977 0, /* tp_weaklistoffset */
2973 2978 0, /* tp_iter */
2974 2979 0, /* tp_iternext */
2975 2980 index_methods, /* tp_methods */
2976 2981 index_members, /* tp_members */
2977 2982 index_getset, /* tp_getset */
2978 2983 0, /* tp_base */
2979 2984 0, /* tp_dict */
2980 2985 0, /* tp_descr_get */
2981 2986 0, /* tp_descr_set */
2982 2987 0, /* tp_dictoffset */
2983 2988 (initproc)index_init, /* tp_init */
2984 2989 0, /* tp_alloc */
2985 2990 };
2986 2991
2987 2992 /*
2988 2993 * returns a tuple of the form (index, cache) with elements as
2989 2994 * follows:
2990 2995 *
2991 2996 * index: an index object that lazily parses Revlog (v1 or v2) records
2992 2997 * cache: if data is inlined, a tuple (0, index_file_content), else None
2993 2998 * index_file_content could be a string, or a buffer
2994 2999 *
2995 3000 * added complications are for backwards compatibility
2996 3001 */
2997 3002 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2998 3003 {
2999 3004 PyObject *cache = NULL;
3000 3005 indexObject *idx;
3001 3006 int ret;
3002 3007
3003 3008 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3004 3009 if (idx == NULL)
3005 3010 goto bail;
3006 3011
3007 3012 ret = index_init(idx, args, kwargs);
3008 3013 if (ret == -1)
3009 3014 goto bail;
3010 3015
3011 3016 if (idx->inlined) {
3012 3017 cache = Py_BuildValue("iO", 0, idx->data);
3013 3018 if (cache == NULL)
3014 3019 goto bail;
3015 3020 } else {
3016 3021 cache = Py_None;
3017 3022 Py_INCREF(cache);
3018 3023 }
3019 3024
3020 3025 return Py_BuildValue("NN", idx, cache);
3021 3026
3022 3027 bail:
3023 3028 Py_XDECREF(idx);
3024 3029 Py_XDECREF(cache);
3025 3030 return NULL;
3026 3031 }
3027 3032
3028 3033 static Revlog_CAPI CAPI = {
3029 3034 /* increment the abi_version field upon each change in the Revlog_CAPI
3030 3035 struct or in the ABI of the listed functions */
3031 3036 2,
3032 3037 index_length,
3033 3038 index_node,
3034 3039 HgRevlogIndex_GetParents,
3035 3040 };
3036 3041
3037 3042 void revlog_module_init(PyObject *mod)
3038 3043 {
3039 3044 PyObject *caps = NULL;
3040 3045 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3041 3046 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3042 3047 return;
3043 3048 Py_INCREF(&HgRevlogIndex_Type);
3044 3049 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3045 3050
3046 3051 nodetreeType.tp_new = PyType_GenericNew;
3047 3052 if (PyType_Ready(&nodetreeType) < 0)
3048 3053 return;
3049 3054 Py_INCREF(&nodetreeType);
3050 3055 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3051 3056
3052 3057 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3053 3058 if (caps != NULL)
3054 3059 PyModule_AddObject(mod, "revlog_CAPI", caps);
3055 3060 }
@@ -1,157 +1,157
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 from .pycompat import getattr
14 14
15 15 # Rules for how modules can be loaded. Values are:
16 16 #
17 17 # c - require C extensions
18 18 # rust+c - require Rust and C extensions
19 19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
20 20 # for each
21 21 # allow - allow pure Python implementation when C loading fails
22 22 # cffi - required cffi versions (implemented within pure module)
23 23 # cffi-allow - allow pure Python implementation if cffi version is missing
24 24 # py - only load pure Python modules
25 25 #
26 26 # By default, fall back to the pure modules so the in-place build can
27 27 # run without recompiling the C extensions. This will be overridden by
28 28 # __modulepolicy__ generated by setup.py.
29 29 policy = b'allow'
30 30 _packageprefs = {
31 31 # policy: (versioned package, pure package)
32 32 b'c': ('cext', None),
33 33 b'allow': ('cext', 'pure'),
34 34 b'cffi': ('cffi', None),
35 35 b'cffi-allow': ('cffi', 'pure'),
36 36 b'py': (None, 'pure'),
37 37 # For now, rust policies impact importrust only
38 38 b'rust+c': ('cext', None),
39 39 b'rust+c-allow': ('cext', 'pure'),
40 40 }
41 41
42 42 try:
43 43 from . import __modulepolicy__
44 44
45 45 policy = __modulepolicy__.modulepolicy
46 46 except ImportError:
47 47 pass
48 48
49 49 # PyPy doesn't load C extensions.
50 50 #
51 51 # The canonical way to do this is to test platform.python_implementation().
52 52 # But we don't import platform and don't bloat for it here.
53 53 if '__pypy__' in sys.builtin_module_names:
54 54 policy = b'cffi'
55 55
56 56 # Environment variable can always force settings.
57 57 if sys.version_info[0] >= 3:
58 58 if 'HGMODULEPOLICY' in os.environ:
59 59 policy = os.environ['HGMODULEPOLICY'].encode('utf-8')
60 60 else:
61 61 policy = os.environ.get('HGMODULEPOLICY', policy)
62 62
63 63
64 64 def _importfrom(pkgname, modname):
65 65 # from .<pkgname> import <modname> (where . is looked through this module)
66 66 fakelocals = {}
67 67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
68 68 try:
69 69 fakelocals[modname] = mod = getattr(pkg, modname)
70 70 except AttributeError:
71 71 raise ImportError('cannot import name %s' % modname)
72 72 # force import; fakelocals[modname] may be replaced with the real module
73 73 getattr(mod, '__doc__', None)
74 74 return fakelocals[modname]
75 75
76 76
77 77 # keep in sync with "version" in C modules
78 78 _cextversions = {
79 79 ('cext', 'base85'): 1,
80 80 ('cext', 'bdiff'): 3,
81 81 ('cext', 'mpatch'): 1,
82 82 ('cext', 'osutil'): 4,
83 ('cext', 'parsers'): 19,
83 ('cext', 'parsers'): 20,
84 84 }
85 85
86 86 # map import request to other package or module
87 87 _modredirects = {
88 88 ('cext', 'charencode'): ('cext', 'parsers'),
89 89 ('cffi', 'base85'): ('pure', 'base85'),
90 90 ('cffi', 'charencode'): ('pure', 'charencode'),
91 91 ('cffi', 'parsers'): ('pure', 'parsers'),
92 92 }
93 93
94 94
95 95 def _checkmod(pkgname, modname, mod):
96 96 expected = _cextversions.get((pkgname, modname))
97 97 actual = getattr(mod, 'version', None)
98 98 if actual != expected:
99 99 raise ImportError(
100 100 'cannot import module %s.%s '
101 101 '(expected version: %d, actual: %r)'
102 102 % (pkgname, modname, expected, actual)
103 103 )
104 104
105 105
106 106 def importmod(modname):
107 107 """Import module according to policy and check API version"""
108 108 try:
109 109 verpkg, purepkg = _packageprefs[policy]
110 110 except KeyError:
111 111 raise ImportError('invalid HGMODULEPOLICY %r' % policy)
112 112 assert verpkg or purepkg
113 113 if verpkg:
114 114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
115 115 try:
116 116 mod = _importfrom(pn, mn)
117 117 if pn == verpkg:
118 118 _checkmod(pn, mn, mod)
119 119 return mod
120 120 except ImportError:
121 121 if not purepkg:
122 122 raise
123 123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
124 124 return _importfrom(pn, mn)
125 125
126 126
127 127 def _isrustpermissive():
128 128 """Assuming the policy is a Rust one, tell if it's permissive."""
129 129 return policy.endswith(b'-allow')
130 130
131 131
132 132 def importrust(modname, member=None, default=None):
133 133 """Import Rust module according to policy and availability.
134 134
135 135 If policy isn't a Rust one, this returns `default`.
136 136
137 137 If either the module or its member is not available, this returns `default`
138 138 if policy is permissive and raises `ImportError` if not.
139 139 """
140 140 if not policy.startswith(b'rust'):
141 141 return default
142 142
143 143 try:
144 144 mod = _importfrom('rustext', modname)
145 145 except ImportError:
146 146 if _isrustpermissive():
147 147 return default
148 148 raise
149 149 if member is None:
150 150 return mod
151 151
152 152 try:
153 153 return getattr(mod, member)
154 154 except AttributeError:
155 155 if _isrustpermissive():
156 156 return default
157 157 raise ImportError("Cannot import name %s" % member)
@@ -1,406 +1,408
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 20 util,
21 21 )
22 22
23 23 from ..revlogutils import nodemap as nodemaputil
24 24 from ..revlogutils import constants as revlog_constants
25 25
26 26 stringio = pycompat.bytesio
27 27
28 28
29 29 _pack = struct.pack
30 30 _unpack = struct.unpack
31 31 _compress = zlib.compress
32 32 _decompress = zlib.decompress
33 33
34 34 # Some code below makes tuples directly because it's more convenient. However,
35 35 # code outside this module should always use dirstatetuple.
36 36 def dirstatetuple(*x):
37 37 # x is a tuple
38 38 return x
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
48 48
49 49 class BaseIndexObject(object):
 50 # Can I be passed to an algorithm implemented in Rust?
51 rust_ext_compat = 0
50 52 # Format of an index entry according to Python's `struct` language
51 53 index_format = revlog_constants.INDEX_ENTRY_V1
52 54 # Size of a C unsigned long long int, platform independent
53 55 big_int_size = struct.calcsize(b'>Q')
54 56 # Size of a C long int, platform independent
55 57 int_size = struct.calcsize(b'>i')
56 58 # An empty index entry, used as a default value to be overridden, or nullrev
57 59 null_item = (
58 60 0,
59 61 0,
60 62 0,
61 63 -1,
62 64 -1,
63 65 -1,
64 66 -1,
65 67 sha1nodeconstants.nullid,
66 68 0,
67 69 0,
68 70 revlog_constants.COMP_MODE_INLINE,
69 71 revlog_constants.COMP_MODE_INLINE,
70 72 )
71 73
72 74 @util.propertycache
73 75 def entry_size(self):
74 76 return self.index_format.size
75 77
76 78 @property
77 79 def nodemap(self):
78 80 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
79 81 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
80 82 return self._nodemap
81 83
82 84 @util.propertycache
83 85 def _nodemap(self):
84 86 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
85 87 for r in range(0, len(self)):
86 88 n = self[r][7]
87 89 nodemap[n] = r
88 90 return nodemap
89 91
90 92 def has_node(self, node):
 91 93 """return True if the node exists in the index"""
92 94 return node in self._nodemap
93 95
94 96 def rev(self, node):
95 97 """return a revision for a node
96 98
97 99 If the node is unknown, raise a RevlogError"""
98 100 return self._nodemap[node]
99 101
100 102 def get_rev(self, node):
101 103 """return a revision for a node
102 104
103 105 If the node is unknown, return None"""
104 106 return self._nodemap.get(node)
105 107
106 108 def _stripnodes(self, start):
107 109 if '_nodemap' in vars(self):
108 110 for r in range(start, len(self)):
109 111 n = self[r][7]
110 112 del self._nodemap[n]
111 113
112 114 def clearcaches(self):
113 115 self.__dict__.pop('_nodemap', None)
114 116
115 117 def __len__(self):
116 118 return self._lgt + len(self._extra)
117 119
118 120 def append(self, tup):
119 121 if '_nodemap' in vars(self):
120 122 self._nodemap[tup[7]] = len(self)
121 123 data = self._pack_entry(len(self), tup)
122 124 self._extra.append(data)
123 125
124 126 def _pack_entry(self, rev, entry):
125 127 assert entry[8] == 0
126 128 assert entry[9] == 0
127 129 return self.index_format.pack(*entry[:8])
128 130
129 131 def _check_index(self, i):
130 132 if not isinstance(i, int):
131 133 raise TypeError(b"expecting int indexes")
132 134 if i < 0 or i >= len(self):
133 135 raise IndexError
134 136
135 137 def __getitem__(self, i):
136 138 if i == -1:
137 139 return self.null_item
138 140 self._check_index(i)
139 141 if i >= self._lgt:
140 142 data = self._extra[i - self._lgt]
141 143 else:
142 144 index = self._calculate_index(i)
143 145 data = self._data[index : index + self.entry_size]
144 146 r = self._unpack_entry(i, data)
145 147 if self._lgt and i == 0:
146 148 r = (offset_type(0, gettype(r[0])),) + r[1:]
147 149 return r
148 150
149 151 def _unpack_entry(self, rev, data):
150 152 r = self.index_format.unpack(data)
151 153 r = r + (
152 154 0,
153 155 0,
154 156 revlog_constants.COMP_MODE_INLINE,
155 157 revlog_constants.COMP_MODE_INLINE,
156 158 )
157 159 return r
158 160
159 161 def pack_header(self, header):
160 162 """pack header information as binary"""
161 163 v_fmt = revlog_constants.INDEX_HEADER
162 164 return v_fmt.pack(header)
163 165
164 166 def entry_binary(self, rev):
165 167 """return the raw binary string representing a revision"""
166 168 entry = self[rev]
167 169 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
168 170 if rev == 0:
169 171 p = p[revlog_constants.INDEX_HEADER.size :]
170 172 return p
171 173
172 174
173 175 class IndexObject(BaseIndexObject):
174 176 def __init__(self, data):
175 177 assert len(data) % self.entry_size == 0, (
176 178 len(data),
177 179 self.entry_size,
178 180 len(data) % self.entry_size,
179 181 )
180 182 self._data = data
181 183 self._lgt = len(data) // self.entry_size
182 184 self._extra = []
183 185
184 186 def _calculate_index(self, i):
185 187 return i * self.entry_size
186 188
187 189 def __delitem__(self, i):
188 190 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
189 191 raise ValueError(b"deleting slices only supports a:-1 with step 1")
190 192 i = i.start
191 193 self._check_index(i)
192 194 self._stripnodes(i)
193 195 if i < self._lgt:
194 196 self._data = self._data[: i * self.entry_size]
195 197 self._lgt = i
196 198 self._extra = []
197 199 else:
198 200 self._extra = self._extra[: i - self._lgt]
199 201
200 202
201 203 class PersistentNodeMapIndexObject(IndexObject):
 202 204 """a debug-oriented class to test persistent nodemap
203 205
204 206 We need a simple python object to test API and higher level behavior. See
205 207 the Rust implementation for more serious usage. This should be used only
206 208 through the dedicated `devel.persistent-nodemap` config.
207 209 """
208 210
209 211 def nodemap_data_all(self):
210 212 """Return bytes containing a full serialization of a nodemap
211 213
212 214 The nodemap should be valid for the full set of revisions in the
213 215 index."""
214 216 return nodemaputil.persistent_data(self)
215 217
216 218 def nodemap_data_incremental(self):
 217 219 """Return bytes containing an incremental update to persistent nodemap
218 220
 219 221 This contains the data for an append-only update of the data provided
220 222 in the last call to `update_nodemap_data`.
221 223 """
222 224 if self._nm_root is None:
223 225 return None
224 226 docket = self._nm_docket
225 227 changed, data = nodemaputil.update_persistent_data(
226 228 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
227 229 )
228 230
229 231 self._nm_root = self._nm_max_idx = self._nm_docket = None
230 232 return docket, changed, data
231 233
232 234 def update_nodemap_data(self, docket, nm_data):
233 235 """provide full block of persisted binary data for a nodemap
234 236
235 237 The data are expected to come from disk. See `nodemap_data_all` for a
 236 238 producer of such data."""
237 239 if nm_data is not None:
238 240 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
239 241 if self._nm_root:
240 242 self._nm_docket = docket
241 243 else:
242 244 self._nm_root = self._nm_max_idx = self._nm_docket = None
243 245
244 246
245 247 class InlinedIndexObject(BaseIndexObject):
246 248 def __init__(self, data, inline=0):
247 249 self._data = data
248 250 self._lgt = self._inline_scan(None)
249 251 self._inline_scan(self._lgt)
250 252 self._extra = []
251 253
252 254 def _inline_scan(self, lgt):
253 255 off = 0
254 256 if lgt is not None:
255 257 self._offsets = [0] * lgt
256 258 count = 0
257 259 while off <= len(self._data) - self.entry_size:
258 260 start = off + self.big_int_size
259 261 (s,) = struct.unpack(
260 262 b'>i',
261 263 self._data[start : start + self.int_size],
262 264 )
263 265 if lgt is not None:
264 266 self._offsets[count] = off
265 267 count += 1
266 268 off += self.entry_size + s
267 269 if off != len(self._data):
268 270 raise ValueError(b"corrupted data")
269 271 return count
270 272
271 273 def __delitem__(self, i):
272 274 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
273 275 raise ValueError(b"deleting slices only supports a:-1 with step 1")
274 276 i = i.start
275 277 self._check_index(i)
276 278 self._stripnodes(i)
277 279 if i < self._lgt:
278 280 self._offsets = self._offsets[:i]
279 281 self._lgt = i
280 282 self._extra = []
281 283 else:
282 284 self._extra = self._extra[: i - self._lgt]
283 285
284 286 def _calculate_index(self, i):
285 287 return self._offsets[i]
286 288
287 289
288 290 def parse_index2(data, inline, revlogv2=False):
289 291 if not inline:
290 292 cls = IndexObject2 if revlogv2 else IndexObject
291 293 return cls(data), None
292 294 cls = InlinedIndexObject
293 295 return cls(data, inline), (0, data)
294 296
295 297
296 298 class IndexObject2(IndexObject):
297 299 index_format = revlog_constants.INDEX_ENTRY_V2
298 300
299 301 def replace_sidedata_info(
300 302 self,
301 303 rev,
302 304 sidedata_offset,
303 305 sidedata_length,
304 306 offset_flags,
305 307 compression_mode,
306 308 ):
307 309 """
308 310 Replace an existing index entry's sidedata offset and length with new
309 311 ones.
310 312 This cannot be used outside of the context of sidedata rewriting,
311 313 inside the transaction that creates the revision `rev`.
312 314 """
313 315 if rev < 0:
314 316 raise KeyError
315 317 self._check_index(rev)
316 318 if rev < self._lgt:
317 319 msg = b"cannot rewrite entries outside of this transaction"
318 320 raise KeyError(msg)
319 321 else:
320 322 entry = list(self[rev])
321 323 entry[0] = offset_flags
322 324 entry[8] = sidedata_offset
323 325 entry[9] = sidedata_length
324 326 entry[11] = compression_mode
325 327 entry = tuple(entry)
326 328 new = self._pack_entry(rev, entry)
327 329 self._extra[rev - self._lgt] = new
328 330
329 331 def _unpack_entry(self, rev, data):
330 332 data = self.index_format.unpack(data)
331 333 entry = data[:10]
332 334 data_comp = data[10] & 3
333 335 sidedata_comp = (data[10] & (3 << 2)) >> 2
334 336 return entry + (data_comp, sidedata_comp)
335 337
336 338 def _pack_entry(self, rev, entry):
337 339 data = entry[:10]
338 340 data_comp = entry[10] & 3
339 341 sidedata_comp = (entry[11] & 3) << 2
340 342 data += (data_comp | sidedata_comp,)
341 343
342 344 return self.index_format.pack(*data)
343 345
344 346 def entry_binary(self, rev):
345 347 """return the raw binary string representing a revision"""
346 348 entry = self[rev]
347 349 return self._pack_entry(rev, entry)
348 350
349 351 def pack_header(self, header):
350 352 """pack header information as binary"""
351 353 msg = 'version header should go in the docket, not the index: %d'
352 354 msg %= header
353 355 raise error.ProgrammingError(msg)
354 356
355 357
356 358 def parse_index_devel_nodemap(data, inline):
 357 359 """like parse_index2, but always return a PersistentNodeMapIndexObject"""
358 360 return PersistentNodeMapIndexObject(data), None
359 361
360 362
361 363 def parse_dirstate(dmap, copymap, st):
362 364 parents = [st[:20], st[20:40]]
363 365 # dereference fields so they will be local in loop
364 366 format = b">cllll"
365 367 e_size = struct.calcsize(format)
366 368 pos1 = 40
367 369 l = len(st)
368 370
369 371 # the inner loop
370 372 while pos1 < l:
371 373 pos2 = pos1 + e_size
372 374 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
373 375 pos1 = pos2 + e[4]
374 376 f = st[pos2:pos1]
375 377 if b'\0' in f:
376 378 f, c = f.split(b'\0')
377 379 copymap[f] = c
378 380 dmap[f] = e[:4]
379 381 return parents
380 382
381 383
382 384 def pack_dirstate(dmap, copymap, pl, now):
383 385 now = int(now)
384 386 cs = stringio()
385 387 write = cs.write
386 388 write(b"".join(pl))
387 389 for f, e in pycompat.iteritems(dmap):
388 390 if e[0] == b'n' and e[3] == now:
389 391 # The file was last modified "simultaneously" with the current
390 392 # write to dirstate (i.e. within the same second for file-
391 393 # systems with a granularity of 1 sec). This commonly happens
392 394 # for at least a couple of files on 'update'.
393 395 # The user could change the file without changing its size
394 396 # within the same second. Invalidate the file's mtime in
395 397 # dirstate, forcing future 'status' calls to compare the
396 398 # contents of the file if the size is the same. This prevents
397 399 # mistakenly treating such files as clean.
398 400 e = dirstatetuple(e[0], e[1], e[2], -1)
399 401 dmap[f] = e
400 402
401 403 if f in copymap:
402 404 f = b"%s\0%s" % (f, copymap[f])
403 405 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
404 406 write(e)
405 407 write(f)
406 408 return cs.getvalue()
@@ -1,162 +1,163
1 1 # revlogv0 - code related to revlog format "V0"
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9
10 10 from ..node import sha1nodeconstants
11 11 from .constants import (
12 12 COMP_MODE_INLINE,
13 13 INDEX_ENTRY_V0,
14 14 )
15 15 from ..i18n import _
16 16
17 17 from .. import (
18 18 error,
19 19 node,
20 20 pycompat,
21 21 util,
22 22 )
23 23
24 24 from . import (
25 25 flagutil,
26 26 nodemap as nodemaputil,
27 27 )
28 28
29 29
30 30 def getoffset(q):
31 31 return int(q >> 16)
32 32
33 33
34 34 def gettype(q):
35 35 return int(q & 0xFFFF)
36 36
37 37
38 38 def offset_type(offset, type):
39 39 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
40 40 raise ValueError(b'unknown revlog index flags')
41 41 return int(int(offset) << 16 | type)
42 42
43 43
44 44 class revlogoldindex(list):
45 rust_ext_compat = 0
45 46 entry_size = INDEX_ENTRY_V0.size
46 47 null_item = (
47 48 0,
48 49 0,
49 50 0,
50 51 -1,
51 52 -1,
52 53 -1,
53 54 -1,
54 55 sha1nodeconstants.nullid,
55 56 0,
56 57 0,
57 58 COMP_MODE_INLINE,
58 59 COMP_MODE_INLINE,
59 60 )
60 61
61 62 @property
62 63 def nodemap(self):
63 64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
64 65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
65 66 return self._nodemap
66 67
67 68 @util.propertycache
68 69 def _nodemap(self):
69 70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: node.nullrev})
70 71 for r in range(0, len(self)):
71 72 n = self[r][7]
72 73 nodemap[n] = r
73 74 return nodemap
74 75
75 76 def has_node(self, node):
 76 77 """return True if the node exists in the index"""
77 78 return node in self._nodemap
78 79
79 80 def rev(self, node):
80 81 """return a revision for a node
81 82
82 83 If the node is unknown, raise a RevlogError"""
83 84 return self._nodemap[node]
84 85
85 86 def get_rev(self, node):
86 87 """return a revision for a node
87 88
88 89 If the node is unknown, return None"""
89 90 return self._nodemap.get(node)
90 91
91 92 def append(self, tup):
92 93 self._nodemap[tup[7]] = len(self)
93 94 super(revlogoldindex, self).append(tup)
94 95
95 96 def __delitem__(self, i):
96 97 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
97 98 raise ValueError(b"deleting slices only supports a:-1 with step 1")
98 99 for r in pycompat.xrange(i.start, len(self)):
99 100 del self._nodemap[self[r][7]]
100 101 super(revlogoldindex, self).__delitem__(i)
101 102
102 103 def clearcaches(self):
103 104 self.__dict__.pop('_nodemap', None)
104 105
105 106 def __getitem__(self, i):
106 107 if i == -1:
107 108 return self.null_item
108 109 return list.__getitem__(self, i)
109 110
110 111 def pack_header(self, header):
111 112 """pack header information in binary"""
112 113 return b''
113 114
114 115 def entry_binary(self, rev):
115 116 """return the raw binary string representing a revision"""
116 117 entry = self[rev]
117 118 if gettype(entry[0]):
118 119 raise error.RevlogError(
119 120 _(b'index entry flags need revlog version 1')
120 121 )
121 122 e2 = (
122 123 getoffset(entry[0]),
123 124 entry[1],
124 125 entry[3],
125 126 entry[4],
126 127 self[entry[5]][7],
127 128 self[entry[6]][7],
128 129 entry[7],
129 130 )
130 131 return INDEX_ENTRY_V0.pack(*e2)
131 132
132 133
133 134 def parse_index_v0(data, inline):
134 135 s = INDEX_ENTRY_V0.size
135 136 index = []
136 137 nodemap = nodemaputil.NodeMap({node.nullid: node.nullrev})
137 138 n = off = 0
138 139 l = len(data)
139 140 while off + s <= l:
140 141 cur = data[off : off + s]
141 142 off += s
142 143 e = INDEX_ENTRY_V0.unpack(cur)
143 144 # transform to revlogv1 format
144 145 e2 = (
145 146 offset_type(e[0], 0),
146 147 e[1],
147 148 -1,
148 149 e[2],
149 150 e[3],
150 151 nodemap.get(e[4], node.nullrev),
151 152 nodemap.get(e[5], node.nullrev),
152 153 e[6],
153 154 0, # no side data support
154 155 0, # no side data support
155 156 COMP_MODE_INLINE,
156 157 )
157 158 index.append(e2)
158 159 nodemap[e[6]] = n
159 160 n += 1
160 161
161 162 index = revlogoldindex(index)
162 163 return index, None
@@ -1,176 +1,183
1 1 // cindex.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings to use the Index defined by the parsers C extension
9 9 //!
10 10 //! Ideally, we should use an Index entirely implemented in Rust,
11 11 //! but this will take some time to get there.
12 12
13 13 use cpython::{
14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
15 PyTuple, Python, PythonObject,
14 exc::ImportError, exc::TypeError, ObjectProtocol, PyClone, PyErr,
15 PyObject, PyResult, PyTuple, Python, PythonObject,
16 16 };
17 17 use hg::revlog::{Node, RevlogIndex};
18 18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
19 19 use libc::{c_int, ssize_t};
20 20
21 21 const REVLOG_CABI_VERSION: c_int = 2;
22 22
23 23 #[repr(C)]
24 24 pub struct Revlog_CAPI {
25 25 abi_version: c_int,
26 26 index_length:
27 27 unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> ssize_t,
28 28 index_node: unsafe extern "C" fn(
29 29 index: *mut revlog_capi::RawPyObject,
30 30 rev: ssize_t,
31 31 ) -> *const Node,
32 32 index_parents: unsafe extern "C" fn(
33 33 index: *mut revlog_capi::RawPyObject,
34 34 rev: c_int,
35 35 ps: *mut [c_int; 2],
36 36 ) -> c_int,
37 37 }
38 38
39 39 py_capsule!(
40 40 from mercurial.cext.parsers import revlog_CAPI
41 41 as revlog_capi for Revlog_CAPI);
42 42
43 43 /// A `Graph` backed up by objects and functions from revlog.c
44 44 ///
45 45 /// This implementation of the `Graph` trait, relies on (pointers to)
46 46 /// - the C index object (`index` member)
47 47 /// - the `index_get_parents()` function (`parents` member)
48 48 ///
49 49 /// # Safety
50 50 ///
51 51 /// The C index itself is mutable, and this Rust exposition is **not
52 52 /// protected by the GIL**, meaning that this construct isn't safe with respect
53 53 /// to Python threads.
54 54 ///
55 55 /// All callers of this `Index` must acquire the GIL and must not release it
56 56 /// while working.
57 57 ///
58 58 /// # TODO find a solution to make it GIL safe again.
59 59 ///
60 60 /// This is non trivial, and can wait until we have a clearer picture with
61 61 /// more Rust Mercurial constructs.
62 62 ///
 63 63 /// One possibility would be to use a `GILProtectedIndex` wrapper enclosing
64 64 /// a `Python<'p>` marker and have it be the one implementing the
65 65 /// `Graph` trait, but this would mean the `Graph` implementor would become
66 66 /// likely to change between subsequent method invocations of the `hg-core`
67 67 /// objects (a serious change of the `hg-core` API):
68 68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
69 69 /// parameter in the relevant methods that need one.
70 70 ///
71 71 /// Another possibility would be to introduce an abstract lock handle into
72 72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
73 73 /// in the case of the `cpython` crate bindings yet could leave room for other
74 74 /// mechanisms in other contexts.
75 75 pub struct Index {
76 76 index: PyObject,
77 77 capi: &'static Revlog_CAPI,
78 78 }
79 79
80 80 impl Index {
81 81 pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
82 82 let capi = unsafe { revlog_capi::retrieve(py)? };
83 83 if capi.abi_version != REVLOG_CABI_VERSION {
84 84 return Err(PyErr::new::<ImportError, _>(
85 85 py,
86 86 format!(
87 87 "ABI version mismatch: the C ABI revlog version {} \
88 88 does not match the {} expected by Rust hg-cpython",
89 89 capi.abi_version, REVLOG_CABI_VERSION
90 90 ),
91 91 ));
92 92 }
93 let compat: u64 = index.getattr(py, "rust_ext_compat")?.extract(py)?;
94 if compat == 0 {
95 return Err(PyErr::new::<TypeError, _>(
96 py,
97 "index object not compatible with Rust",
98 ));
99 }
93 100 Ok(Index { index, capi })
94 101 }
95 102
96 103 /// return a reference to the CPython Index object in this Struct
97 104 pub fn inner(&self) -> &PyObject {
98 105 &self.index
99 106 }
100 107
101 108 pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
102 109 self.index.call_method(
103 110 py,
104 111 "append",
105 112 PyTuple::new(py, &[tup.into_object()]),
106 113 None,
107 114 )
108 115 }
109 116 }
110 117
111 118 impl Clone for Index {
112 119 fn clone(&self) -> Self {
113 120 let guard = Python::acquire_gil();
114 121 Index {
115 122 index: self.index.clone_ref(guard.python()),
116 123 capi: self.capi,
117 124 }
118 125 }
119 126 }
120 127
121 128 impl PyClone for Index {
122 129 fn clone_ref(&self, py: Python) -> Self {
123 130 Index {
124 131 index: self.index.clone_ref(py),
125 132 capi: self.capi,
126 133 }
127 134 }
128 135 }
129 136
130 137 impl Graph for Index {
131 138 /// wrap a call to the C extern parents function
132 139 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
133 140 if rev == WORKING_DIRECTORY_REVISION {
134 141 return Err(GraphError::WorkingDirectoryUnsupported);
135 142 }
136 143 let mut res: [c_int; 2] = [0; 2];
137 144 let code = unsafe {
138 145 (self.capi.index_parents)(
139 146 self.index.as_ptr(),
140 147 rev as c_int,
141 148 &mut res as *mut [c_int; 2],
142 149 )
143 150 };
144 151 match code {
145 152 0 => Ok(res),
146 153 _ => Err(GraphError::ParentOutOfRange(rev)),
147 154 }
148 155 }
149 156 }
150 157
151 158 impl RevlogIndex for Index {
152 159 /// Note C return type is Py_ssize_t (hence signed), but we shall
153 160 /// force it to unsigned, because it's a length
154 161 fn len(&self) -> usize {
155 162 unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
156 163 }
157 164
158 165 fn node(&self, rev: Revision) -> Option<&Node> {
159 166 let raw = unsafe {
160 167 (self.capi.index_node)(self.index.as_ptr(), rev as ssize_t)
161 168 };
162 169 if raw.is_null() {
163 170 None
164 171 } else {
165 172 // TODO it would be much better for the C layer to give us
166 173 // a length, since the hash length will change in the near
167 174 // future, but that's probably out of scope for the nodemap
168 175 // patch series.
169 176 //
170 177 // The root of that unsafety relies in the signature of
171 178 // `capi.index_node()` itself: returning a `Node` pointer
172 179 // whereas it's a `char *` in the C counterpart.
173 180 Some(unsafe { &*raw })
174 181 }
175 182 }
176 183 }
@@ -1,504 +1,509
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a Struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, same as C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// return True if the node exist in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// find length of shortest hex nodeid of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111 111
112 112 nt.find_bin(idx, prefix)
113 113 // TODO make an inner API returning the node directly
114 114 .map(|opt| opt.map(
115 115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 116 .map_err(|e| nodemap_error(py, e))
117 117
118 118 }
119 119
120 120 /// append an index entry
121 121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 122 if tup.len(py) < 8 {
123 123 // this is better than the panic promised by tup.get_item()
124 124 return Err(
125 125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 126 }
127 127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 128 let node = node_from_py_object(py, &node_bytes)?;
129 129
130 130 let mut idx = self.cindex(py).borrow_mut();
131 131 let rev = idx.len() as Revision;
132 132
133 133 idx.append(py, tup)?;
134 134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 135 .insert(&*idx, &node, rev)
136 136 .map_err(|e| nodemap_error(py, e))?;
137 137 Ok(py.None())
138 138 }
139 139
140 140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 144 let mut nt = opt.as_mut().unwrap();
145 145 nt.invalidate_all();
146 146 self.fill_nodemap(py, &mut nt)?;
147 147 Ok(())
148 148 }
149 149
150 150 //
151 151 // Reforwarded C index API
152 152 //
153 153
154 154 // index_methods (tp_methods). Same ordering as in revlog.c
155 155
156 156 /// return the gca set of the given revs
157 157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 158 self.call_cindex(py, "ancestors", args, kw)
159 159 }
160 160
161 161 /// return the heads of the common ancestors of the given revs
162 162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 164 }
165 165
166 166 /// Clear the index caches and inner py_class data.
167 167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 169 self.nt(py).borrow_mut().take();
170 170 self.docket(py).borrow_mut().take();
171 171 self.mmap(py).borrow_mut().take();
172 172 self.call_cindex(py, "clearcaches", args, kw)
173 173 }
174 174
    // The methods below are plain pass-throughs: they forward the call,
    // with its positional and keyword arguments, to the wrapped C index.

    /// Return the raw binary string representing a revision.
    def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "entry_binary", args, kw)
    }

    /// Return a binary packed version of the header.
    def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "pack_header", args, kw)
    }

    /// Get an index entry.
    def get(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "get", args, kw)
    }

    /// Compute phases.
    def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "computephasesmapsets", args, kw)
    }

    /// Compute reachable roots (C `reachableroots2` implementation).
    def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "reachableroots2", args, kw)
    }

    /// Get head revisions.
    def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevs", args, kw)
    }

    /// Get filtered head revisions.
    def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "headrevsfiltered", args, kw)
    }

    /// True if the object is a snapshot.
    def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "issnapshot", args, kw)
    }

    /// Gather snapshot data in a cache dict.
    def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "findsnapshots", args, kw)
    }

    /// Determine revisions with deltas to reconstruct fulltext.
    def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "deltachain", args, kw)
    }

    /// Slice planned chunk read to reach a density threshold.
    def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "slicechunktodensity", args, kw)
    }

    /// Stats for the index.
    def stats(&self, *args, **kw) -> PyResult<PyObject> {
        self.call_cindex(py, "stats", args, kw)
    }
234 234
    // index_sequence_methods and index_mapping_methods.
    //
    // Since we call back through the high level Python API,
    // there's no point making a distinction between index_get
    // and index_getitem.

    /// Number of entries in the index, as reported by the C index.
    def __len__(&self) -> PyResult<usize> {
        self.cindex(py).borrow().inner().len(py)
    }
244 244
    /// Return the index entry for `key`, forwarded to the C index.
    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
        // this conversion seems needless, but that's actually because
        // `index_getitem` does not handle conversion from PyLong,
        // which expressions such as [e for e in index] internally use.
        // Note that we don't seem to have a direct way to call
        // PySequence_GetItem (does the job), which would possibly be better
        // for performance
        let key = match key.extract::<Revision>(py) {
            // Re-wrap the extracted revision so the C side receives the
            // exact integer type it expects.
            Ok(rev) => rev.to_py_object(py).into_object(),
            // Not an integer: pass the key through unchanged.
            Err(_) => key,
        };
        self.cindex(py).borrow().inner().get_item(py, key)
    }
258 258
    /// Item assignment, forwarded verbatim to the wrapped C index.
    def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
        self.cindex(py).borrow().inner().set_item(py, key, value)
    }
262 262
    /// Membership test: accepts either a revision number or a node.
    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        // ObjectProtocol does not seem to provide contains(), so
        // this is an equivalent implementation of the index_contains()
        // defined in revlog.c
        let cindex = self.cindex(py).borrow();
        match item.extract::<Revision>(py) {
            Ok(rev) => {
                // Integers are revision numbers; -1 (the null revision)
                // counts as contained.
                Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
            }
            Err(_) => {
                // Anything else is treated as a node id; delegate the
                // lookup to the C index's `has_node`.
                cindex.inner().call_method(
                    py,
                    "has_node",
                    PyTuple::new(py, &[item]),
                    None)?
                .extract(py)
            }
        }
    }
282 282
    /// Full nodemap bytes, ready to be written as-is to disk.
    def nodemap_data_all(&self) -> PyResult<PyBytes> {
        self.inner_nodemap_data_all(py)
    }

    /// Last saved docket plus the data changed since it was saved
    /// (see `inner_nodemap_data_incremental` for details).
    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
        self.inner_nodemap_data_incremental(py)
    }

    /// Replace the nodemap state from freshly read (mmap'ed) data,
    /// keeping `docket` for later incremental calls.
    def update_nodemap_data(
        &self,
        docket: PyObject,
        nm_data: PyObject
    ) -> PyResult<PyObject> {
        self.inner_update_nodemap_data(py, docket, nm_data)
    }
297 297
    /// Size of one index entry, forwarded from the C index attribute.
    @property
    def entry_size(&self) -> PyResult<PyInt> {
        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
    }

    /// Flag signaling whether this revlog index is compatible with the
    /// Rust extensions, forwarded from the C index attribute.
    @property
    def rust_ext_compat(&self) -> PyResult<PyInt> {
        self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
    }
307
303 308 });
304 309
impl MixedIndex {
    /// Wrap an existing C index object; the three Rust-side cache slots
    /// (nodetree, docket, mmap buffer — see the `py_class!` data
    /// declaration) start out empty and are populated lazily.
    fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
        Self::create_instance(
            py,
            RefCell::new(cindex::Index::new(py, cindex)?),
            RefCell::new(None),
            RefCell::new(None),
            RefCell::new(None),
        )
    }

    /// This is scaffolding at this point, but it could also become
    /// a way to start a persistent nodemap or perform a
    /// vacuum / repack operation
    fn fill_nodemap(
        &self,
        py: Python,
        nt: &mut NodeTree,
    ) -> PyResult<PyObject> {
        let index = self.cindex(py).borrow();
        // Walk every revision of the index and insert its node into the
        // tree, converting NodeMapError into a Python exception.
        for r in 0..index.len() {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*index, index.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }
        Ok(py.None())
    }

    /// Return the cached nodetree, building it from the index first if it
    /// has not been initialized (or was taken by a `*_data_*` method).
    fn get_nodetree<'a>(
        &'a self,
        py: Python<'a>,
    ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
        if self.nt(py).borrow().is_none() {
            // Start from an empty read-only backing and fill from scratch.
            let readonly = Box::new(Vec::new());
            let mut nt = NodeTree::load_bytes(readonly, 0);
            self.fill_nodemap(py, &mut nt)?;
            self.nt(py).borrow_mut().replace(nt);
        }
        Ok(self.nt(py))
    }

    /// forward a method call to the underlying C index
    fn call_cindex(
        &self,
        py: Python,
        name: &str,
        args: &PyTuple,
        kwargs: Option<&PyDict>,
    ) -> PyResult<PyObject> {
        self.cindex(py)
            .borrow()
            .inner()
            .call_method(py, name, args, kwargs)
    }

    /// Return a new reference to the wrapped C index.
    pub fn clone_cindex(&self, py: Python) -> cindex::Index {
        self.cindex(py).borrow().clone_ref(py)
    }

    /// Returns the full nodemap bytes to be written as-is to disk
    fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
        // `take()` empties the cache; a later access will rebuild the
        // nodetree through `get_nodetree`.
        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();

        // If there's anything readonly, we need to build the data again from
        // scratch
        let bytes = if readonly.len() > 0 {
            let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
            self.fill_nodemap(py, &mut nt)?;

            let (readonly, bytes) = nt.into_readonly_and_added_bytes();
            // A tree built from an empty backing has no read-only part.
            assert_eq!(readonly.len(), 0);

            bytes
        } else {
            bytes
        };

        let bytes = PyBytes::new(py, &bytes);
        Ok(bytes)
    }

    /// Returns the last saved docket along with the size of any changed data
    /// (in number of blocks), and said data as bytes.
    fn inner_nodemap_data_incremental(
        &self,
        py: Python,
    ) -> PyResult<PyObject> {
        let docket = self.docket(py).borrow();
        let docket = match docket.as_ref() {
            Some(d) => d,
            // No docket means no previous on-disk state to diff against;
            // signal this with None so the caller does a full write.
            None => return Ok(py.None()),
        };

        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
        let masked_blocks = node_tree.masked_readonly_blocks();
        let (_, data) = node_tree.into_readonly_and_added_bytes();
        // Changed size is reported in bytes: blocks * size_of::<Block>().
        let changed = masked_blocks * std::mem::size_of::<Block>();

        Ok((docket, changed, PyBytes::new(py, &data))
            .to_py_object(py)
            .into_object())
    }

    /// Update the nodemap from the new (mmaped) data.
    /// The docket is kept as a reference for later incremental calls.
    fn inner_update_nodemap_data(
        &self,
        py: Python,
        docket: PyObject,
        nm_data: PyObject,
    ) -> PyResult<PyObject> {
        let buf = PyBuffer::get(py, &nm_data)?;
        let len = buf.item_count();

        // Build a slice from the mmap'ed buffer data
        let cbuf = buf.buf_ptr();
        let bytes = if std::mem::size_of::<u8>() == buf.item_size()
            && buf.is_c_contiguous()
            && u8::is_compatible_format(buf.format())
        {
            // SAFETY: the checks above establish that the buffer is a
            // C-contiguous array of `len` single bytes; the buffer object
            // is stored in `self.mmap` below so the memory stays alive
            // for as long as the NodeTree borrows it.
            unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
        } else {
            return Err(PyErr::new::<ValueError, _>(
                py,
                "Nodemap data buffer has an invalid memory representation"
                    .to_string(),
            ));
        };

        // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
        // pointer.
        self.mmap(py).borrow_mut().replace(buf);

        let mut nt = NodeTree::load_bytes(Box::new(bytes), len);

        let data_tip =
            docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
        self.docket(py).borrow_mut().replace(docket.clone_ref(py));
        let idx = self.cindex(py).borrow();
        let current_tip = idx.len();

        // Catch up: insert every revision added since the nodemap data
        // on disk was last saved.
        for r in (data_tip + 1)..current_tip as Revision {
            let rev = r as Revision;
            // in this case node() won't ever return None
            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
                .map_err(|e| nodemap_error(py, e))?
        }

        *self.nt(py).borrow_mut() = Some(nt);

        Ok(py.None())
    }
}
460 465
461 466 fn revlog_error(py: Python) -> PyErr {
462 467 match py
463 468 .import("mercurial.error")
464 469 .and_then(|m| m.get(py, "RevlogError"))
465 470 {
466 471 Err(e) => e,
467 472 Ok(cls) => PyErr::from_instance(py, cls),
468 473 }
469 474 }
470 475
471 476 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
472 477 PyErr::new::<ValueError, _>(
473 478 py,
474 479 format!(
475 480 "Inconsistency: Revision {} found in nodemap \
476 481 is not in revlog index",
477 482 rev
478 483 ),
479 484 )
480 485 }
481 486
482 487 /// Standard treatment of NodeMapError
483 488 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
484 489 match err {
485 490 NodeMapError::MultipleResults => revlog_error(py),
486 491 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
487 492 }
488 493 }
489 494
490 495 /// Create the module, with __package__ given from parent
491 496 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
492 497 let dotted_name = &format!("{}.revlog", package);
493 498 let m = PyModule::new(py, dotted_name)?;
494 499 m.add(py, "__package__", package)?;
495 500 m.add(py, "__doc__", "RevLog - Rust implementations")?;
496 501
497 502 m.add_class::<MixedIndex>(py)?;
498 503
499 504 let sys = PyModule::import(py, "sys")?;
500 505 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
501 506 sys_modules.set_item(py, dotted_name, &m)?;
502 507
503 508 Ok(m)
504 509 }
General Comments 0
You need to be logged in to leave comments. Login now