index: add a `rev` method (API)...
marmoute
r43952:bd87114c default
@@ -1,762 +1,762 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 33 {
34 34 Py_ssize_t expected_size;
35 35
36 36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 37 return NULL;
38 38 }
39 39
40 40 return _dict_new_presized(expected_size);
41 41 }
42 42
43 43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 44 int size, int mtime)
45 45 {
46 46 dirstateTupleObject *t =
47 47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 48 if (!t) {
49 49 return NULL;
50 50 }
51 51 t->state = state;
52 52 t->mode = mode;
53 53 t->size = size;
54 54 t->mtime = mtime;
55 55 return t;
56 56 }
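/*
 * Illustrative sketch, not part of the upstream file: make_dirstate_tuple()
 * above packs the four dirstate fields into an immutable tuple-like object.
 * The helper name below is hypothetical; 'n' is the state byte used for a
 * clean ("normal") file elsewhere in this module.
 */
static PyObject *sketch_make_clean_entry(int mode, int size, int mtime)
{
	return (PyObject *)make_dirstate_tuple('n', mode, size, mtime);
}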
57 57
58 58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 59 PyObject *kwds)
60 60 {
61 61 /* We do all the initialization here and not a tp_init function because
62 62 * dirstate_tuple is immutable. */
63 63 dirstateTupleObject *t;
64 64 char state;
65 65 int size, mode, mtime;
66 66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 67 return NULL;
68 68 }
69 69
70 70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 71 if (!t) {
72 72 return NULL;
73 73 }
74 74 t->state = state;
75 75 t->mode = mode;
76 76 t->size = size;
77 77 t->mtime = mtime;
78 78
79 79 return (PyObject *)t;
80 80 }
81 81
82 82 static void dirstate_tuple_dealloc(PyObject *o)
83 83 {
84 84 PyObject_Del(o);
85 85 }
86 86
87 87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
88 88 {
89 89 return 4;
90 90 }
91 91
92 92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 93 {
94 94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 95 switch (i) {
96 96 case 0:
97 97 return PyBytes_FromStringAndSize(&t->state, 1);
98 98 case 1:
99 99 return PyInt_FromLong(t->mode);
100 100 case 2:
101 101 return PyInt_FromLong(t->size);
102 102 case 3:
103 103 return PyInt_FromLong(t->mtime);
104 104 default:
105 105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 106 return NULL;
107 107 }
108 108 }
109 109
110 110 static PySequenceMethods dirstate_tuple_sq = {
111 111 dirstate_tuple_length, /* sq_length */
112 112 0, /* sq_concat */
113 113 0, /* sq_repeat */
114 114 dirstate_tuple_item, /* sq_item */
115 115 0, /* sq_ass_item */
116 116 0, /* sq_contains */
117 117 0, /* sq_inplace_concat */
118 118 0 /* sq_inplace_repeat */
119 119 };
120 120
121 121 PyTypeObject dirstateTupleType = {
122 122 PyVarObject_HEAD_INIT(NULL, 0) /* header */
123 123 "dirstate_tuple", /* tp_name */
124 124 sizeof(dirstateTupleObject), /* tp_basicsize */
125 125 0, /* tp_itemsize */
126 126 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
127 127 0, /* tp_print */
128 128 0, /* tp_getattr */
129 129 0, /* tp_setattr */
130 130 0, /* tp_compare */
131 131 0, /* tp_repr */
132 132 0, /* tp_as_number */
133 133 &dirstate_tuple_sq, /* tp_as_sequence */
134 134 0, /* tp_as_mapping */
135 135 0, /* tp_hash */
136 136 0, /* tp_call */
137 137 0, /* tp_str */
138 138 0, /* tp_getattro */
139 139 0, /* tp_setattro */
140 140 0, /* tp_as_buffer */
141 141 Py_TPFLAGS_DEFAULT, /* tp_flags */
142 142 "dirstate tuple", /* tp_doc */
143 143 0, /* tp_traverse */
144 144 0, /* tp_clear */
145 145 0, /* tp_richcompare */
146 146 0, /* tp_weaklistoffset */
147 147 0, /* tp_iter */
148 148 0, /* tp_iternext */
149 149 0, /* tp_methods */
150 150 0, /* tp_members */
151 151 0, /* tp_getset */
152 152 0, /* tp_base */
153 153 0, /* tp_dict */
154 154 0, /* tp_descr_get */
155 155 0, /* tp_descr_set */
156 156 0, /* tp_dictoffset */
157 157 0, /* tp_init */
158 158 0, /* tp_alloc */
159 159 dirstate_tuple_new, /* tp_new */
160 160 };
161 161
162 162 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
163 163 {
164 164 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
165 165 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
166 166 char state, *cur, *str, *cpos;
167 167 int mode, size, mtime;
168 168 unsigned int flen, pos = 40;
169 169 Py_ssize_t len = 40;
170 170 Py_ssize_t readlen;
171 171
172 172 if (!PyArg_ParseTuple(
173 173 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
174 174 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
175 175 goto quit;
176 176 }
177 177
178 178 len = readlen;
179 179
180 180 /* read parents */
181 181 if (len < 40) {
182 182 PyErr_SetString(PyExc_ValueError,
183 183 "too little data for parents");
184 184 goto quit;
185 185 }
186 186
187 187 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
188 188 str + 20, (Py_ssize_t)20);
189 189 if (!parents) {
190 190 goto quit;
191 191 }
192 192
193 193 /* read filenames */
194 194 while (pos >= 40 && pos < len) {
195 195 if (pos + 17 > len) {
196 196 PyErr_SetString(PyExc_ValueError,
197 197 "overflow in dirstate");
198 198 goto quit;
199 199 }
200 200 cur = str + pos;
201 201 /* unpack header */
202 202 state = *cur;
203 203 mode = getbe32(cur + 1);
204 204 size = getbe32(cur + 5);
205 205 mtime = getbe32(cur + 9);
206 206 flen = getbe32(cur + 13);
207 207 pos += 17;
208 208 cur += 17;
209 209 if (flen > len - pos) {
210 210 PyErr_SetString(PyExc_ValueError,
211 211 "overflow in dirstate");
212 212 goto quit;
213 213 }
214 214
215 215 entry =
216 216 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
217 217 cpos = memchr(cur, 0, flen);
218 218 if (cpos) {
219 219 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
220 220 cname = PyBytes_FromStringAndSize(
221 221 cpos + 1, flen - (cpos - cur) - 1);
222 222 if (!fname || !cname ||
223 223 PyDict_SetItem(cmap, fname, cname) == -1 ||
224 224 PyDict_SetItem(dmap, fname, entry) == -1) {
225 225 goto quit;
226 226 }
227 227 Py_DECREF(cname);
228 228 } else {
229 229 fname = PyBytes_FromStringAndSize(cur, flen);
230 230 if (!fname ||
231 231 PyDict_SetItem(dmap, fname, entry) == -1) {
232 232 goto quit;
233 233 }
234 234 }
235 235 Py_DECREF(fname);
236 236 Py_DECREF(entry);
237 237 fname = cname = entry = NULL;
238 238 pos += flen;
239 239 }
240 240
241 241 ret = parents;
242 242 Py_INCREF(ret);
243 243 quit:
244 244 Py_XDECREF(fname);
245 245 Py_XDECREF(cname);
246 246 Py_XDECREF(entry);
247 247 Py_XDECREF(parents);
248 248 return ret;
249 249 }
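/*
 * Summary sketch, not part of the upstream file, of the on-disk layout
 * parse_dirstate() decodes above: the file starts with two 20-byte parent
 * nodes (hence pos = 40), followed by one record per tracked file:
 *
 *   1 byte        state
 *   4 bytes       mode   (big endian)
 *   4 bytes       size   (big endian)
 *   4 bytes       mtime  (big endian)
 *   4 bytes       flen   (big endian)
 *   flen bytes    entry name, stored as "name" or "name\0copysource"
 *
 * pack_dirstate() below writes the same 17-byte header plus name back out.
 */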
250 250
251 251 /*
252 252 * Build a set of non-normal and other parent entries from the dirstate dmap
253 253 */
254 254 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
255 255 {
256 256 PyObject *dmap, *fname, *v;
257 257 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
258 258 Py_ssize_t pos;
259 259
260 260 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
261 261 &dmap)) {
262 262 goto bail;
263 263 }
264 264
265 265 nonnset = PySet_New(NULL);
266 266 if (nonnset == NULL) {
267 267 goto bail;
268 268 }
269 269
270 270 otherpset = PySet_New(NULL);
271 271 if (otherpset == NULL) {
272 272 goto bail;
273 273 }
274 274
275 275 pos = 0;
276 276 while (PyDict_Next(dmap, &pos, &fname, &v)) {
277 277 dirstateTupleObject *t;
278 278 if (!dirstate_tuple_check(v)) {
279 279 PyErr_SetString(PyExc_TypeError,
280 280 "expected a dirstate tuple");
281 281 goto bail;
282 282 }
283 283 t = (dirstateTupleObject *)v;
284 284
285 285 if (t->state == 'n' && t->size == -2) {
286 286 if (PySet_Add(otherpset, fname) == -1) {
287 287 goto bail;
288 288 }
289 289 }
290 290
291 291 if (t->state == 'n' && t->mtime != -1) {
292 292 continue;
293 293 }
294 294 if (PySet_Add(nonnset, fname) == -1) {
295 295 goto bail;
296 296 }
297 297 }
298 298
299 299 result = Py_BuildValue("(OO)", nonnset, otherpset);
300 300 if (result == NULL) {
301 301 goto bail;
302 302 }
303 303 Py_DECREF(nonnset);
304 304 Py_DECREF(otherpset);
305 305 return result;
306 306 bail:
307 307 Py_XDECREF(nonnset);
308 308 Py_XDECREF(otherpset);
309 309 Py_XDECREF(result);
310 310 return NULL;
311 311 }
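/*
 * Note, not part of the upstream file: per the loop above, an entry joins
 * the "other parent" set when its state is 'n' with size -2, and joins the
 * non-normal set whenever its state is not 'n' or its mtime is unset (-1).
 */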
312 312
313 313 /*
314 314 * Efficiently pack a dirstate object into its on-disk format.
315 315 */
316 316 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
317 317 {
318 318 PyObject *packobj = NULL;
319 319 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
320 320 Py_ssize_t nbytes, pos, l;
321 321 PyObject *k, *v = NULL, *pn;
322 322 char *p, *s;
323 323 int now;
324 324
325 325 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
326 326 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
327 327 &now)) {
328 328 return NULL;
329 329 }
330 330
331 331 if (PyTuple_Size(pl) != 2) {
332 332 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
333 333 return NULL;
334 334 }
335 335
336 336 /* Figure out how much we need to allocate. */
337 337 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
338 338 PyObject *c;
339 339 if (!PyBytes_Check(k)) {
340 340 PyErr_SetString(PyExc_TypeError, "expected string key");
341 341 goto bail;
342 342 }
343 343 nbytes += PyBytes_GET_SIZE(k) + 17;
344 344 c = PyDict_GetItem(copymap, k);
345 345 if (c) {
346 346 if (!PyBytes_Check(c)) {
347 347 PyErr_SetString(PyExc_TypeError,
348 348 "expected string key");
349 349 goto bail;
350 350 }
351 351 nbytes += PyBytes_GET_SIZE(c) + 1;
352 352 }
353 353 }
354 354
355 355 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
356 356 if (packobj == NULL) {
357 357 goto bail;
358 358 }
359 359
360 360 p = PyBytes_AS_STRING(packobj);
361 361
362 362 pn = PyTuple_GET_ITEM(pl, 0);
363 363 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
364 364 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
365 365 goto bail;
366 366 }
367 367 memcpy(p, s, l);
368 368 p += 20;
369 369 pn = PyTuple_GET_ITEM(pl, 1);
370 370 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
371 371 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
372 372 goto bail;
373 373 }
374 374 memcpy(p, s, l);
375 375 p += 20;
376 376
377 377 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
378 378 dirstateTupleObject *tuple;
379 379 char state;
380 380 int mode, size, mtime;
381 381 Py_ssize_t len, l;
382 382 PyObject *o;
383 383 char *t;
384 384
385 385 if (!dirstate_tuple_check(v)) {
386 386 PyErr_SetString(PyExc_TypeError,
387 387 "expected a dirstate tuple");
388 388 goto bail;
389 389 }
390 390 tuple = (dirstateTupleObject *)v;
391 391
392 392 state = tuple->state;
393 393 mode = tuple->mode;
394 394 size = tuple->size;
395 395 mtime = tuple->mtime;
396 396 if (state == 'n' && mtime == now) {
397 397 /* See pure/parsers.py:pack_dirstate for why we do
398 398 * this. */
399 399 mtime = -1;
400 400 mtime_unset = (PyObject *)make_dirstate_tuple(
401 401 state, mode, size, mtime);
402 402 if (!mtime_unset) {
403 403 goto bail;
404 404 }
405 405 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
406 406 goto bail;
407 407 }
408 408 Py_DECREF(mtime_unset);
409 409 mtime_unset = NULL;
410 410 }
411 411 *p++ = state;
412 412 putbe32((uint32_t)mode, p);
413 413 putbe32((uint32_t)size, p + 4);
414 414 putbe32((uint32_t)mtime, p + 8);
415 415 t = p + 12;
416 416 p += 16;
417 417 len = PyBytes_GET_SIZE(k);
418 418 memcpy(p, PyBytes_AS_STRING(k), len);
419 419 p += len;
420 420 o = PyDict_GetItem(copymap, k);
421 421 if (o) {
422 422 *p++ = '\0';
423 423 l = PyBytes_GET_SIZE(o);
424 424 memcpy(p, PyBytes_AS_STRING(o), l);
425 425 p += l;
426 426 len += l + 1;
427 427 }
428 428 putbe32((uint32_t)len, t);
429 429 }
430 430
431 431 pos = p - PyBytes_AS_STRING(packobj);
432 432 if (pos != nbytes) {
433 433 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
434 434 (long)pos, (long)nbytes);
435 435 goto bail;
436 436 }
437 437
438 438 return packobj;
439 439 bail:
440 440 Py_XDECREF(mtime_unset);
441 441 Py_XDECREF(packobj);
442 442 Py_XDECREF(v);
443 443 return NULL;
444 444 }
445 445
446 446 #define BUMPED_FIX 1
447 447 #define USING_SHA_256 2
448 448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449 449
450 450 static PyObject *readshas(const char *source, unsigned char num,
451 451 Py_ssize_t hashwidth)
452 452 {
453 453 int i;
454 454 PyObject *list = PyTuple_New(num);
455 455 if (list == NULL) {
456 456 return NULL;
457 457 }
458 458 for (i = 0; i < num; i++) {
459 459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 460 if (hash == NULL) {
461 461 Py_DECREF(list);
462 462 return NULL;
463 463 }
464 464 PyTuple_SET_ITEM(list, i, hash);
465 465 source += hashwidth;
466 466 }
467 467 return list;
468 468 }
469 469
470 470 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
471 471 uint32_t *msize)
472 472 {
473 473 const char *data = databegin;
474 474 const char *meta;
475 475
476 476 double mtime;
477 477 int16_t tz;
478 478 uint16_t flags;
479 479 unsigned char nsuccs, nparents, nmetadata;
480 480 Py_ssize_t hashwidth = 20;
481 481
482 482 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
483 483 PyObject *metadata = NULL, *ret = NULL;
484 484 int i;
485 485
486 486 if (data + FM1_HEADER_SIZE > dataend) {
487 487 goto overflow;
488 488 }
489 489
490 490 *msize = getbe32(data);
491 491 data += 4;
492 492 mtime = getbefloat64(data);
493 493 data += 8;
494 494 tz = getbeint16(data);
495 495 data += 2;
496 496 flags = getbeuint16(data);
497 497 data += 2;
498 498
499 499 if (flags & USING_SHA_256) {
500 500 hashwidth = 32;
501 501 }
502 502
503 503 nsuccs = (unsigned char)(*data++);
504 504 nparents = (unsigned char)(*data++);
505 505 nmetadata = (unsigned char)(*data++);
506 506
507 507 if (databegin + *msize > dataend) {
508 508 goto overflow;
509 509 }
510 510 dataend = databegin + *msize; /* narrow down to marker size */
511 511
512 512 if (data + hashwidth > dataend) {
513 513 goto overflow;
514 514 }
515 515 prec = PyBytes_FromStringAndSize(data, hashwidth);
516 516 data += hashwidth;
517 517 if (prec == NULL) {
518 518 goto bail;
519 519 }
520 520
521 521 if (data + nsuccs * hashwidth > dataend) {
522 522 goto overflow;
523 523 }
524 524 succs = readshas(data, nsuccs, hashwidth);
525 525 if (succs == NULL) {
526 526 goto bail;
527 527 }
528 528 data += nsuccs * hashwidth;
529 529
530 530 if (nparents == 1 || nparents == 2) {
531 531 if (data + nparents * hashwidth > dataend) {
532 532 goto overflow;
533 533 }
534 534 parents = readshas(data, nparents, hashwidth);
535 535 if (parents == NULL) {
536 536 goto bail;
537 537 }
538 538 data += nparents * hashwidth;
539 539 } else {
540 540 parents = Py_None;
541 541 Py_INCREF(parents);
542 542 }
543 543
544 544 if (data + 2 * nmetadata > dataend) {
545 545 goto overflow;
546 546 }
547 547 meta = data + (2 * nmetadata);
548 548 metadata = PyTuple_New(nmetadata);
549 549 if (metadata == NULL) {
550 550 goto bail;
551 551 }
552 552 for (i = 0; i < nmetadata; i++) {
553 553 PyObject *tmp, *left = NULL, *right = NULL;
554 554 Py_ssize_t leftsize = (unsigned char)(*data++);
555 555 Py_ssize_t rightsize = (unsigned char)(*data++);
556 556 if (meta + leftsize + rightsize > dataend) {
557 557 goto overflow;
558 558 }
559 559 left = PyBytes_FromStringAndSize(meta, leftsize);
560 560 meta += leftsize;
561 561 right = PyBytes_FromStringAndSize(meta, rightsize);
562 562 meta += rightsize;
563 563 tmp = PyTuple_New(2);
564 564 if (!left || !right || !tmp) {
565 565 Py_XDECREF(left);
566 566 Py_XDECREF(right);
567 567 Py_XDECREF(tmp);
568 568 goto bail;
569 569 }
570 570 PyTuple_SET_ITEM(tmp, 0, left);
571 571 PyTuple_SET_ITEM(tmp, 1, right);
572 572 PyTuple_SET_ITEM(metadata, i, tmp);
573 573 }
574 574 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
575 575 (int)tz * 60, parents);
576 576 goto bail; /* return successfully */
577 577
578 578 overflow:
579 579 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
580 580 bail:
581 581 Py_XDECREF(prec);
582 582 Py_XDECREF(succs);
583 583 Py_XDECREF(metadata);
584 584 Py_XDECREF(parents);
585 585 return ret;
586 586 }
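/*
 * Summary sketch, not part of the upstream file, of the FM1 obsolescence
 * marker layout decoded by fm1readmarker() above (integers big endian,
 * fixed header = FM1_HEADER_SIZE = 19 bytes):
 *
 *   4 bytes   total marker size (msize)
 *   8 bytes   mtime (float64)
 *   2 bytes   timezone (int16, multiplied by 60 when returned)
 *   2 bytes   flags (uint16; USING_SHA_256 selects 32-byte hashes)
 *   1 byte    number of successors
 *   1 byte    number of parents (hashes present only for 1 or 2)
 *   1 byte    number of metadata pairs
 *   hashwidth bytes             precursor node
 *   nsuccs * hashwidth bytes    successor nodes
 *   nparents * hashwidth bytes  parent nodes, if present
 *   2 * nmetadata bytes         (key length, value length) pairs
 *   remaining bytes             concatenated metadata keys and values
 */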
587 587
588 588 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
589 589 {
590 590 const char *data, *dataend;
591 591 Py_ssize_t datalen, offset, stop;
592 592 PyObject *markers = NULL;
593 593
594 594 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
595 595 &offset, &stop)) {
596 596 return NULL;
597 597 }
598 598 if (offset < 0) {
599 599 PyErr_SetString(PyExc_ValueError,
600 600 "invalid negative offset in fm1readmarkers");
601 601 return NULL;
602 602 }
603 603 if (stop > datalen) {
604 604 PyErr_SetString(
605 605 PyExc_ValueError,
606 606 "stop longer than data length in fm1readmarkers");
607 607 return NULL;
608 608 }
609 609 dataend = data + datalen;
610 610 data += offset;
611 611 markers = PyList_New(0);
612 612 if (!markers) {
613 613 return NULL;
614 614 }
615 615 while (offset < stop) {
616 616 uint32_t msize;
617 617 int error;
618 618 PyObject *record = fm1readmarker(data, dataend, &msize);
619 619 if (!record) {
620 620 goto bail;
621 621 }
622 622 error = PyList_Append(markers, record);
623 623 Py_DECREF(record);
624 624 if (error) {
625 625 goto bail;
626 626 }
627 627 data += msize;
628 628 offset += msize;
629 629 }
630 630 return markers;
631 631 bail:
632 632 Py_DECREF(markers);
633 633 return NULL;
634 634 }
635 635
636 636 static char parsers_doc[] = "Efficient content parsing.";
637 637
638 638 PyObject *encodedir(PyObject *self, PyObject *args);
639 639 PyObject *pathencode(PyObject *self, PyObject *args);
640 640 PyObject *lowerencode(PyObject *self, PyObject *args);
641 641 PyObject *parse_index2(PyObject *self, PyObject *args);
642 642
643 643 static PyMethodDef methods[] = {
644 644 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
645 645 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
646 646 "create a set containing non-normal and other parent entries of given "
647 647 "dirstate\n"},
648 648 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
649 649 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
650 650 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
651 651 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
652 652 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
653 653 {"dict_new_presized", dict_new_presized, METH_VARARGS,
654 654 "construct a dict with an expected size\n"},
655 655 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
656 656 "make file foldmap\n"},
657 657 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
658 658 "escape a UTF-8 byte string to JSON (fast path)\n"},
659 659 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
660 660 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
661 661 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
662 662 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
663 663 "parse v1 obsolete markers\n"},
664 664 {NULL, NULL}};
665 665
666 666 void dirs_module_init(PyObject *mod);
667 667 void manifest_module_init(PyObject *mod);
668 668 void revlog_module_init(PyObject *mod);
669 669
670 static const int version = 14;
670 static const int version = 15;
671 671
672 672 static void module_init(PyObject *mod)
673 673 {
674 674 PyObject *capsule = NULL;
675 675 PyModule_AddIntConstant(mod, "version", version);
676 676
677 677 /* This module constant has two purposes. First, it lets us unit test
678 678 * the ImportError raised without hard-coding any error text. This
679 679 * means we can change the text in the future without breaking tests,
680 680 * even across changesets without a recompile. Second, its presence
681 681 * can be used to determine whether the version-checking logic is
682 682 * present, which also helps in testing across changesets without a
683 683 * recompile. Note that this means the pure-Python version of parsers
684 684 * should not have this module constant. */
685 685 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
686 686
687 687 dirs_module_init(mod);
688 688 manifest_module_init(mod);
689 689 revlog_module_init(mod);
690 690
691 691 capsule = PyCapsule_New(
692 692 make_dirstate_tuple,
693 693 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
694 694 if (capsule != NULL)
695 695 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
696 696
697 697 if (PyType_Ready(&dirstateTupleType) < 0) {
698 698 return;
699 699 }
700 700 Py_INCREF(&dirstateTupleType);
701 701 PyModule_AddObject(mod, "dirstatetuple",
702 702 (PyObject *)&dirstateTupleType);
703 703 }
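/*
 * Illustrative sketch, not part of the upstream file: another C extension
 * can retrieve the capsule registered in module_init() above with
 * PyCapsule_Import(); the pointer it wraps is the make_dirstate_tuple
 * function. The helper name is hypothetical.
 */
static void *sketch_load_make_dirstate_tuple_capi(void)
{
	/* returns NULL and sets ImportError if the capsule is unavailable */
	return PyCapsule_Import(
	    "mercurial.cext.parsers.make_dirstate_tuple_CAPI", 0);
}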
704 704
705 705 static int check_python_version(void)
706 706 {
707 707 PyObject *sys = PyImport_ImportModule("sys"), *ver;
708 708 long hexversion;
709 709 if (!sys) {
710 710 return -1;
711 711 }
712 712 ver = PyObject_GetAttrString(sys, "hexversion");
713 713 Py_DECREF(sys);
714 714 if (!ver) {
715 715 return -1;
716 716 }
717 717 hexversion = PyInt_AsLong(ver);
718 718 Py_DECREF(ver);
719 719 /* sys.hexversion is a 32-bit number by default, so the -1 case
720 720 * should only occur in unusual circumstances (e.g. if sys.hexversion
721 721 * is manually set to an invalid value). */
722 722 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
723 723 PyErr_Format(PyExc_ImportError,
724 724 "%s: The Mercurial extension "
725 725 "modules were compiled with Python " PY_VERSION
726 726 ", but "
727 727 "Mercurial is currently using Python with "
728 728 "sys.hexversion=%ld: "
729 729 "Python %s\n at: %s",
730 730 versionerrortext, hexversion, Py_GetVersion(),
731 731 Py_GetProgramFullPath());
732 732 return -1;
733 733 }
734 734 return 0;
735 735 }
736 736
737 737 #ifdef IS_PY3K
738 738 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
739 739 parsers_doc, -1, methods};
740 740
741 741 PyMODINIT_FUNC PyInit_parsers(void)
742 742 {
743 743 PyObject *mod;
744 744
745 745 if (check_python_version() == -1)
746 746 return NULL;
747 747 mod = PyModule_Create(&parsers_module);
748 748 module_init(mod);
749 749 return mod;
750 750 }
751 751 #else
752 752 PyMODINIT_FUNC initparsers(void)
753 753 {
754 754 PyObject *mod;
755 755
756 756 if (check_python_version() == -1) {
757 757 return;
758 758 }
759 759 mod = Py_InitModule3("parsers", methods, parsers_doc);
760 760 module_init(mod);
761 761 }
762 762 #endif
@@ -1,3051 +1,3068 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #include "bitmanipulation.h"
20 20 #include "charencode.h"
21 21 #include "revlog.h"
22 22 #include "util.h"
23 23
24 24 #ifdef IS_PY3K
25 25 /* The mapping of Python types is meant to be temporary to get Python
26 26 * 3 to compile. We should remove this once Python 3 is fully
27 27 * supported and proper types are used in the extensions themselves. */
28 28 #define PyInt_Check PyLong_Check
29 29 #define PyInt_FromLong PyLong_FromLong
30 30 #define PyInt_FromSsize_t PyLong_FromSsize_t
31 31 #define PyInt_AsLong PyLong_AsLong
32 32 #endif
33 33
34 34 typedef struct indexObjectStruct indexObject;
35 35
36 36 typedef struct {
37 37 int children[16];
38 38 } nodetreenode;
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 unsigned length; /* # nodes in use */
51 51 unsigned capacity; /* # nodes allocated */
52 52 int depth; /* maximum depth of tree */
53 53 int splits; /* # splits performed */
54 54 } nodetree;
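/*
 * Illustrative sketch, not part of the upstream file: how a child slot in
 * nodetreenode.children[] is interpreted, per the encoding described in the
 * comment above. The helper names are hypothetical.
 */
static inline int sketch_slot_is_empty(int v)
{
	return v == 0;
}
static inline int sketch_slot_is_leaf(int v)
{
	return v < 0;
}
static inline int sketch_slot_to_rev(int v)
{
	return -v - 2; /* leaves store -(rev + 2), so rev 0 is kept as -2 */
}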
55 55
56 56 typedef struct {
57 57 PyObject_HEAD /* ; */
58 58 nodetree nt;
59 59 } nodetreeObject;
60 60
61 61 /*
62 62 * This class has two behaviors.
63 63 *
64 64 * When used in a list-like way (with integer keys), we decode an
65 65 * entry in a RevlogNG index file on demand. Our last entry is a
66 66 * sentinel, always a nullid. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * sentinel.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_buffer buf; /* buffer of data */
78 78 PyObject **cache; /* cached tuples */
79 79 const char **offsets; /* populated on demand */
80 80 Py_ssize_t raw_length; /* original number of elements */
81 81 Py_ssize_t length; /* current number of elements */
82 82 PyObject *added; /* populated on demand */
83 83 PyObject *headrevs; /* cache, invalidated on changes */
84 84 PyObject *filteredrevs; /* filtered revs set */
85 85 nodetree nt; /* base-16 trie */
86 86 int ntinitialized; /* 0 or 1 */
87 87 int ntrev; /* last rev scanned */
88 88 int ntlookups; /* # lookups */
89 89 int ntmisses; /* # lookups that miss the cache */
90 90 int inlined;
91 91 };
92 92
93 93 static Py_ssize_t index_length(const indexObject *self)
94 94 {
95 95 if (self->added == NULL)
96 96 return self->length;
97 97 return self->length + PyList_GET_SIZE(self->added);
98 98 }
99 99
100 100 static PyObject *nullentry = NULL;
101 101 static const char nullid[20] = {0};
102 102 static const Py_ssize_t nullrev = -1;
103 103
104 104 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
105 105
106 106 #if LONG_MAX == 0x7fffffffL
107 107 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
108 108 #else
109 109 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
110 110 #endif
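/*
 * Note, not part of the upstream file: in Py_BuildValue formats, "k" packs
 * an unsigned long and "K" an unsigned long long, so the 64-bit
 * offset/flags field needs "K" on platforms where long is only 32 bits
 * wide (the LONG_MAX test above).
 */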
111 111
112 112 /* A RevlogNG v1 index entry is 64 bytes long. */
113 113 static const long v1_hdrsize = 64;
114 114
115 115 static void raise_revlog_error(void)
116 116 {
117 117 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
118 118
119 119 mod = PyImport_ImportModule("mercurial.error");
120 120 if (mod == NULL) {
121 121 goto cleanup;
122 122 }
123 123
124 124 dict = PyModule_GetDict(mod);
125 125 if (dict == NULL) {
126 126 goto cleanup;
127 127 }
128 128 Py_INCREF(dict);
129 129
130 130 errclass = PyDict_GetItemString(dict, "RevlogError");
131 131 if (errclass == NULL) {
132 132 PyErr_SetString(PyExc_SystemError,
133 133 "could not find RevlogError");
134 134 goto cleanup;
135 135 }
136 136
137 137 /* value of exception is ignored by callers */
138 138 PyErr_SetString(errclass, "RevlogError");
139 139
140 140 cleanup:
141 141 Py_XDECREF(dict);
142 142 Py_XDECREF(mod);
143 143 }
144 144
145 145 /*
146 146 * Return a pointer to the beginning of a RevlogNG record.
147 147 */
148 148 static const char *index_deref(indexObject *self, Py_ssize_t pos)
149 149 {
150 150 if (self->inlined && pos > 0) {
151 151 if (self->offsets == NULL) {
152 152 self->offsets = PyMem_Malloc(self->raw_length *
153 153 sizeof(*self->offsets));
154 154 if (self->offsets == NULL)
155 155 return (const char *)PyErr_NoMemory();
156 156 inline_scan(self, self->offsets);
157 157 }
158 158 return self->offsets[pos];
159 159 }
160 160
161 161 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
162 162 }
163 163
164 164 /*
165 165 * Get parents of the given rev.
166 166 *
167 167 * The specified rev must be valid and must not be nullrev. A returned
168 168 * parent revision may be nullrev, but is guaranteed to be in valid range.
169 169 */
170 170 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
171 171 int maxrev)
172 172 {
173 173 if (rev >= self->length) {
174 174 long tmp;
175 175 PyObject *tuple =
176 176 PyList_GET_ITEM(self->added, rev - self->length);
177 177 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
178 178 return -1;
179 179 }
180 180 ps[0] = (int)tmp;
181 181 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
182 182 return -1;
183 183 }
184 184 ps[1] = (int)tmp;
185 185 } else {
186 186 const char *data = index_deref(self, rev);
187 187 ps[0] = getbe32(data + 24);
188 188 ps[1] = getbe32(data + 28);
189 189 }
190 190 /* If index file is corrupted, ps[] may point to invalid revisions. So
191 191 * there is a risk of buffer overflow to trust them unconditionally. */
192 192 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
193 193 PyErr_SetString(PyExc_ValueError, "parent out of range");
194 194 return -1;
195 195 }
196 196 return 0;
197 197 }
198 198
199 199 /*
200 200 * Get parents of the given rev.
201 201 *
202 202 * If the specified rev is out of range, IndexError will be raised. If the
203 203 * revlog entry is corrupted, ValueError may be raised.
204 204 *
205 205 * Returns 0 on success or -1 on failure.
206 206 */
207 207 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
208 208 {
209 209 int tiprev;
210 210 if (!op || !HgRevlogIndex_Check(op) || !ps) {
211 211 PyErr_BadInternalCall();
212 212 return -1;
213 213 }
214 214 tiprev = (int)index_length((indexObject *)op) - 1;
215 215 if (rev < -1 || rev > tiprev) {
216 216 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
217 217 return -1;
218 218 } else if (rev == -1) {
219 219 ps[0] = ps[1] = -1;
220 220 return 0;
221 221 } else {
222 222 return index_get_parents((indexObject *)op, rev, ps, tiprev);
223 223 }
224 224 }
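/*
 * Illustrative usage sketch, not part of the upstream file: the exported
 * HgRevlogIndex_GetParents() fills ps[] with the two parent revisions
 * (nullrev == -1 for a missing parent). The helper name is hypothetical.
 */
static int sketch_is_merge_rev(PyObject *index, int rev)
{
	int ps[2];
	if (HgRevlogIndex_GetParents(index, rev, ps) < 0)
		return -1; /* exception already set by the callee */
	return ps[0] != -1 && ps[1] != -1;
}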
225 225
226 226 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
227 227 {
228 228 uint64_t offset;
229 229 if (rev == nullrev) {
230 230 return 0;
231 231 }
232 232 if (rev >= self->length) {
233 233 PyObject *tuple;
234 234 PyObject *pylong;
235 235 PY_LONG_LONG tmp;
236 236 tuple = PyList_GET_ITEM(self->added, rev - self->length);
237 237 pylong = PyTuple_GET_ITEM(tuple, 0);
238 238 tmp = PyLong_AsLongLong(pylong);
239 239 if (tmp == -1 && PyErr_Occurred()) {
240 240 return -1;
241 241 }
242 242 if (tmp < 0) {
243 243 PyErr_Format(PyExc_OverflowError,
244 244 "revlog entry size out of bound (%lld)",
245 245 (long long)tmp);
246 246 return -1;
247 247 }
248 248 offset = (uint64_t)tmp;
249 249 } else {
250 250 const char *data = index_deref(self, rev);
251 251 offset = getbe32(data + 4);
252 252 if (rev == 0) {
253 253 /* mask out version number for the first entry */
254 254 offset &= 0xFFFF;
255 255 } else {
256 256 uint32_t offset_high = getbe32(data);
257 257 offset |= ((uint64_t)offset_high) << 32;
258 258 }
259 259 }
260 260 return (int64_t)(offset >> 16);
261 261 }
262 262
263 263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 264 {
265 265 if (rev == nullrev) {
266 266 return 0;
267 267 }
268 268 if (rev >= self->length) {
269 269 PyObject *tuple;
270 270 PyObject *pylong;
271 271 long ret;
272 272 tuple = PyList_GET_ITEM(self->added, rev - self->length);
273 273 pylong = PyTuple_GET_ITEM(tuple, 1);
274 274 ret = PyInt_AsLong(pylong);
275 275 if (ret == -1 && PyErr_Occurred()) {
276 276 return -1;
277 277 }
278 278 if (ret < 0 || ret > (long)INT_MAX) {
279 279 PyErr_Format(PyExc_OverflowError,
280 280 "revlog entry size out of bound (%ld)",
281 281 ret);
282 282 return -1;
283 283 }
284 284 return (int)ret;
285 285 } else {
286 286 const char *data = index_deref(self, rev);
287 287 int tmp = (int)getbe32(data + 8);
288 288 if (tmp < 0) {
289 289 PyErr_Format(PyExc_OverflowError,
290 290 "revlog entry size out of bound (%d)",
291 291 tmp);
292 292 return -1;
293 293 }
294 294 return tmp;
295 295 }
296 296 }
297 297
298 298 /*
299 299 * RevlogNG format (all in big endian, data may be inlined):
300 300 * 6 bytes: offset
301 301 * 2 bytes: flags
302 302 * 4 bytes: compressed length
303 303 * 4 bytes: uncompressed length
304 304 * 4 bytes: base revision
305 305 * 4 bytes: link revision
306 306 * 4 bytes: parent 1 revision
307 307 * 4 bytes: parent 2 revision
308 308 * 32 bytes: nodeid (only 20 bytes used)
309 309 */
310 310 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
311 311 {
312 312 uint64_t offset_flags;
313 313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
314 314 const char *c_node_id;
315 315 const char *data;
316 316 Py_ssize_t length = index_length(self);
317 317 PyObject *entry;
318 318
319 319 if (pos == nullrev) {
320 320 Py_INCREF(nullentry);
321 321 return nullentry;
322 322 }
323 323
324 324 if (pos < 0 || pos >= length) {
325 325 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
326 326 return NULL;
327 327 }
328 328
329 329 if (pos >= self->length) {
330 330 PyObject *obj;
331 331 obj = PyList_GET_ITEM(self->added, pos - self->length);
332 332 Py_INCREF(obj);
333 333 return obj;
334 334 }
335 335
336 336 if (self->cache) {
337 337 if (self->cache[pos]) {
338 338 Py_INCREF(self->cache[pos]);
339 339 return self->cache[pos];
340 340 }
341 341 } else {
342 342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 343 if (self->cache == NULL)
344 344 return PyErr_NoMemory();
345 345 }
346 346
347 347 data = index_deref(self, pos);
348 348 if (data == NULL)
349 349 return NULL;
350 350
351 351 offset_flags = getbe32(data + 4);
352 352 if (pos == 0) /* mask out version number for the first entry */
353 353 offset_flags &= 0xFFFF;
354 354 else {
355 355 uint32_t offset_high = getbe32(data);
356 356 offset_flags |= ((uint64_t)offset_high) << 32;
357 357 }
358 358
359 359 comp_len = getbe32(data + 8);
360 360 uncomp_len = getbe32(data + 12);
361 361 base_rev = getbe32(data + 16);
362 362 link_rev = getbe32(data + 20);
363 363 parent_1 = getbe32(data + 24);
364 364 parent_2 = getbe32(data + 28);
365 365 c_node_id = data + 32;
366 366
367 367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
368 368 base_rev, link_rev, parent_1, parent_2, c_node_id,
369 369 (Py_ssize_t)20);
370 370
371 371 if (entry) {
372 372 PyObject_GC_UnTrack(entry);
373 373 Py_INCREF(entry);
374 374 }
375 375
376 376 self->cache[pos] = entry;
377 377
378 378 return entry;
379 379 }
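/*
 * Illustrative sketch, not part of the upstream file: splitting the packed
 * offset/flags field read by index_get() above. The first entry keeps the
 * revlog version in its offset bytes, which is why pos == 0 is masked with
 * 0xFFFF. The helper names are hypothetical.
 */
static inline uint64_t sketch_entry_offset(uint64_t offset_flags)
{
	return offset_flags >> 16; /* upper 6 bytes: offset in the data file */
}
static inline uint16_t sketch_entry_flags(uint64_t offset_flags)
{
	return (uint16_t)(offset_flags & 0xFFFF); /* lower 2 bytes: flags */
}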
380 380
381 381 /*
382 382 * Return the 20-byte SHA of the node corresponding to the given rev.
383 383 */
384 384 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 385 {
386 386 Py_ssize_t length = index_length(self);
387 387 const char *data;
388 388
389 389 if (pos == nullrev)
390 390 return nullid;
391 391
392 392 if (pos >= length)
393 393 return NULL;
394 394
395 395 if (pos >= self->length) {
396 396 PyObject *tuple, *str;
397 397 tuple = PyList_GET_ITEM(self->added, pos - self->length);
398 398 str = PyTuple_GetItem(tuple, 7);
399 399 return str ? PyBytes_AS_STRING(str) : NULL;
400 400 }
401 401
402 402 data = index_deref(self, pos);
403 403 return data ? data + 32 : NULL;
404 404 }
405 405
406 406 /*
407 407 * Return the 20-byte SHA of the node corresponding to the given rev. The
408 408 * rev is assumed to exist. If not, an exception is set.
409 409 */
410 410 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
411 411 {
412 412 const char *node = index_node(self, pos);
413 413 if (node == NULL) {
414 414 PyErr_Format(PyExc_IndexError, "could not access rev %d",
415 415 (int)pos);
416 416 }
417 417 return node;
418 418 }
419 419
420 420 static int nt_insert(nodetree *self, const char *node, int rev);
421 421
422 422 static int node_check(PyObject *obj, char **node)
423 423 {
424 424 Py_ssize_t nodelen;
425 425 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
426 426 return -1;
427 427 if (nodelen == 20)
428 428 return 0;
429 429 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
430 430 return -1;
431 431 }
432 432
433 433 static PyObject *index_append(indexObject *self, PyObject *obj)
434 434 {
435 435 char *node;
436 436 Py_ssize_t len;
437 437
438 438 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
439 439 PyErr_SetString(PyExc_TypeError, "8-tuple required");
440 440 return NULL;
441 441 }
442 442
443 443 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
444 444 return NULL;
445 445
446 446 len = index_length(self);
447 447
448 448 if (self->added == NULL) {
449 449 self->added = PyList_New(0);
450 450 if (self->added == NULL)
451 451 return NULL;
452 452 }
453 453
454 454 if (PyList_Append(self->added, obj) == -1)
455 455 return NULL;
456 456
457 457 if (self->ntinitialized)
458 458 nt_insert(&self->nt, node, (int)len);
459 459
460 460 Py_CLEAR(self->headrevs);
461 461 Py_RETURN_NONE;
462 462 }
463 463
464 464 static PyObject *index_stats(indexObject *self)
465 465 {
466 466 PyObject *obj = PyDict_New();
467 467 PyObject *s = NULL;
468 468 PyObject *t = NULL;
469 469
470 470 if (obj == NULL)
471 471 return NULL;
472 472
473 473 #define istat(__n, __d) \
474 474 do { \
475 475 s = PyBytes_FromString(__d); \
476 476 t = PyInt_FromSsize_t(self->__n); \
477 477 if (!s || !t) \
478 478 goto bail; \
479 479 if (PyDict_SetItem(obj, s, t) == -1) \
480 480 goto bail; \
481 481 Py_CLEAR(s); \
482 482 Py_CLEAR(t); \
483 483 } while (0)
484 484
485 485 if (self->added) {
486 486 Py_ssize_t len = PyList_GET_SIZE(self->added);
487 487 s = PyBytes_FromString("index entries added");
488 488 t = PyInt_FromSsize_t(len);
489 489 if (!s || !t)
490 490 goto bail;
491 491 if (PyDict_SetItem(obj, s, t) == -1)
492 492 goto bail;
493 493 Py_CLEAR(s);
494 494 Py_CLEAR(t);
495 495 }
496 496
497 497 if (self->raw_length != self->length)
498 498 istat(raw_length, "revs on disk");
499 499 istat(length, "revs in memory");
500 500 istat(ntlookups, "node trie lookups");
501 501 istat(ntmisses, "node trie misses");
502 502 istat(ntrev, "node trie last rev scanned");
503 503 if (self->ntinitialized) {
504 504 istat(nt.capacity, "node trie capacity");
505 505 istat(nt.depth, "node trie depth");
506 506 istat(nt.length, "node trie count");
507 507 istat(nt.splits, "node trie splits");
508 508 }
509 509
510 510 #undef istat
511 511
512 512 return obj;
513 513
514 514 bail:
515 515 Py_XDECREF(obj);
516 516 Py_XDECREF(s);
517 517 Py_XDECREF(t);
518 518 return NULL;
519 519 }
520 520
521 521 /*
522 522 * When we cache a list, we want to be sure the caller can't mutate
523 523 * the cached copy.
524 524 */
525 525 static PyObject *list_copy(PyObject *list)
526 526 {
527 527 Py_ssize_t len = PyList_GET_SIZE(list);
528 528 PyObject *newlist = PyList_New(len);
529 529 Py_ssize_t i;
530 530
531 531 if (newlist == NULL)
532 532 return NULL;
533 533
534 534 for (i = 0; i < len; i++) {
535 535 PyObject *obj = PyList_GET_ITEM(list, i);
536 536 Py_INCREF(obj);
537 537 PyList_SET_ITEM(newlist, i, obj);
538 538 }
539 539
540 540 return newlist;
541 541 }
542 542
543 543 static int check_filter(PyObject *filter, Py_ssize_t arg)
544 544 {
545 545 if (filter) {
546 546 PyObject *arglist, *result;
547 547 int isfiltered;
548 548
549 549 arglist = Py_BuildValue("(n)", arg);
550 550 if (!arglist) {
551 551 return -1;
552 552 }
553 553
554 554 result = PyEval_CallObject(filter, arglist);
555 555 Py_DECREF(arglist);
556 556 if (!result) {
557 557 return -1;
558 558 }
559 559
560 560 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
561 561 * same as this function, so we can just return it directly. */
562 562 isfiltered = PyObject_IsTrue(result);
563 563 Py_DECREF(result);
564 564 return isfiltered;
565 565 } else {
566 566 return 0;
567 567 }
568 568 }
569 569
570 570 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
571 571 Py_ssize_t marker, char *phases)
572 572 {
573 573 PyObject *iter = NULL;
574 574 PyObject *iter_item = NULL;
575 575 Py_ssize_t min_idx = index_length(self) + 2;
576 576 long iter_item_long;
577 577
578 578 if (PyList_GET_SIZE(list) != 0) {
579 579 iter = PyObject_GetIter(list);
580 580 if (iter == NULL)
581 581 return -2;
582 582 while ((iter_item = PyIter_Next(iter))) {
583 583 if (!pylong_to_long(iter_item, &iter_item_long)) {
584 584 Py_DECREF(iter_item);
585 585 return -2;
586 586 }
587 587 Py_DECREF(iter_item);
588 588 if (iter_item_long < min_idx)
589 589 min_idx = iter_item_long;
590 590 phases[iter_item_long] = (char)marker;
591 591 }
592 592 Py_DECREF(iter);
593 593 }
594 594
595 595 return min_idx;
596 596 }
597 597
598 598 static inline void set_phase_from_parents(char *phases, int parent_1,
599 599 int parent_2, Py_ssize_t i)
600 600 {
601 601 if (parent_1 >= 0 && phases[parent_1] > phases[i])
602 602 phases[i] = phases[parent_1];
603 603 if (parent_2 >= 0 && phases[parent_2] > phases[i])
604 604 phases[i] = phases[parent_2];
605 605 }
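/*
 * Note, not part of the upstream file: phase values are ordered
 * public(0) < draft(1) < secret(2), so copying the larger parent phase
 * makes a child at least as secret as either of its parents.
 */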
606 606
607 607 static PyObject *reachableroots2(indexObject *self, PyObject *args)
608 608 {
609 609
610 610 /* Input */
611 611 long minroot;
612 612 PyObject *includepatharg = NULL;
613 613 int includepath = 0;
614 614 /* heads and roots are lists */
615 615 PyObject *heads = NULL;
616 616 PyObject *roots = NULL;
617 617 PyObject *reachable = NULL;
618 618
619 619 PyObject *val;
620 620 Py_ssize_t len = index_length(self);
621 621 long revnum;
622 622 Py_ssize_t k;
623 623 Py_ssize_t i;
624 624 Py_ssize_t l;
625 625 int r;
626 626 int parents[2];
627 627
628 628 /* Internal data structure:
629 629 * tovisit: array of length len+1 (all revs + nullrev), filled up to
630 630 * lentovisit
631 631 *
632 632 * revstates: array of length len+1 (all revs + nullrev) */
633 633 int *tovisit = NULL;
634 634 long lentovisit = 0;
635 635 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
636 636 char *revstates = NULL;
637 637
638 638 /* Get arguments */
639 639 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
640 640 &PyList_Type, &roots, &PyBool_Type,
641 641 &includepatharg))
642 642 goto bail;
643 643
644 644 if (includepatharg == Py_True)
645 645 includepath = 1;
646 646
647 647 /* Initialize return set */
648 648 reachable = PyList_New(0);
649 649 if (reachable == NULL)
650 650 goto bail;
651 651
652 652 /* Initialize internal datastructures */
653 653 tovisit = (int *)malloc((len + 1) * sizeof(int));
654 654 if (tovisit == NULL) {
655 655 PyErr_NoMemory();
656 656 goto bail;
657 657 }
658 658
659 659 revstates = (char *)calloc(len + 1, 1);
660 660 if (revstates == NULL) {
661 661 PyErr_NoMemory();
662 662 goto bail;
663 663 }
664 664
665 665 l = PyList_GET_SIZE(roots);
666 666 for (i = 0; i < l; i++) {
667 667 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
668 668 if (revnum == -1 && PyErr_Occurred())
669 669 goto bail;
670 670 /* If root is out of range, e.g. wdir(), it must be unreachable
671 671 * from heads. So we can just ignore it. */
672 672 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
673 673 continue;
674 674 revstates[revnum + 1] |= RS_ROOT;
675 675 }
676 676
677 677 /* Populate tovisit with all the heads */
678 678 l = PyList_GET_SIZE(heads);
679 679 for (i = 0; i < l; i++) {
680 680 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
681 681 if (revnum == -1 && PyErr_Occurred())
682 682 goto bail;
683 683 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
684 684 PyErr_SetString(PyExc_IndexError, "head out of range");
685 685 goto bail;
686 686 }
687 687 if (!(revstates[revnum + 1] & RS_SEEN)) {
688 688 tovisit[lentovisit++] = (int)revnum;
689 689 revstates[revnum + 1] |= RS_SEEN;
690 690 }
691 691 }
692 692
693 693 /* Visit the tovisit list and find the reachable roots */
694 694 k = 0;
695 695 while (k < lentovisit) {
696 696 /* Add the node to reachable if it is a root */
697 697 revnum = tovisit[k++];
698 698 if (revstates[revnum + 1] & RS_ROOT) {
699 699 revstates[revnum + 1] |= RS_REACHABLE;
700 700 val = PyInt_FromLong(revnum);
701 701 if (val == NULL)
702 702 goto bail;
703 703 r = PyList_Append(reachable, val);
704 704 Py_DECREF(val);
705 705 if (r < 0)
706 706 goto bail;
707 707 if (includepath == 0)
708 708 continue;
709 709 }
710 710
711 711 /* Add its parents to the list of nodes to visit */
712 712 if (revnum == nullrev)
713 713 continue;
714 714 r = index_get_parents(self, revnum, parents, (int)len - 1);
715 715 if (r < 0)
716 716 goto bail;
717 717 for (i = 0; i < 2; i++) {
718 718 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
719 719 parents[i] >= minroot) {
720 720 tovisit[lentovisit++] = parents[i];
721 721 revstates[parents[i] + 1] |= RS_SEEN;
722 722 }
723 723 }
724 724 }
725 725
726 726 /* Find all the nodes in between the roots we found and the heads
727 727 * and add them to the reachable set */
728 728 if (includepath == 1) {
729 729 long minidx = minroot;
730 730 if (minidx < 0)
731 731 minidx = 0;
732 732 for (i = minidx; i < len; i++) {
733 733 if (!(revstates[i + 1] & RS_SEEN))
734 734 continue;
735 735 r = index_get_parents(self, i, parents, (int)len - 1);
736 736 /* Corrupted index file, error is set from
737 737 * index_get_parents */
738 738 if (r < 0)
739 739 goto bail;
740 740 if (((revstates[parents[0] + 1] |
741 741 revstates[parents[1] + 1]) &
742 742 RS_REACHABLE) &&
743 743 !(revstates[i + 1] & RS_REACHABLE)) {
744 744 revstates[i + 1] |= RS_REACHABLE;
745 745 val = PyInt_FromSsize_t(i);
746 746 if (val == NULL)
747 747 goto bail;
748 748 r = PyList_Append(reachable, val);
749 749 Py_DECREF(val);
750 750 if (r < 0)
751 751 goto bail;
752 752 }
753 753 }
754 754 }
755 755
756 756 free(revstates);
757 757 free(tovisit);
758 758 return reachable;
759 759 bail:
760 760 Py_XDECREF(reachable);
761 761 free(revstates);
762 762 free(tovisit);
763 763 return NULL;
764 764 }
765 765
766 766 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
767 767 {
768 768 PyObject *roots = Py_None;
769 769 PyObject *ret = NULL;
770 770 PyObject *phasessize = NULL;
771 771 PyObject *phaseroots = NULL;
772 772 PyObject *phaseset = NULL;
773 773 PyObject *phasessetlist = NULL;
774 774 PyObject *rev = NULL;
775 775 Py_ssize_t len = index_length(self);
776 776 Py_ssize_t numphase = 0;
777 777 Py_ssize_t minrevallphases = 0;
778 778 Py_ssize_t minrevphase = 0;
779 779 Py_ssize_t i = 0;
780 780 char *phases = NULL;
781 781 long phase;
782 782
783 783 if (!PyArg_ParseTuple(args, "O", &roots))
784 784 goto done;
785 785 if (roots == NULL || !PyList_Check(roots)) {
786 786 PyErr_SetString(PyExc_TypeError, "roots must be a list");
787 787 goto done;
788 788 }
789 789
790 790 phases = calloc(
791 791 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
792 792 if (phases == NULL) {
793 793 PyErr_NoMemory();
794 794 goto done;
795 795 }
796 796 /* Put the phase information of all the roots in phases */
797 797 numphase = PyList_GET_SIZE(roots) + 1;
798 798 minrevallphases = len + 1;
799 799 phasessetlist = PyList_New(numphase);
800 800 if (phasessetlist == NULL)
801 801 goto done;
802 802
803 803 PyList_SET_ITEM(phasessetlist, 0, Py_None);
804 804 Py_INCREF(Py_None);
805 805
806 806 for (i = 0; i < numphase - 1; i++) {
807 807 phaseroots = PyList_GET_ITEM(roots, i);
808 808 phaseset = PySet_New(NULL);
809 809 if (phaseset == NULL)
810 810 goto release;
811 811 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
812 812 if (!PyList_Check(phaseroots)) {
813 813 PyErr_SetString(PyExc_TypeError,
814 814 "roots item must be a list");
815 815 goto release;
816 816 }
817 817 minrevphase =
818 818 add_roots_get_min(self, phaseroots, i + 1, phases);
819 819 if (minrevphase == -2) /* Error from add_roots_get_min */
820 820 goto release;
821 821 minrevallphases = MIN(minrevallphases, minrevphase);
822 822 }
823 823 /* Propagate the phase information from the roots to the revs */
824 824 if (minrevallphases != -1) {
825 825 int parents[2];
826 826 for (i = minrevallphases; i < len; i++) {
827 827 if (index_get_parents(self, i, parents, (int)len - 1) <
828 828 0)
829 829 goto release;
830 830 set_phase_from_parents(phases, parents[0], parents[1],
831 831 i);
832 832 }
833 833 }
834 834 /* Transform phase list to a python list */
835 835 phasessize = PyInt_FromSsize_t(len);
836 836 if (phasessize == NULL)
837 837 goto release;
838 838 for (i = 0; i < len; i++) {
839 839 phase = phases[i];
840 840 /* We only store the sets of phase for non public phase, the
841 841 * public phase is computed as a difference */
842 842 if (phase != 0) {
843 843 phaseset = PyList_GET_ITEM(phasessetlist, phase);
844 844 rev = PyInt_FromSsize_t(i);
845 845 if (rev == NULL)
846 846 goto release;
847 847 PySet_Add(phaseset, rev);
848 848 Py_XDECREF(rev);
849 849 }
850 850 }
851 851 ret = PyTuple_Pack(2, phasessize, phasessetlist);
852 852
853 853 release:
854 854 Py_XDECREF(phasessize);
855 855 Py_XDECREF(phasessetlist);
856 856 done:
857 857 free(phases);
858 858 return ret;
859 859 }
860 860
861 861 static PyObject *index_headrevs(indexObject *self, PyObject *args)
862 862 {
863 863 Py_ssize_t i, j, len;
864 864 char *nothead = NULL;
865 865 PyObject *heads = NULL;
866 866 PyObject *filter = NULL;
867 867 PyObject *filteredrevs = Py_None;
868 868
869 869 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
870 870 return NULL;
871 871 }
872 872
873 873 if (self->headrevs && filteredrevs == self->filteredrevs)
874 874 return list_copy(self->headrevs);
875 875
876 876 Py_DECREF(self->filteredrevs);
877 877 self->filteredrevs = filteredrevs;
878 878 Py_INCREF(filteredrevs);
879 879
880 880 if (filteredrevs != Py_None) {
881 881 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
882 882 if (!filter) {
883 883 PyErr_SetString(
884 884 PyExc_TypeError,
885 885 "filteredrevs has no attribute __contains__");
886 886 goto bail;
887 887 }
888 888 }
889 889
890 890 len = index_length(self);
891 891 heads = PyList_New(0);
892 892 if (heads == NULL)
893 893 goto bail;
894 894 if (len == 0) {
895 895 PyObject *nullid = PyInt_FromLong(-1);
896 896 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
897 897 Py_XDECREF(nullid);
898 898 goto bail;
899 899 }
900 900 goto done;
901 901 }
902 902
903 903 nothead = calloc(len, 1);
904 904 if (nothead == NULL) {
905 905 PyErr_NoMemory();
906 906 goto bail;
907 907 }
908 908
909 909 for (i = len - 1; i >= 0; i--) {
910 910 int isfiltered;
911 911 int parents[2];
912 912
913 913 /* If nothead[i] == 1, it means we've seen an unfiltered child
914 914 * of this node already, and therefore this node is not
915 915 * filtered. So we can skip the expensive check_filter step.
916 916 */
917 917 if (nothead[i] != 1) {
918 918 isfiltered = check_filter(filter, i);
919 919 if (isfiltered == -1) {
920 920 PyErr_SetString(PyExc_TypeError,
921 921 "unable to check filter");
922 922 goto bail;
923 923 }
924 924
925 925 if (isfiltered) {
926 926 nothead[i] = 1;
927 927 continue;
928 928 }
929 929 }
930 930
931 931 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
932 932 goto bail;
933 933 for (j = 0; j < 2; j++) {
934 934 if (parents[j] >= 0)
935 935 nothead[parents[j]] = 1;
936 936 }
937 937 }
938 938
939 939 for (i = 0; i < len; i++) {
940 940 PyObject *head;
941 941
942 942 if (nothead[i])
943 943 continue;
944 944 head = PyInt_FromSsize_t(i);
945 945 if (head == NULL || PyList_Append(heads, head) == -1) {
946 946 Py_XDECREF(head);
947 947 goto bail;
948 948 }
949 949 }
950 950
951 951 done:
952 952 self->headrevs = heads;
953 953 Py_XDECREF(filter);
954 954 free(nothead);
955 955 return list_copy(self->headrevs);
956 956 bail:
957 957 Py_XDECREF(filter);
958 958 Py_XDECREF(heads);
959 959 free(nothead);
960 960 return NULL;
961 961 }
962 962
963 963 /**
964 964 * Obtain the base revision index entry.
965 965 *
966 966 * Callers must ensure that rev >= 0 or illegal memory access may occur.
967 967 */
968 968 static inline int index_baserev(indexObject *self, int rev)
969 969 {
970 970 const char *data;
971 971 int result;
972 972
973 973 if (rev >= self->length) {
974 974 PyObject *tuple =
975 975 PyList_GET_ITEM(self->added, rev - self->length);
976 976 long ret;
977 977 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
978 978 return -2;
979 979 }
980 980 result = (int)ret;
981 981 } else {
982 982 data = index_deref(self, rev);
983 983 if (data == NULL) {
984 984 return -2;
985 985 }
986 986
987 987 result = getbe32(data + 16);
988 988 }
989 989 if (result > rev) {
990 990 PyErr_Format(
991 991 PyExc_ValueError,
992 992 "corrupted revlog, revision base above revision: %d, %d",
993 993 rev, result);
994 994 return -2;
995 995 }
996 996 if (result < -1) {
997 997 PyErr_Format(
998 998 PyExc_ValueError,
999 999 "corrupted revlog, revision base out of range: %d, %d", rev,
1000 1000 result);
1001 1001 return -2;
1002 1002 }
1003 1003 return result;
1004 1004 }
1005 1005
1006 1006 /**
1007 1007 * Find if a revision is a snapshot or not
1008 1008 *
1009 1009 * Only relevant for sparse-revlog case.
1010 1010 * Callers must ensure that rev is in a valid range.
1011 1011 */
1012 1012 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1013 1013 {
1014 1014 int ps[2];
1015 1015 Py_ssize_t base;
1016 1016 while (rev >= 0) {
1017 1017 base = (Py_ssize_t)index_baserev(self, rev);
1018 1018 if (base == rev) {
1019 1019 base = -1;
1020 1020 }
1021 1021 if (base == -2) {
1022 1022 assert(PyErr_Occurred());
1023 1023 return -1;
1024 1024 }
1025 1025 if (base == -1) {
1026 1026 return 1;
1027 1027 }
1028 1028 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1029 1029 assert(PyErr_Occurred());
1030 1030 return -1;
1031 1031 };
1032 1032 if (base == ps[0] || base == ps[1]) {
1033 1033 return 0;
1034 1034 }
1035 1035 rev = base;
1036 1036 }
1037 1037 return rev == -1;
1038 1038 }
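/*
 * Note, not part of the upstream file: the walk above follows the chain of
 * delta bases. A revision counts as a snapshot when that chain reaches a
 * full text (base == -1, possibly via base == rev) without ever stepping
 * through a revision whose delta base is one of its own parents; such a
 * step is an ordinary delta and makes the original revision a non-snapshot.
 */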
1039 1039
1040 1040 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1041 1041 {
1042 1042 long rev;
1043 1043 int issnap;
1044 1044 Py_ssize_t length = index_length(self);
1045 1045
1046 1046 if (!pylong_to_long(value, &rev)) {
1047 1047 return NULL;
1048 1048 }
1049 1049 if (rev < -1 || rev >= length) {
1050 1050 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1051 1051 rev);
1052 1052 return NULL;
1053 1053 };
1054 1054 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1055 1055 if (issnap < 0) {
1056 1056 return NULL;
1057 1057 };
1058 1058 return PyBool_FromLong((long)issnap);
1059 1059 }
1060 1060
1061 1061 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1062 1062 {
1063 1063 Py_ssize_t start_rev;
1064 1064 PyObject *cache;
1065 1065 Py_ssize_t base;
1066 1066 Py_ssize_t rev;
1067 1067 PyObject *key = NULL;
1068 1068 PyObject *value = NULL;
1069 1069 const Py_ssize_t length = index_length(self);
1070 1070 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1071 1071 return NULL;
1072 1072 }
1073 1073 for (rev = start_rev; rev < length; rev++) {
1074 1074 int issnap;
1075 1075 PyObject *allvalues = NULL;
1076 1076 issnap = index_issnapshotrev(self, rev);
1077 1077 if (issnap < 0) {
1078 1078 goto bail;
1079 1079 }
1080 1080 if (issnap == 0) {
1081 1081 continue;
1082 1082 }
1083 1083 base = (Py_ssize_t)index_baserev(self, rev);
1084 1084 if (base == rev) {
1085 1085 base = -1;
1086 1086 }
1087 1087 if (base == -2) {
1088 1088 assert(PyErr_Occurred());
1089 1089 goto bail;
1090 1090 }
1091 1091 key = PyInt_FromSsize_t(base);
1092 1092 allvalues = PyDict_GetItem(cache, key);
1093 1093 if (allvalues == NULL && PyErr_Occurred()) {
1094 1094 goto bail;
1095 1095 }
1096 1096 if (allvalues == NULL) {
1097 1097 int r;
1098 1098 allvalues = PyList_New(0);
1099 1099 if (!allvalues) {
1100 1100 goto bail;
1101 1101 }
1102 1102 r = PyDict_SetItem(cache, key, allvalues);
1103 1103 Py_DECREF(allvalues);
1104 1104 if (r < 0) {
1105 1105 goto bail;
1106 1106 }
1107 1107 }
1108 1108 value = PyInt_FromSsize_t(rev);
1109 1109 if (PyList_Append(allvalues, value)) {
1110 1110 goto bail;
1111 1111 }
1112 1112 Py_CLEAR(key);
1113 1113 Py_CLEAR(value);
1114 1114 }
1115 1115 Py_RETURN_NONE;
1116 1116 bail:
1117 1117 Py_XDECREF(key);
1118 1118 Py_XDECREF(value);
1119 1119 return NULL;
1120 1120 }
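
/*
 * Usage sketch (hypothetical caller, for illustration only):
 * index.findsnapshots(cache, start_rev) walks revisions from start_rev
 * onwards and, for every snapshot found, appends its revision number to
 * cache[base], where base is the snapshot's delta base (-1 for full
 * snapshots). The dict is supplied and owned by the caller.
 */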
1121 1121
1122 1122 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1123 1123 {
1124 1124 int rev, generaldelta;
1125 1125 PyObject *stoparg;
1126 1126 int stoprev, iterrev, baserev = -1;
1127 1127 int stopped;
1128 1128 PyObject *chain = NULL, *result = NULL;
1129 1129 const Py_ssize_t length = index_length(self);
1130 1130
1131 1131 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1132 1132 return NULL;
1133 1133 }
1134 1134
1135 1135 if (PyInt_Check(stoparg)) {
1136 1136 stoprev = (int)PyInt_AsLong(stoparg);
1137 1137 if (stoprev == -1 && PyErr_Occurred()) {
1138 1138 return NULL;
1139 1139 }
1140 1140 } else if (stoparg == Py_None) {
1141 1141 stoprev = -2;
1142 1142 } else {
1143 1143 PyErr_SetString(PyExc_ValueError,
1144 1144 "stoprev must be integer or None");
1145 1145 return NULL;
1146 1146 }
1147 1147
1148 1148 if (rev < 0 || rev >= length) {
1149 1149 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1150 1150 return NULL;
1151 1151 }
1152 1152
1153 1153 chain = PyList_New(0);
1154 1154 if (chain == NULL) {
1155 1155 return NULL;
1156 1156 }
1157 1157
1158 1158 baserev = index_baserev(self, rev);
1159 1159
1160 1160 /* This should never happen. */
1161 1161 if (baserev <= -2) {
1162 1162 /* Error should be set by index_deref() */
1163 1163 assert(PyErr_Occurred());
1164 1164 goto bail;
1165 1165 }
1166 1166
1167 1167 iterrev = rev;
1168 1168
1169 1169 while (iterrev != baserev && iterrev != stoprev) {
1170 1170 PyObject *value = PyInt_FromLong(iterrev);
1171 1171 if (value == NULL) {
1172 1172 goto bail;
1173 1173 }
1174 1174 if (PyList_Append(chain, value)) {
1175 1175 Py_DECREF(value);
1176 1176 goto bail;
1177 1177 }
1178 1178 Py_DECREF(value);
1179 1179
1180 1180 if (generaldelta) {
1181 1181 iterrev = baserev;
1182 1182 } else {
1183 1183 iterrev--;
1184 1184 }
1185 1185
1186 1186 if (iterrev < 0) {
1187 1187 break;
1188 1188 }
1189 1189
1190 1190 if (iterrev >= length) {
1191 1191 PyErr_SetString(PyExc_IndexError,
1192 1192 "revision outside index");
1193 1193 return NULL;
1194 1194 }
1195 1195
1196 1196 baserev = index_baserev(self, iterrev);
1197 1197
1198 1198 /* This should never happen. */
1199 1199 if (baserev <= -2) {
1200 1200 /* Error should be set by index_deref() */
1201 1201 assert(PyErr_Occurred());
1202 1202 goto bail;
1203 1203 }
1204 1204 }
1205 1205
1206 1206 if (iterrev == stoprev) {
1207 1207 stopped = 1;
1208 1208 } else {
1209 1209 PyObject *value = PyInt_FromLong(iterrev);
1210 1210 if (value == NULL) {
1211 1211 goto bail;
1212 1212 }
1213 1213 if (PyList_Append(chain, value)) {
1214 1214 Py_DECREF(value);
1215 1215 goto bail;
1216 1216 }
1217 1217 Py_DECREF(value);
1218 1218
1219 1219 stopped = 0;
1220 1220 }
1221 1221
1222 1222 if (PyList_Reverse(chain)) {
1223 1223 goto bail;
1224 1224 }
1225 1225
1226 1226 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1227 1227 Py_DECREF(chain);
1228 1228 return result;
1229 1229
1230 1230 bail:
1231 1231 Py_DECREF(chain);
1232 1232 return NULL;
1233 1233 }
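
/*
 * Usage sketch (hypothetical caller, for illustration only):
 * chain, stopped = index.deltachain(rev, stoprev, generaldelta).
 * `chain` lists the revisions to apply, in ascending order and ending
 * with `rev`; `stopped` is True when the walk ended because it reached
 * `stoprev` (which is then not included in the chain) rather than a
 * full snapshot.
 */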
1234 1234
1235 1235 static inline int64_t
1236 1236 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1237 1237 {
1238 1238 int64_t start_offset;
1239 1239 int64_t end_offset;
1240 1240 int end_size;
1241 1241 start_offset = index_get_start(self, start_rev);
1242 1242 if (start_offset < 0) {
1243 1243 return -1;
1244 1244 }
1245 1245 end_offset = index_get_start(self, end_rev);
1246 1246 if (end_offset < 0) {
1247 1247 return -1;
1248 1248 }
1249 1249 end_size = index_get_length(self, end_rev);
1250 1250 if (end_size < 0) {
1251 1251 return -1;
1252 1252 }
1253 1253 if (end_offset < start_offset) {
1254 1254 PyErr_Format(PyExc_ValueError,
1255 1255 "corrupted revlog index: inconsistent offset "
1256 1256 "between revisions (%zd) and (%zd)",
1257 1257 start_rev, end_rev);
1258 1258 return -1;
1259 1259 }
1260 1260 return (end_offset - start_offset) + (int64_t)end_size;
1261 1261 }
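
/*
 * Example (illustrative numbers): if rev 3 starts at byte 100 and rev 7
 * starts at byte 900 with a 50-byte stored delta, the span of the segment
 * [3, 7] is (900 - 100) + 50 = 850 bytes.
 */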
1262 1262
1263 1263 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1264 1264 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1265 1265 Py_ssize_t startidx, Py_ssize_t endidx)
1266 1266 {
1267 1267 int length;
1268 1268 while (endidx > 1 && endidx > startidx) {
1269 1269 length = index_get_length(self, revs[endidx - 1]);
1270 1270 if (length < 0) {
1271 1271 return -1;
1272 1272 }
1273 1273 if (length != 0) {
1274 1274 break;
1275 1275 }
1276 1276 endidx -= 1;
1277 1277 }
1278 1278 return endidx;
1279 1279 }
1280 1280
1281 1281 struct Gap {
1282 1282 int64_t size;
1283 1283 Py_ssize_t idx;
1284 1284 };
1285 1285
1286 1286 static int gap_compare(const void *left, const void *right)
1287 1287 {
1288 1288 const struct Gap *l_left = ((const struct Gap *)left);
1289 1289 const struct Gap *l_right = ((const struct Gap *)right);
1290 1290 if (l_left->size < l_right->size) {
1291 1291 return -1;
1292 1292 } else if (l_left->size > l_right->size) {
1293 1293 return 1;
1294 1294 }
1295 1295 return 0;
1296 1296 }
1297 1297 static int Py_ssize_t_compare(const void *left, const void *right)
1298 1298 {
1299 1299 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1300 1300 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1301 1301 if (l_left < l_right) {
1302 1302 return -1;
1303 1303 } else if (l_left > l_right) {
1304 1304 return 1;
1305 1305 }
1306 1306 return 0;
1307 1307 }
1308 1308
1309 1309 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1310 1310 {
1311 1311 /* method arguments */
1312 1312 PyObject *list_revs = NULL; /* revisions in the chain */
1313 1313 double targetdensity = 0; /* min density to achieve */
1314 1314 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1315 1315
1316 1316 /* other core variables */
1317 1317 Py_ssize_t idxlen = index_length(self);
1318 1318 Py_ssize_t i; /* used for various iteration */
1319 1319 PyObject *result = NULL; /* the final return of the function */
1320 1320
1321 1321 	/* generic information about the delta chain being sliced */
1322 1322 	Py_ssize_t num_revs = 0; /* size of the full delta chain */
1323 1323 	Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1324 1324 	int64_t chainpayload = 0; /* sum of all deltas in the chain */
1325 1325 	int64_t deltachainspan = 0; /* distance from first byte to last byte */
1326 1326
1327 1327 	/* variables used for slicing the delta chain */
1328 1328 	int64_t readdata = 0; /* amount of data currently planned to be read */
1329 1329 	double density = 0; /* ratio of payload data to data actually read */
1330 1330 	int64_t previous_end;
1331 1331 	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1332 1332 	Py_ssize_t num_gaps =
1333 1333 	    0; /* total number of notable gaps recorded so far */
1334 1334 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1335 1335 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1336 1336 PyObject *chunk = NULL; /* individual slice */
1337 1337 PyObject *allchunks = NULL; /* all slices */
1338 1338 Py_ssize_t previdx;
1339 1339
1340 1340 /* parsing argument */
1341 1341 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1342 1342 &targetdensity, &mingapsize)) {
1343 1343 goto bail;
1344 1344 }
1345 1345
1346 1346 /* If the delta chain contains a single element, we do not need slicing
1347 1347 */
1348 1348 num_revs = PyList_GET_SIZE(list_revs);
1349 1349 if (num_revs <= 1) {
1350 1350 result = PyTuple_Pack(1, list_revs);
1351 1351 goto done;
1352 1352 }
1353 1353
1354 1354 /* Turn the python list into a native integer array (for efficiency) */
1355 1355 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1356 1356 if (revs == NULL) {
1357 1357 PyErr_NoMemory();
1358 1358 goto bail;
1359 1359 }
1360 1360 for (i = 0; i < num_revs; i++) {
1361 1361 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1362 1362 if (revnum == -1 && PyErr_Occurred()) {
1363 1363 goto bail;
1364 1364 }
1365 1365 if (revnum < nullrev || revnum >= idxlen) {
1366 1366 PyErr_Format(PyExc_IndexError,
1367 1367 "index out of range: %zd", revnum);
1368 1368 goto bail;
1369 1369 }
1370 1370 revs[i] = revnum;
1371 1371 }
1372 1372
1373 1373 	/* Compute and check various properties of the unsliced delta chain */
1374 1374 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1375 1375 if (deltachainspan < 0) {
1376 1376 goto bail;
1377 1377 }
1378 1378
1379 1379 if (deltachainspan <= mingapsize) {
1380 1380 result = PyTuple_Pack(1, list_revs);
1381 1381 goto done;
1382 1382 }
1383 1383 chainpayload = 0;
1384 1384 for (i = 0; i < num_revs; i++) {
1385 1385 int tmp = index_get_length(self, revs[i]);
1386 1386 if (tmp < 0) {
1387 1387 goto bail;
1388 1388 }
1389 1389 chainpayload += tmp;
1390 1390 }
1391 1391
1392 1392 readdata = deltachainspan;
1393 1393 density = 1.0;
1394 1394
1395 1395 if (0 < deltachainspan) {
1396 1396 density = (double)chainpayload / (double)deltachainspan;
1397 1397 }
1398 1398
1399 1399 if (density >= targetdensity) {
1400 1400 result = PyTuple_Pack(1, list_revs);
1401 1401 goto done;
1402 1402 }
1403 1403
1404 1404 /* if chain is too sparse, look for relevant gaps */
1405 1405 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1406 1406 if (gaps == NULL) {
1407 1407 PyErr_NoMemory();
1408 1408 goto bail;
1409 1409 }
1410 1410
1411 1411 previous_end = -1;
1412 1412 for (i = 0; i < num_revs; i++) {
1413 1413 int64_t revstart;
1414 1414 int revsize;
1415 1415 revstart = index_get_start(self, revs[i]);
1416 1416 if (revstart < 0) {
1417 1417 goto bail;
1418 1418 };
1419 1419 revsize = index_get_length(self, revs[i]);
1420 1420 if (revsize < 0) {
1421 1421 goto bail;
1422 1422 };
1423 1423 if (revsize == 0) {
1424 1424 continue;
1425 1425 }
1426 1426 if (previous_end >= 0) {
1427 1427 int64_t gapsize = revstart - previous_end;
1428 1428 if (gapsize > mingapsize) {
1429 1429 gaps[num_gaps].size = gapsize;
1430 1430 gaps[num_gaps].idx = i;
1431 1431 num_gaps += 1;
1432 1432 }
1433 1433 }
1434 1434 previous_end = revstart + revsize;
1435 1435 }
1436 1436 if (num_gaps == 0) {
1437 1437 result = PyTuple_Pack(1, list_revs);
1438 1438 goto done;
1439 1439 }
1440 1440 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1441 1441
1442 1442 	/* Slice the largest gaps first; they improve the density the most */
1443 1443 selected_indices =
1444 1444 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1445 1445 if (selected_indices == NULL) {
1446 1446 PyErr_NoMemory();
1447 1447 goto bail;
1448 1448 }
1449 1449
1450 1450 for (i = num_gaps - 1; i >= 0; i--) {
1451 1451 selected_indices[num_selected] = gaps[i].idx;
1452 1452 readdata -= gaps[i].size;
1453 1453 num_selected += 1;
1454 1454 if (readdata <= 0) {
1455 1455 density = 1.0;
1456 1456 } else {
1457 1457 density = (double)chainpayload / (double)readdata;
1458 1458 }
1459 1459 if (density >= targetdensity) {
1460 1460 break;
1461 1461 }
1462 1462 }
1463 1463 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1464 1464 &Py_ssize_t_compare);
1465 1465
1466 1466 /* create the resulting slice */
1467 1467 allchunks = PyList_New(0);
1468 1468 if (allchunks == NULL) {
1469 1469 goto bail;
1470 1470 }
1471 1471 previdx = 0;
1472 1472 selected_indices[num_selected] = num_revs;
1473 1473 for (i = 0; i <= num_selected; i++) {
1474 1474 Py_ssize_t idx = selected_indices[i];
1475 1475 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1476 1476 if (endidx < 0) {
1477 1477 goto bail;
1478 1478 }
1479 1479 if (previdx < endidx) {
1480 1480 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1481 1481 if (chunk == NULL) {
1482 1482 goto bail;
1483 1483 }
1484 1484 if (PyList_Append(allchunks, chunk) == -1) {
1485 1485 goto bail;
1486 1486 }
1487 1487 Py_DECREF(chunk);
1488 1488 chunk = NULL;
1489 1489 }
1490 1490 previdx = idx;
1491 1491 }
1492 1492 result = allchunks;
1493 1493 goto done;
1494 1494
1495 1495 bail:
1496 1496 Py_XDECREF(allchunks);
1497 1497 Py_XDECREF(chunk);
1498 1498 done:
1499 1499 free(revs);
1500 1500 free(gaps);
1501 1501 free(selected_indices);
1502 1502 return result;
1503 1503 }
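
/*
 * Worked example (illustrative numbers): a chain whose revisions span
 * 1000 bytes on disk but carry only 300 bytes of deltas has a density of
 * 0.3. If a single 600-byte gap accounts for most of the dead space,
 * skipping it leaves readdata = 400 and density = 300 / 400 = 0.75; once
 * the target density is reached, the chain is cut at the selected gaps
 * and returned as separate chunks.
 */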
1504 1504
1505 1505 static inline int nt_level(const char *node, Py_ssize_t level)
1506 1506 {
1507 1507 int v = node[level >> 1];
1508 1508 if (!(level & 1))
1509 1509 v >>= 4;
1510 1510 return v & 0xf;
1511 1511 }
1512 1512
1513 1513 /*
1514 1514 * Return values:
1515 1515 *
1516 1516 * -4: match is ambiguous (multiple candidates)
1517 1517 * -2: not found
1518 1518  * rest: valid rev (-1 is returned for nullid)
1519 1519 */
1520 1520 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1521 1521 int hex)
1522 1522 {
1523 1523 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1524 1524 int level, maxlevel, off;
1525 1525
1526 1526 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1527 1527 return -1;
1528 1528
1529 1529 if (hex)
1530 1530 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1531 1531 else
1532 1532 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1533 1533
1534 1534 for (level = off = 0; level < maxlevel; level++) {
1535 1535 int k = getnybble(node, level);
1536 1536 nodetreenode *n = &self->nodes[off];
1537 1537 int v = n->children[k];
1538 1538
1539 1539 if (v < 0) {
1540 1540 const char *n;
1541 1541 Py_ssize_t i;
1542 1542
1543 1543 v = -(v + 2);
1544 1544 n = index_node(self->index, v);
1545 1545 if (n == NULL)
1546 1546 return -2;
1547 1547 for (i = level; i < maxlevel; i++)
1548 1548 if (getnybble(node, i) != nt_level(n, i))
1549 1549 return -2;
1550 1550 return v;
1551 1551 }
1552 1552 if (v == 0)
1553 1553 return -2;
1554 1554 off = v;
1555 1555 }
1556 1556 /* multiple matches against an ambiguous prefix */
1557 1557 return -4;
1558 1558 }
1559 1559
1560 1560 static int nt_new(nodetree *self)
1561 1561 {
1562 1562 if (self->length == self->capacity) {
1563 1563 unsigned newcapacity;
1564 1564 nodetreenode *newnodes;
1565 1565 newcapacity = self->capacity * 2;
1566 1566 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1567 1567 PyErr_SetString(PyExc_MemoryError,
1568 1568 "overflow in nt_new");
1569 1569 return -1;
1570 1570 }
1571 1571 newnodes =
1572 1572 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1573 1573 if (newnodes == NULL) {
1574 1574 PyErr_SetString(PyExc_MemoryError, "out of memory");
1575 1575 return -1;
1576 1576 }
1577 1577 self->capacity = newcapacity;
1578 1578 self->nodes = newnodes;
1579 1579 memset(&self->nodes[self->length], 0,
1580 1580 sizeof(nodetreenode) * (self->capacity - self->length));
1581 1581 }
1582 1582 return self->length++;
1583 1583 }
1584 1584
1585 1585 static int nt_insert(nodetree *self, const char *node, int rev)
1586 1586 {
1587 1587 int level = 0;
1588 1588 int off = 0;
1589 1589
1590 1590 while (level < 40) {
1591 1591 int k = nt_level(node, level);
1592 1592 nodetreenode *n;
1593 1593 int v;
1594 1594
1595 1595 n = &self->nodes[off];
1596 1596 v = n->children[k];
1597 1597
1598 1598 if (v == 0) {
1599 1599 n->children[k] = -rev - 2;
1600 1600 return 0;
1601 1601 }
1602 1602 if (v < 0) {
1603 1603 const char *oldnode =
1604 1604 index_node_existing(self->index, -(v + 2));
1605 1605 int noff;
1606 1606
1607 1607 if (oldnode == NULL)
1608 1608 return -1;
1609 1609 if (!memcmp(oldnode, node, 20)) {
1610 1610 n->children[k] = -rev - 2;
1611 1611 return 0;
1612 1612 }
1613 1613 noff = nt_new(self);
1614 1614 if (noff == -1)
1615 1615 return -1;
1616 1616 /* self->nodes may have been changed by realloc */
1617 1617 self->nodes[off].children[k] = noff;
1618 1618 off = noff;
1619 1619 n = &self->nodes[off];
1620 1620 n->children[nt_level(oldnode, ++level)] = v;
1621 1621 if (level > self->depth)
1622 1622 self->depth = level;
1623 1623 self->splits += 1;
1624 1624 } else {
1625 1625 level += 1;
1626 1626 off = v;
1627 1627 }
1628 1628 }
1629 1629
1630 1630 return -1;
1631 1631 }
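
/*
 * Encoding example (restating the scheme used above): children[k] == 0
 * means "empty", a positive value is the offset of another nodetree node,
 * and a leaf holding revision r is stored as -(r + 2). For instance rev 5
 * is stored as -7 and decoded back as -(-7 + 2) == 5, which keeps rev 0
 * (stored as -2) and the nullid rev -1 (stored as -1) distinct from the
 * "empty" marker and from positive inner-node offsets.
 */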
1632 1632
1633 1633 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1634 1634 {
1635 1635 Py_ssize_t rev;
1636 1636 const char *node;
1637 1637 Py_ssize_t length;
1638 1638 if (!PyArg_ParseTuple(args, "n", &rev))
1639 1639 return NULL;
1640 1640 length = index_length(self->nt.index);
1641 1641 if (rev < 0 || rev >= length) {
1642 1642 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1643 1643 return NULL;
1644 1644 }
1645 1645 node = index_node_existing(self->nt.index, rev);
1646 1646 if (nt_insert(&self->nt, node, (int)rev) == -1)
1647 1647 return NULL;
1648 1648 Py_RETURN_NONE;
1649 1649 }
1650 1650
1651 1651 static int nt_delete_node(nodetree *self, const char *node)
1652 1652 {
1653 1653 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1654 1654 */
1655 1655 return nt_insert(self, node, -2);
1656 1656 }
1657 1657
1658 1658 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1659 1659 {
1660 1660 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1661 1661 self->nodes = NULL;
1662 1662
1663 1663 self->index = index;
1664 1664 /* The input capacity is in terms of revisions, while the field is in
1665 1665 * terms of nodetree nodes. */
1666 1666 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1667 1667 self->depth = 0;
1668 1668 self->splits = 0;
1669 1669 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1670 1670 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1671 1671 return -1;
1672 1672 }
1673 1673 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1674 1674 if (self->nodes == NULL) {
1675 1675 PyErr_NoMemory();
1676 1676 return -1;
1677 1677 }
1678 1678 self->length = 1;
1679 1679 return 0;
1680 1680 }
1681 1681
1682 1682 static int ntobj_init(nodetreeObject *self, PyObject *args)
1683 1683 {
1684 1684 PyObject *index;
1685 1685 unsigned capacity;
1686 1686 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1687 1687 &capacity))
1688 1688 return -1;
1689 1689 Py_INCREF(index);
1690 1690 return nt_init(&self->nt, (indexObject *)index, capacity);
1691 1691 }
1692 1692
1693 1693 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1694 1694 {
1695 1695 return nt_find(self, node, nodelen, 1);
1696 1696 }
1697 1697
1698 1698 /*
1699 1699 * Find the length of the shortest unique prefix of node.
1700 1700 *
1701 1701 * Return values:
1702 1702 *
1703 1703 * -3: error (exception set)
1704 1704 * -2: not found (no exception set)
1705 1705 * rest: length of shortest prefix
1706 1706 */
1707 1707 static int nt_shortest(nodetree *self, const char *node)
1708 1708 {
1709 1709 int level, off;
1710 1710
1711 1711 for (level = off = 0; level < 40; level++) {
1712 1712 int k, v;
1713 1713 nodetreenode *n = &self->nodes[off];
1714 1714 k = nt_level(node, level);
1715 1715 v = n->children[k];
1716 1716 if (v < 0) {
1717 1717 const char *n;
1718 1718 v = -(v + 2);
1719 1719 n = index_node_existing(self->index, v);
1720 1720 if (n == NULL)
1721 1721 return -3;
1722 1722 if (memcmp(node, n, 20) != 0)
1723 1723 /*
1724 1724 * Found a unique prefix, but it wasn't for the
1725 1725 			 * requested node (i.e. the requested node does
1726 1726 * not exist).
1727 1727 */
1728 1728 return -2;
1729 1729 return level + 1;
1730 1730 }
1731 1731 if (v == 0)
1732 1732 return -2;
1733 1733 off = v;
1734 1734 }
1735 1735 /*
1736 1736 	 * A node can never still be ambiguous after all 40 hex digits have
1737 1737 	 * been consumed, so we should not get here. If we do, a programming
1738 1738 	 * error in this file made us insert a node longer than 40 hex digits.
1739 1739 */
1740 1740 PyErr_SetString(PyExc_Exception, "broken node tree");
1741 1741 return -3;
1742 1742 }
1743 1743
1744 1744 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1745 1745 {
1746 1746 PyObject *val;
1747 1747 char *node;
1748 1748 int length;
1749 1749
1750 1750 if (!PyArg_ParseTuple(args, "O", &val))
1751 1751 return NULL;
1752 1752 if (node_check(val, &node) == -1)
1753 1753 return NULL;
1754 1754
1755 1755 length = nt_shortest(&self->nt, node);
1756 1756 if (length == -3)
1757 1757 return NULL;
1758 1758 if (length == -2) {
1759 1759 raise_revlog_error();
1760 1760 return NULL;
1761 1761 }
1762 1762 return PyInt_FromLong(length);
1763 1763 }
1764 1764
1765 1765 static void nt_dealloc(nodetree *self)
1766 1766 {
1767 1767 free(self->nodes);
1768 1768 self->nodes = NULL;
1769 1769 }
1770 1770
1771 1771 static void ntobj_dealloc(nodetreeObject *self)
1772 1772 {
1773 1773 Py_XDECREF(self->nt.index);
1774 1774 nt_dealloc(&self->nt);
1775 1775 PyObject_Del(self);
1776 1776 }
1777 1777
1778 1778 static PyMethodDef ntobj_methods[] = {
1779 1779 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1780 1780 "insert an index entry"},
1781 1781 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1782 1782 "find length of shortest hex nodeid of a binary ID"},
1783 1783 {NULL} /* Sentinel */
1784 1784 };
1785 1785
1786 1786 static PyTypeObject nodetreeType = {
1787 1787 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1788 1788 "parsers.nodetree", /* tp_name */
1789 1789 sizeof(nodetreeObject), /* tp_basicsize */
1790 1790 0, /* tp_itemsize */
1791 1791 (destructor)ntobj_dealloc, /* tp_dealloc */
1792 1792 0, /* tp_print */
1793 1793 0, /* tp_getattr */
1794 1794 0, /* tp_setattr */
1795 1795 0, /* tp_compare */
1796 1796 0, /* tp_repr */
1797 1797 0, /* tp_as_number */
1798 1798 0, /* tp_as_sequence */
1799 1799 0, /* tp_as_mapping */
1800 1800 0, /* tp_hash */
1801 1801 0, /* tp_call */
1802 1802 0, /* tp_str */
1803 1803 0, /* tp_getattro */
1804 1804 0, /* tp_setattro */
1805 1805 0, /* tp_as_buffer */
1806 1806 Py_TPFLAGS_DEFAULT, /* tp_flags */
1807 1807 "nodetree", /* tp_doc */
1808 1808 0, /* tp_traverse */
1809 1809 0, /* tp_clear */
1810 1810 0, /* tp_richcompare */
1811 1811 0, /* tp_weaklistoffset */
1812 1812 0, /* tp_iter */
1813 1813 0, /* tp_iternext */
1814 1814 ntobj_methods, /* tp_methods */
1815 1815 0, /* tp_members */
1816 1816 0, /* tp_getset */
1817 1817 0, /* tp_base */
1818 1818 0, /* tp_dict */
1819 1819 0, /* tp_descr_get */
1820 1820 0, /* tp_descr_set */
1821 1821 0, /* tp_dictoffset */
1822 1822 (initproc)ntobj_init, /* tp_init */
1823 1823 0, /* tp_alloc */
1824 1824 };
1825 1825
1826 1826 static int index_init_nt(indexObject *self)
1827 1827 {
1828 1828 if (!self->ntinitialized) {
1829 1829 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1830 1830 nt_dealloc(&self->nt);
1831 1831 return -1;
1832 1832 }
1833 1833 if (nt_insert(&self->nt, nullid, -1) == -1) {
1834 1834 nt_dealloc(&self->nt);
1835 1835 return -1;
1836 1836 }
1837 1837 self->ntinitialized = 1;
1838 1838 self->ntrev = (int)index_length(self);
1839 1839 self->ntlookups = 1;
1840 1840 self->ntmisses = 0;
1841 1841 }
1842 1842 return 0;
1843 1843 }
1844 1844
1845 1845 /*
1846 1846 * Return values:
1847 1847 *
1848 1848 * -3: error (exception set)
1849 1849 * -2: not found (no exception set)
1850 1850 * rest: valid rev
1851 1851 */
1852 1852 static int index_find_node(indexObject *self, const char *node,
1853 1853 Py_ssize_t nodelen)
1854 1854 {
1855 1855 int rev;
1856 1856
1857 1857 if (index_init_nt(self) == -1)
1858 1858 return -3;
1859 1859
1860 1860 self->ntlookups++;
1861 1861 rev = nt_find(&self->nt, node, nodelen, 0);
1862 1862 if (rev >= -1)
1863 1863 return rev;
1864 1864
1865 1865 /*
1866 1866 * For the first handful of lookups, we scan the entire index,
1867 1867 * and cache only the matching nodes. This optimizes for cases
1868 1868 * like "hg tip", where only a few nodes are accessed.
1869 1869 *
1870 1870 * After that, we cache every node we visit, using a single
1871 1871 * scan amortized over multiple lookups. This gives the best
1872 1872 * bulk performance, e.g. for "hg log".
1873 1873 */
1874 1874 if (self->ntmisses++ < 4) {
1875 1875 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1876 1876 const char *n = index_node_existing(self, rev);
1877 1877 if (n == NULL)
1878 1878 return -3;
1879 1879 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1880 1880 if (nt_insert(&self->nt, n, rev) == -1)
1881 1881 return -3;
1882 1882 break;
1883 1883 }
1884 1884 }
1885 1885 } else {
1886 1886 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1887 1887 const char *n = index_node_existing(self, rev);
1888 1888 if (n == NULL)
1889 1889 return -3;
1890 1890 if (nt_insert(&self->nt, n, rev) == -1) {
1891 1891 self->ntrev = rev + 1;
1892 1892 return -3;
1893 1893 }
1894 1894 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1895 1895 break;
1896 1896 }
1897 1897 }
1898 1898 self->ntrev = rev;
1899 1899 }
1900 1900
1901 1901 if (rev >= 0)
1902 1902 return rev;
1903 1903 return -2;
1904 1904 }
1905 1905
1906 1906 static PyObject *index_getitem(indexObject *self, PyObject *value)
1907 1907 {
1908 1908 char *node;
1909 1909 int rev;
1910 1910
1911 1911 if (PyInt_Check(value)) {
1912 1912 long idx;
1913 1913 if (!pylong_to_long(value, &idx)) {
1914 1914 return NULL;
1915 1915 }
1916 1916 return index_get(self, idx);
1917 1917 }
1918 1918
1919 1919 if (node_check(value, &node) == -1)
1920 1920 return NULL;
1921 1921 rev = index_find_node(self, node, 20);
1922 1922 if (rev >= -1)
1923 1923 return PyInt_FromLong(rev);
1924 1924 if (rev == -2)
1925 1925 raise_revlog_error();
1926 1926 return NULL;
1927 1927 }
1928 1928
1929 1929 /*
1930 1930 * Fully populate the radix tree.
1931 1931 */
1932 1932 static int index_populate_nt(indexObject *self)
1933 1933 {
1934 1934 int rev;
1935 1935 if (self->ntrev > 0) {
1936 1936 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1937 1937 const char *n = index_node_existing(self, rev);
1938 1938 if (n == NULL)
1939 1939 return -1;
1940 1940 if (nt_insert(&self->nt, n, rev) == -1)
1941 1941 return -1;
1942 1942 }
1943 1943 self->ntrev = -1;
1944 1944 }
1945 1945 return 0;
1946 1946 }
1947 1947
1948 1948 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1949 1949 {
1950 1950 const char *fullnode;
1951 1951 Py_ssize_t nodelen;
1952 1952 char *node;
1953 1953 int rev, i;
1954 1954
1955 1955 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1956 1956 return NULL;
1957 1957
1958 1958 if (nodelen < 1) {
1959 1959 PyErr_SetString(PyExc_ValueError, "key too short");
1960 1960 return NULL;
1961 1961 }
1962 1962
1963 1963 if (nodelen > 40) {
1964 1964 PyErr_SetString(PyExc_ValueError, "key too long");
1965 1965 return NULL;
1966 1966 }
1967 1967
1968 1968 for (i = 0; i < nodelen; i++)
1969 1969 hexdigit(node, i);
1970 1970 if (PyErr_Occurred()) {
1971 1971 /* input contains non-hex characters */
1972 1972 PyErr_Clear();
1973 1973 Py_RETURN_NONE;
1974 1974 }
1975 1975
1976 1976 if (index_init_nt(self) == -1)
1977 1977 return NULL;
1978 1978 if (index_populate_nt(self) == -1)
1979 1979 return NULL;
1980 1980 rev = nt_partialmatch(&self->nt, node, nodelen);
1981 1981
1982 1982 switch (rev) {
1983 1983 case -4:
1984 1984 raise_revlog_error();
1985 1985 return NULL;
1986 1986 case -2:
1987 1987 Py_RETURN_NONE;
1988 1988 case -1:
1989 1989 return PyBytes_FromStringAndSize(nullid, 20);
1990 1990 }
1991 1991
1992 1992 fullnode = index_node_existing(self, rev);
1993 1993 if (fullnode == NULL) {
1994 1994 return NULL;
1995 1995 }
1996 1996 return PyBytes_FromStringAndSize(fullnode, 20);
1997 1997 }
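
/*
 * Behaviour sketch (illustrative): index.partialmatch(b'1234ab') returns
 * the full 20-byte binary node whose hex form starts with "1234ab", None
 * when nothing matches (or the key contains non-hex characters), and
 * raises RevlogError when the prefix is ambiguous.
 */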
1998 1998
1999 1999 static PyObject *index_shortest(indexObject *self, PyObject *args)
2000 2000 {
2001 2001 PyObject *val;
2002 2002 char *node;
2003 2003 int length;
2004 2004
2005 2005 if (!PyArg_ParseTuple(args, "O", &val))
2006 2006 return NULL;
2007 2007 if (node_check(val, &node) == -1)
2008 2008 return NULL;
2009 2009
2010 2010 self->ntlookups++;
2011 2011 if (index_init_nt(self) == -1)
2012 2012 return NULL;
2013 2013 if (index_populate_nt(self) == -1)
2014 2014 return NULL;
2015 2015 length = nt_shortest(&self->nt, node);
2016 2016 if (length == -3)
2017 2017 return NULL;
2018 2018 if (length == -2) {
2019 2019 raise_revlog_error();
2020 2020 return NULL;
2021 2021 }
2022 2022 return PyInt_FromLong(length);
2023 2023 }
2024 2024
2025 2025 static PyObject *index_m_get(indexObject *self, PyObject *args)
2026 2026 {
2027 2027 PyObject *val;
2028 2028 char *node;
2029 2029 int rev;
2030 2030
2031 2031 if (!PyArg_ParseTuple(args, "O", &val))
2032 2032 return NULL;
2033 2033 if (node_check(val, &node) == -1)
2034 2034 return NULL;
2035 2035 rev = index_find_node(self, node, 20);
2036 2036 if (rev == -3)
2037 2037 return NULL;
2038 2038 if (rev == -2)
2039 2039 Py_RETURN_NONE;
2040 2040 return PyInt_FromLong(rev);
2041 2041 }
2042 2042
2043 2043 static int index_contains(indexObject *self, PyObject *value)
2044 2044 {
2045 2045 char *node;
2046 2046
2047 2047 if (PyInt_Check(value)) {
2048 2048 long rev;
2049 2049 if (!pylong_to_long(value, &rev)) {
2050 2050 return -1;
2051 2051 }
2052 2052 return rev >= -1 && rev < index_length(self);
2053 2053 }
2054 2054
2055 2055 if (node_check(value, &node) == -1)
2056 2056 return -1;
2057 2057
2058 2058 switch (index_find_node(self, node, 20)) {
2059 2059 case -3:
2060 2060 return -1;
2061 2061 case -2:
2062 2062 return 0;
2063 2063 default:
2064 2064 return 1;
2065 2065 }
2066 2066 }
2067 2067
2068 2068 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2069 2069 {
2070 2070 int ret = index_contains(self, args);
2071 2071 if (ret < 0)
2072 2072 return NULL;
2073 2073 return PyBool_FromLong((long)ret);
2074 2074 }
2075 2075
2076 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2077 {
2078 char *node;
2079 int rev;
2080
2081 if (node_check(val, &node) == -1)
2082 return NULL;
2083 rev = index_find_node(self, node, 20);
2084 if (rev >= -1)
2085 return PyInt_FromLong(rev);
2086 if (rev == -2)
2087 raise_revlog_error();
2088 return NULL;
2089 }
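
/*
 * Usage sketch for the new method (hypothetical caller, not part of this
 * change): from Python, index.rev(node) returns the integer revision of a
 * 20-byte binary node and raises RevlogError when the node is unknown.
 * A rough C-API equivalent would be:
 *
 *     PyObject *revobj = PyObject_CallMethod((PyObject *)idx, "rev", "y#",
 *                                            node, (Py_ssize_t)20);
 *     if (revobj == NULL) {
 *             // lookup failed; RevlogError (or another error) is set
 *     }
 */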
2090
2076 2091 typedef uint64_t bitmask;
2077 2092
2078 2093 /*
2079 2094 * Given a disjoint set of revs, return all candidates for the
2080 2095 * greatest common ancestor. In revset notation, this is the set
2081 2096 * "heads(::a and ::b and ...)"
2082 2097 */
2083 2098 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2084 2099 int revcount)
2085 2100 {
2086 2101 const bitmask allseen = (1ull << revcount) - 1;
2087 2102 const bitmask poison = 1ull << revcount;
2088 2103 PyObject *gca = PyList_New(0);
2089 2104 int i, v, interesting;
2090 2105 int maxrev = -1;
2091 2106 bitmask sp;
2092 2107 bitmask *seen;
2093 2108
2094 2109 if (gca == NULL)
2095 2110 return PyErr_NoMemory();
2096 2111
2097 2112 for (i = 0; i < revcount; i++) {
2098 2113 if (revs[i] > maxrev)
2099 2114 maxrev = revs[i];
2100 2115 }
2101 2116
2102 2117 seen = calloc(sizeof(*seen), maxrev + 1);
2103 2118 if (seen == NULL) {
2104 2119 Py_DECREF(gca);
2105 2120 return PyErr_NoMemory();
2106 2121 }
2107 2122
2108 2123 for (i = 0; i < revcount; i++)
2109 2124 seen[revs[i]] = 1ull << i;
2110 2125
2111 2126 interesting = revcount;
2112 2127
2113 2128 for (v = maxrev; v >= 0 && interesting; v--) {
2114 2129 bitmask sv = seen[v];
2115 2130 int parents[2];
2116 2131
2117 2132 if (!sv)
2118 2133 continue;
2119 2134
2120 2135 if (sv < poison) {
2121 2136 interesting -= 1;
2122 2137 if (sv == allseen) {
2123 2138 PyObject *obj = PyInt_FromLong(v);
2124 2139 if (obj == NULL)
2125 2140 goto bail;
2126 2141 if (PyList_Append(gca, obj) == -1) {
2127 2142 Py_DECREF(obj);
2128 2143 goto bail;
2129 2144 }
2130 2145 sv |= poison;
2131 2146 for (i = 0; i < revcount; i++) {
2132 2147 if (revs[i] == v)
2133 2148 goto done;
2134 2149 }
2135 2150 }
2136 2151 }
2137 2152 if (index_get_parents(self, v, parents, maxrev) < 0)
2138 2153 goto bail;
2139 2154
2140 2155 for (i = 0; i < 2; i++) {
2141 2156 int p = parents[i];
2142 2157 if (p == -1)
2143 2158 continue;
2144 2159 sp = seen[p];
2145 2160 if (sv < poison) {
2146 2161 if (sp == 0) {
2147 2162 seen[p] = sv;
2148 2163 interesting++;
2149 2164 } else if (sp != sv)
2150 2165 seen[p] |= sv;
2151 2166 } else {
2152 2167 if (sp && sp < poison)
2153 2168 interesting--;
2154 2169 seen[p] = sv;
2155 2170 }
2156 2171 }
2157 2172 }
2158 2173
2159 2174 done:
2160 2175 free(seen);
2161 2176 return gca;
2162 2177 bail:
2163 2178 free(seen);
2164 2179 Py_XDECREF(gca);
2165 2180 return NULL;
2166 2181 }
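
/*
 * Worked example (illustrative): for revs = [2, 3] where both revisions
 * have rev 1 as a parent, seen[2] = 0b01 and seen[3] = 0b10. Walking from
 * maxrev downwards propagates both bits onto rev 1, so seen[1] becomes
 * allseen (0b11); rev 1 is appended to the result and then poisoned so
 * that its own ancestors are not reported as additional candidates.
 */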
2167 2182
2168 2183 /*
2169 2184 * Given a disjoint set of revs, return the subset with the longest
2170 2185 * path to the root.
2171 2186 */
2172 2187 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2173 2188 {
2174 2189 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2175 2190 static const Py_ssize_t capacity = 24;
2176 2191 int *depth, *interesting = NULL;
2177 2192 int i, j, v, ninteresting;
2178 2193 PyObject *dict = NULL, *keys = NULL;
2179 2194 long *seen = NULL;
2180 2195 int maxrev = -1;
2181 2196 long final;
2182 2197
2183 2198 if (revcount > capacity) {
2184 2199 PyErr_Format(PyExc_OverflowError,
2185 2200 "bitset size (%ld) > capacity (%ld)",
2186 2201 (long)revcount, (long)capacity);
2187 2202 return NULL;
2188 2203 }
2189 2204
2190 2205 for (i = 0; i < revcount; i++) {
2191 2206 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2192 2207 if (n > maxrev)
2193 2208 maxrev = n;
2194 2209 }
2195 2210
2196 2211 depth = calloc(sizeof(*depth), maxrev + 1);
2197 2212 if (depth == NULL)
2198 2213 return PyErr_NoMemory();
2199 2214
2200 2215 seen = calloc(sizeof(*seen), maxrev + 1);
2201 2216 if (seen == NULL) {
2202 2217 PyErr_NoMemory();
2203 2218 goto bail;
2204 2219 }
2205 2220
2206 2221 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2207 2222 if (interesting == NULL) {
2208 2223 PyErr_NoMemory();
2209 2224 goto bail;
2210 2225 }
2211 2226
2212 2227 if (PyList_Sort(revs) == -1)
2213 2228 goto bail;
2214 2229
2215 2230 for (i = 0; i < revcount; i++) {
2216 2231 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2217 2232 long b = 1l << i;
2218 2233 depth[n] = 1;
2219 2234 seen[n] = b;
2220 2235 interesting[b] = 1;
2221 2236 }
2222 2237
2223 2238 /* invariant: ninteresting is the number of non-zero entries in
2224 2239 * interesting. */
2225 2240 ninteresting = (int)revcount;
2226 2241
2227 2242 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2228 2243 int dv = depth[v];
2229 2244 int parents[2];
2230 2245 long sv;
2231 2246
2232 2247 if (dv == 0)
2233 2248 continue;
2234 2249
2235 2250 sv = seen[v];
2236 2251 if (index_get_parents(self, v, parents, maxrev) < 0)
2237 2252 goto bail;
2238 2253
2239 2254 for (i = 0; i < 2; i++) {
2240 2255 int p = parents[i];
2241 2256 long sp;
2242 2257 int dp;
2243 2258
2244 2259 if (p == -1)
2245 2260 continue;
2246 2261
2247 2262 dp = depth[p];
2248 2263 sp = seen[p];
2249 2264 if (dp <= dv) {
2250 2265 depth[p] = dv + 1;
2251 2266 if (sp != sv) {
2252 2267 interesting[sv] += 1;
2253 2268 seen[p] = sv;
2254 2269 if (sp) {
2255 2270 interesting[sp] -= 1;
2256 2271 if (interesting[sp] == 0)
2257 2272 ninteresting -= 1;
2258 2273 }
2259 2274 }
2260 2275 } else if (dv == dp - 1) {
2261 2276 long nsp = sp | sv;
2262 2277 if (nsp == sp)
2263 2278 continue;
2264 2279 seen[p] = nsp;
2265 2280 interesting[sp] -= 1;
2266 2281 if (interesting[sp] == 0)
2267 2282 ninteresting -= 1;
2268 2283 if (interesting[nsp] == 0)
2269 2284 ninteresting += 1;
2270 2285 interesting[nsp] += 1;
2271 2286 }
2272 2287 }
2273 2288 interesting[sv] -= 1;
2274 2289 if (interesting[sv] == 0)
2275 2290 ninteresting -= 1;
2276 2291 }
2277 2292
2278 2293 final = 0;
2279 2294 j = ninteresting;
2280 2295 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2281 2296 if (interesting[i] == 0)
2282 2297 continue;
2283 2298 final |= i;
2284 2299 j -= 1;
2285 2300 }
2286 2301 if (final == 0) {
2287 2302 keys = PyList_New(0);
2288 2303 goto bail;
2289 2304 }
2290 2305
2291 2306 dict = PyDict_New();
2292 2307 if (dict == NULL)
2293 2308 goto bail;
2294 2309
2295 2310 for (i = 0; i < revcount; i++) {
2296 2311 PyObject *key;
2297 2312
2298 2313 if ((final & (1 << i)) == 0)
2299 2314 continue;
2300 2315
2301 2316 key = PyList_GET_ITEM(revs, i);
2302 2317 Py_INCREF(key);
2303 2318 Py_INCREF(Py_None);
2304 2319 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2305 2320 Py_DECREF(key);
2306 2321 Py_DECREF(Py_None);
2307 2322 goto bail;
2308 2323 }
2309 2324 }
2310 2325
2311 2326 keys = PyDict_Keys(dict);
2312 2327
2313 2328 bail:
2314 2329 free(depth);
2315 2330 free(seen);
2316 2331 free(interesting);
2317 2332 Py_XDECREF(dict);
2318 2333
2319 2334 return keys;
2320 2335 }
2321 2336
2322 2337 /*
2323 2338 * Given a (possibly overlapping) set of revs, return all the
2324 2339  * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2325 2340 */
2326 2341 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2327 2342 {
2328 2343 PyObject *ret = NULL;
2329 2344 Py_ssize_t argcount, i, len;
2330 2345 bitmask repeat = 0;
2331 2346 int revcount = 0;
2332 2347 int *revs;
2333 2348
2334 2349 argcount = PySequence_Length(args);
2335 2350 revs = PyMem_Malloc(argcount * sizeof(*revs));
2336 2351 if (argcount > 0 && revs == NULL)
2337 2352 return PyErr_NoMemory();
2338 2353 len = index_length(self);
2339 2354
2340 2355 for (i = 0; i < argcount; i++) {
2341 2356 static const int capacity = 24;
2342 2357 PyObject *obj = PySequence_GetItem(args, i);
2343 2358 bitmask x;
2344 2359 long val;
2345 2360
2346 2361 if (!PyInt_Check(obj)) {
2347 2362 PyErr_SetString(PyExc_TypeError,
2348 2363 "arguments must all be ints");
2349 2364 Py_DECREF(obj);
2350 2365 goto bail;
2351 2366 }
2352 2367 val = PyInt_AsLong(obj);
2353 2368 Py_DECREF(obj);
2354 2369 if (val == -1) {
2355 2370 ret = PyList_New(0);
2356 2371 goto done;
2357 2372 }
2358 2373 if (val < 0 || val >= len) {
2359 2374 PyErr_SetString(PyExc_IndexError, "index out of range");
2360 2375 goto bail;
2361 2376 }
2362 2377 /* this cheesy bloom filter lets us avoid some more
2363 2378 * expensive duplicate checks in the common set-is-disjoint
2364 2379 * case */
2365 2380 x = 1ull << (val & 0x3f);
2366 2381 if (repeat & x) {
2367 2382 int k;
2368 2383 for (k = 0; k < revcount; k++) {
2369 2384 if (val == revs[k])
2370 2385 goto duplicate;
2371 2386 }
2372 2387 } else
2373 2388 repeat |= x;
2374 2389 if (revcount >= capacity) {
2375 2390 PyErr_Format(PyExc_OverflowError,
2376 2391 "bitset size (%d) > capacity (%d)",
2377 2392 revcount, capacity);
2378 2393 goto bail;
2379 2394 }
2380 2395 revs[revcount++] = (int)val;
2381 2396 duplicate:;
2382 2397 }
2383 2398
2384 2399 if (revcount == 0) {
2385 2400 ret = PyList_New(0);
2386 2401 goto done;
2387 2402 }
2388 2403 if (revcount == 1) {
2389 2404 PyObject *obj;
2390 2405 ret = PyList_New(1);
2391 2406 if (ret == NULL)
2392 2407 goto bail;
2393 2408 obj = PyInt_FromLong(revs[0]);
2394 2409 if (obj == NULL)
2395 2410 goto bail;
2396 2411 PyList_SET_ITEM(ret, 0, obj);
2397 2412 goto done;
2398 2413 }
2399 2414
2400 2415 ret = find_gca_candidates(self, revs, revcount);
2401 2416 if (ret == NULL)
2402 2417 goto bail;
2403 2418
2404 2419 done:
2405 2420 PyMem_Free(revs);
2406 2421 return ret;
2407 2422
2408 2423 bail:
2409 2424 PyMem_Free(revs);
2410 2425 Py_XDECREF(ret);
2411 2426 return NULL;
2412 2427 }
2413 2428
2414 2429 /*
2415 2430 * Given a (possibly overlapping) set of revs, return the greatest
2416 2431 * common ancestors: those with the longest path to the root.
2417 2432 */
2418 2433 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2419 2434 {
2420 2435 PyObject *ret;
2421 2436 PyObject *gca = index_commonancestorsheads(self, args);
2422 2437 if (gca == NULL)
2423 2438 return NULL;
2424 2439
2425 2440 if (PyList_GET_SIZE(gca) <= 1) {
2426 2441 return gca;
2427 2442 }
2428 2443
2429 2444 ret = find_deepest(self, gca);
2430 2445 Py_DECREF(gca);
2431 2446 return ret;
2432 2447 }
2433 2448
2434 2449 /*
2435 2450 * Invalidate any trie entries introduced by added revs.
2436 2451 */
2437 2452 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2438 2453 {
2439 2454 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2440 2455
2441 2456 for (i = start; i < len; i++) {
2442 2457 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2443 2458 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2444 2459
2445 2460 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2446 2461 }
2447 2462
2448 2463 if (start == 0)
2449 2464 Py_CLEAR(self->added);
2450 2465 }
2451 2466
2452 2467 /*
2453 2468 * Delete a numeric range of revs, which must be at the end of the
2454 2469  * index, but exclude the sentinel nullid entry.
2455 2470 */
2456 2471 static int index_slice_del(indexObject *self, PyObject *item)
2457 2472 {
2458 2473 Py_ssize_t start, stop, step, slicelength;
2459 2474 Py_ssize_t length = index_length(self) + 1;
2460 2475 int ret = 0;
2461 2476
2462 2477 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2463 2478 #ifdef IS_PY3K
2464 2479 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2465 2480 &slicelength) < 0)
2466 2481 #else
2467 2482 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2468 2483 &step, &slicelength) < 0)
2469 2484 #endif
2470 2485 return -1;
2471 2486
2472 2487 if (slicelength <= 0)
2473 2488 return 0;
2474 2489
2475 2490 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2476 2491 stop = start;
2477 2492
2478 2493 if (step < 0) {
2479 2494 stop = start + 1;
2480 2495 start = stop + step * (slicelength - 1) - 1;
2481 2496 step = -step;
2482 2497 }
2483 2498
2484 2499 if (step != 1) {
2485 2500 PyErr_SetString(PyExc_ValueError,
2486 2501 "revlog index delete requires step size of 1");
2487 2502 return -1;
2488 2503 }
2489 2504
2490 2505 if (stop != length - 1) {
2491 2506 PyErr_SetString(PyExc_IndexError,
2492 2507 "revlog index deletion indices are invalid");
2493 2508 return -1;
2494 2509 }
2495 2510
2496 2511 if (start < self->length) {
2497 2512 if (self->ntinitialized) {
2498 2513 Py_ssize_t i;
2499 2514
2500 2515 for (i = start; i < self->length; i++) {
2501 2516 const char *node = index_node_existing(self, i);
2502 2517 if (node == NULL)
2503 2518 return -1;
2504 2519
2505 2520 nt_delete_node(&self->nt, node);
2506 2521 }
2507 2522 if (self->added)
2508 2523 index_invalidate_added(self, 0);
2509 2524 if (self->ntrev > start)
2510 2525 self->ntrev = (int)start;
2511 2526 }
2512 2527 self->length = start;
2513 2528 if (start < self->raw_length) {
2514 2529 if (self->cache) {
2515 2530 Py_ssize_t i;
2516 2531 for (i = start; i < self->raw_length; i++)
2517 2532 Py_CLEAR(self->cache[i]);
2518 2533 }
2519 2534 self->raw_length = start;
2520 2535 }
2521 2536 goto done;
2522 2537 }
2523 2538
2524 2539 if (self->ntinitialized) {
2525 2540 index_invalidate_added(self, start - self->length);
2526 2541 if (self->ntrev > start)
2527 2542 self->ntrev = (int)start;
2528 2543 }
2529 2544 if (self->added)
2530 2545 ret = PyList_SetSlice(self->added, start - self->length,
2531 2546 PyList_GET_SIZE(self->added), NULL);
2532 2547 done:
2533 2548 Py_CLEAR(self->headrevs);
2534 2549 return ret;
2535 2550 }
2536 2551
2537 2552 /*
2538 2553 * Supported ops:
2539 2554 *
2540 2555 * slice deletion
2541 2556 * string assignment (extend node->rev mapping)
2542 2557 * string deletion (shrink node->rev mapping)
2543 2558 */
2544 2559 static int index_assign_subscript(indexObject *self, PyObject *item,
2545 2560 PyObject *value)
2546 2561 {
2547 2562 char *node;
2548 2563 long rev;
2549 2564
2550 2565 if (PySlice_Check(item) && value == NULL)
2551 2566 return index_slice_del(self, item);
2552 2567
2553 2568 if (node_check(item, &node) == -1)
2554 2569 return -1;
2555 2570
2556 2571 if (value == NULL)
2557 2572 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2558 2573 : 0;
2559 2574 rev = PyInt_AsLong(value);
2560 2575 if (rev > INT_MAX || rev < 0) {
2561 2576 if (!PyErr_Occurred())
2562 2577 PyErr_SetString(PyExc_ValueError, "rev out of range");
2563 2578 return -1;
2564 2579 }
2565 2580
2566 2581 if (index_init_nt(self) == -1)
2567 2582 return -1;
2568 2583 return nt_insert(&self->nt, node, (int)rev);
2569 2584 }
2570 2585
2571 2586 /*
2572 2587 * Find all RevlogNG entries in an index that has inline data. Update
2573 2588 * the optional "offsets" table with those entries.
2574 2589 */
2575 2590 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2576 2591 {
2577 2592 const char *data = (const char *)self->buf.buf;
2578 2593 Py_ssize_t pos = 0;
2579 2594 Py_ssize_t end = self->buf.len;
2580 2595 long incr = v1_hdrsize;
2581 2596 Py_ssize_t len = 0;
2582 2597
2583 2598 while (pos + v1_hdrsize <= end && pos >= 0) {
2584 2599 uint32_t comp_len;
2585 2600 /* 3rd element of header is length of compressed inline data */
2586 2601 comp_len = getbe32(data + pos + 8);
2587 2602 incr = v1_hdrsize + comp_len;
2588 2603 if (offsets)
2589 2604 offsets[len] = data + pos;
2590 2605 len++;
2591 2606 pos += incr;
2592 2607 }
2593 2608
2594 2609 if (pos != end) {
2595 2610 if (!PyErr_Occurred())
2596 2611 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2597 2612 return -1;
2598 2613 }
2599 2614
2600 2615 return len;
2601 2616 }
2602 2617
2603 2618 static int index_init(indexObject *self, PyObject *args)
2604 2619 {
2605 2620 PyObject *data_obj, *inlined_obj;
2606 2621 Py_ssize_t size;
2607 2622
2608 2623 /* Initialize before argument-checking to avoid index_dealloc() crash.
2609 2624 */
2610 2625 self->raw_length = 0;
2611 2626 self->added = NULL;
2612 2627 self->cache = NULL;
2613 2628 self->data = NULL;
2614 2629 memset(&self->buf, 0, sizeof(self->buf));
2615 2630 self->headrevs = NULL;
2616 2631 self->filteredrevs = Py_None;
2617 2632 Py_INCREF(Py_None);
2618 2633 self->ntinitialized = 0;
2619 2634 self->offsets = NULL;
2620 2635
2621 2636 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2622 2637 return -1;
2623 2638 if (!PyObject_CheckBuffer(data_obj)) {
2624 2639 PyErr_SetString(PyExc_TypeError,
2625 2640 "data does not support buffer interface");
2626 2641 return -1;
2627 2642 }
2628 2643
2629 2644 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2630 2645 return -1;
2631 2646 size = self->buf.len;
2632 2647
2633 2648 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2634 2649 self->data = data_obj;
2635 2650
2636 2651 self->ntlookups = self->ntmisses = 0;
2637 2652 self->ntrev = -1;
2638 2653 Py_INCREF(self->data);
2639 2654
2640 2655 if (self->inlined) {
2641 2656 Py_ssize_t len = inline_scan(self, NULL);
2642 2657 if (len == -1)
2643 2658 goto bail;
2644 2659 self->raw_length = len;
2645 2660 self->length = len;
2646 2661 } else {
2647 2662 if (size % v1_hdrsize) {
2648 2663 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2649 2664 goto bail;
2650 2665 }
2651 2666 self->raw_length = size / v1_hdrsize;
2652 2667 self->length = self->raw_length;
2653 2668 }
2654 2669
2655 2670 return 0;
2656 2671 bail:
2657 2672 return -1;
2658 2673 }
2659 2674
2660 2675 static PyObject *index_nodemap(indexObject *self)
2661 2676 {
2662 2677 Py_INCREF(self);
2663 2678 return (PyObject *)self;
2664 2679 }
2665 2680
2666 2681 static void _index_clearcaches(indexObject *self)
2667 2682 {
2668 2683 if (self->cache) {
2669 2684 Py_ssize_t i;
2670 2685
2671 2686 for (i = 0; i < self->raw_length; i++)
2672 2687 Py_CLEAR(self->cache[i]);
2673 2688 free(self->cache);
2674 2689 self->cache = NULL;
2675 2690 }
2676 2691 if (self->offsets) {
2677 2692 PyMem_Free((void *)self->offsets);
2678 2693 self->offsets = NULL;
2679 2694 }
2680 2695 if (self->ntinitialized) {
2681 2696 nt_dealloc(&self->nt);
2682 2697 }
2683 2698 self->ntinitialized = 0;
2684 2699 Py_CLEAR(self->headrevs);
2685 2700 }
2686 2701
2687 2702 static PyObject *index_clearcaches(indexObject *self)
2688 2703 {
2689 2704 _index_clearcaches(self);
2690 2705 self->ntrev = -1;
2691 2706 self->ntlookups = self->ntmisses = 0;
2692 2707 Py_RETURN_NONE;
2693 2708 }
2694 2709
2695 2710 static void index_dealloc(indexObject *self)
2696 2711 {
2697 2712 _index_clearcaches(self);
2698 2713 Py_XDECREF(self->filteredrevs);
2699 2714 if (self->buf.buf) {
2700 2715 PyBuffer_Release(&self->buf);
2701 2716 memset(&self->buf, 0, sizeof(self->buf));
2702 2717 }
2703 2718 Py_XDECREF(self->data);
2704 2719 Py_XDECREF(self->added);
2705 2720 PyObject_Del(self);
2706 2721 }
2707 2722
2708 2723 static PySequenceMethods index_sequence_methods = {
2709 2724 (lenfunc)index_length, /* sq_length */
2710 2725 0, /* sq_concat */
2711 2726 0, /* sq_repeat */
2712 2727 (ssizeargfunc)index_get, /* sq_item */
2713 2728 0, /* sq_slice */
2714 2729 0, /* sq_ass_item */
2715 2730 0, /* sq_ass_slice */
2716 2731 (objobjproc)index_contains, /* sq_contains */
2717 2732 };
2718 2733
2719 2734 static PyMappingMethods index_mapping_methods = {
2720 2735 (lenfunc)index_length, /* mp_length */
2721 2736 (binaryfunc)index_getitem, /* mp_subscript */
2722 2737 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2723 2738 };
2724 2739
2725 2740 static PyMethodDef index_methods[] = {
2726 2741 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2727 2742 "return the gca set of the given revs"},
2728 2743 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2729 2744 METH_VARARGS,
2730 2745 "return the heads of the common ancestors of the given revs"},
2731 2746 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2732 2747 "clear the index caches"},
2733 2748 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2734 2749 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2735 2750 	 "return True if the node exists in the index"},
2751 {"rev", (PyCFunction)index_m_rev, METH_O,
2752 "return `rev` associated with a node or raise RevlogError"},
2736 2753 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2737 2754 "compute phases"},
2738 2755 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2739 2756 "reachableroots"},
2740 2757 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2741 2758 "get head revisions"}, /* Can do filtering since 3.2 */
2742 2759 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2743 2760 "get filtered head revisions"}, /* Can always do filtering */
2744 2761 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2745 2762 "True if the object is a snapshot"},
2746 2763 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2747 2764 "Gather snapshot data in a cache dict"},
2748 2765 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2749 2766 "determine revisions with deltas to reconstruct fulltext"},
2750 2767 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2751 2768 	 METH_VARARGS, "slice a delta chain into chunks matching a density target"},
2752 2769 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2753 2770 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2754 2771 "match a potentially ambiguous node ID"},
2755 2772 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2756 2773 "find length of shortest hex nodeid of a binary ID"},
2757 2774 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2758 2775 {NULL} /* Sentinel */
2759 2776 };
2760 2777
2761 2778 static PyGetSetDef index_getset[] = {
2762 2779 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2763 2780 {NULL} /* Sentinel */
2764 2781 };
2765 2782
2766 2783 PyTypeObject HgRevlogIndex_Type = {
2767 2784 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2768 2785 "parsers.index", /* tp_name */
2769 2786 sizeof(indexObject), /* tp_basicsize */
2770 2787 0, /* tp_itemsize */
2771 2788 (destructor)index_dealloc, /* tp_dealloc */
2772 2789 0, /* tp_print */
2773 2790 0, /* tp_getattr */
2774 2791 0, /* tp_setattr */
2775 2792 0, /* tp_compare */
2776 2793 0, /* tp_repr */
2777 2794 0, /* tp_as_number */
2778 2795 &index_sequence_methods, /* tp_as_sequence */
2779 2796 &index_mapping_methods, /* tp_as_mapping */
2780 2797 0, /* tp_hash */
2781 2798 0, /* tp_call */
2782 2799 0, /* tp_str */
2783 2800 0, /* tp_getattro */
2784 2801 0, /* tp_setattro */
2785 2802 0, /* tp_as_buffer */
2786 2803 Py_TPFLAGS_DEFAULT, /* tp_flags */
2787 2804 "revlog index", /* tp_doc */
2788 2805 0, /* tp_traverse */
2789 2806 0, /* tp_clear */
2790 2807 0, /* tp_richcompare */
2791 2808 0, /* tp_weaklistoffset */
2792 2809 0, /* tp_iter */
2793 2810 0, /* tp_iternext */
2794 2811 index_methods, /* tp_methods */
2795 2812 0, /* tp_members */
2796 2813 index_getset, /* tp_getset */
2797 2814 0, /* tp_base */
2798 2815 0, /* tp_dict */
2799 2816 0, /* tp_descr_get */
2800 2817 0, /* tp_descr_set */
2801 2818 0, /* tp_dictoffset */
2802 2819 (initproc)index_init, /* tp_init */
2803 2820 0, /* tp_alloc */
2804 2821 };
2805 2822
2806 2823 /*
2807 2824  * returns a tuple of the form (index, cache) with elements as
2808 2825 * follows:
2809 2826 *
2810 2827 * index: an index object that lazily parses RevlogNG records
2811 2828 * cache: if data is inlined, a tuple (0, index_file_content), else None
2812 2829 * index_file_content could be a string, or a buffer
2813 2830 *
2814 2831 * added complications are for backwards compatibility
2815 2832 */
2816 2833 PyObject *parse_index2(PyObject *self, PyObject *args)
2817 2834 {
2818 2835 PyObject *tuple = NULL, *cache = NULL;
2819 2836 indexObject *idx;
2820 2837 int ret;
2821 2838
2822 2839 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2823 2840 if (idx == NULL)
2824 2841 goto bail;
2825 2842
2826 2843 ret = index_init(idx, args);
2827 2844 if (ret == -1)
2828 2845 goto bail;
2829 2846
2830 2847 if (idx->inlined) {
2831 2848 cache = Py_BuildValue("iO", 0, idx->data);
2832 2849 if (cache == NULL)
2833 2850 goto bail;
2834 2851 } else {
2835 2852 cache = Py_None;
2836 2853 Py_INCREF(cache);
2837 2854 }
2838 2855
2839 2856 tuple = Py_BuildValue("NN", idx, cache);
2840 2857 if (!tuple)
2841 2858 goto bail;
2842 2859 return tuple;
2843 2860
2844 2861 bail:
2845 2862 Py_XDECREF(idx);
2846 2863 Py_XDECREF(cache);
2847 2864 Py_XDECREF(tuple);
2848 2865 return NULL;
2849 2866 }
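
/*
 * Usage sketch (hypothetical caller, for illustration only):
 * index, cache = parsers.parse_index2(data, inline). `cache` is the tuple
 * (0, data) when the revlog is inline, letting the caller reuse the bytes
 * it already read, and None otherwise.
 */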
2850 2867
2851 2868 #ifdef WITH_RUST
2852 2869
2853 2870 /* rustlazyancestors: iteration over ancestors implemented in Rust
2854 2871 *
2855 2872 * This class holds a reference to an index and to the Rust iterator.
2856 2873 */
2857 2874 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2858 2875
2859 2876 struct rustlazyancestorsObjectStruct {
2860 2877 PyObject_HEAD
2861 2878 /* Type-specific fields go here. */
2862 2879 indexObject *index; /* Ref kept to avoid GC'ing the index */
2863 2880 void *iter; /* Rust iterator */
2864 2881 };
2865 2882
2866 2883 /* FFI exposed from Rust code */
2867 2884 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2868 2885                                                 /* initrevs vector */
2869 2886 Py_ssize_t initrevslen,
2870 2887 long *initrevs, long stoprev,
2871 2888 int inclusive);
2872 2889 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2873 2890 int rustlazyancestors_next(rustlazyancestorsObject *self);
2874 2891 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2875 2892
2876 2893 /* CPython instance methods */
2877 2894 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2878 2895 {
2879 2896 PyObject *initrevsarg = NULL;
2880 2897 PyObject *inclusivearg = NULL;
2881 2898 long stoprev = 0;
2882 2899 long *initrevs = NULL;
2883 2900 int inclusive = 0;
2884 2901 Py_ssize_t i;
2885 2902
2886 2903 indexObject *index;
2887 2904 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2888 2905 &PyList_Type, &initrevsarg, &stoprev,
2889 2906 &PyBool_Type, &inclusivearg))
2890 2907 return -1;
2891 2908
2892 2909 Py_INCREF(index);
2893 2910 self->index = index;
2894 2911
2895 2912 if (inclusivearg == Py_True)
2896 2913 inclusive = 1;
2897 2914
2898 2915 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2899 2916
2900 2917 initrevs = (long *)calloc(linit, sizeof(long));
2901 2918
2902 2919 if (initrevs == NULL) {
2903 2920 PyErr_NoMemory();
2904 2921 goto bail;
2905 2922 }
2906 2923
2907 2924 for (i = 0; i < linit; i++) {
2908 2925 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2909 2926 }
2910 2927 if (PyErr_Occurred())
2911 2928 goto bail;
2912 2929
2913 2930 self->iter =
2914 2931 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2915 2932 if (self->iter == NULL) {
2916 2933 /* if this is because of GraphError::ParentOutOfRange
2917 2934 * HgRevlogIndex_GetParents() has already set the proper
2918 2935 * exception */
2919 2936 goto bail;
2920 2937 }
2921 2938
2922 2939 free(initrevs);
2923 2940 return 0;
2924 2941
2925 2942 bail:
2926 2943 free(initrevs);
2927 2944 return -1;
2928 2945 };
2929 2946
2930 2947 static void rustla_dealloc(rustlazyancestorsObject *self)
2931 2948 {
2932 2949 Py_XDECREF(self->index);
2933 2950 	if (self->iter != NULL) { /* iter is NULL if rustla_init failed */
2934 2951 rustlazyancestors_drop(self->iter);
2935 2952 }
2936 2953 PyObject_Del(self);
2937 2954 }
2938 2955
2939 2956 static PyObject *rustla_next(rustlazyancestorsObject *self)
2940 2957 {
2941 2958 int res = rustlazyancestors_next(self->iter);
2942 2959 if (res == -1) {
2943 2960 	/* Setting an explicit exception seems unnecessary here,
2944 2961 	 * as examples from the Python source code (Objects/rangeobject.c
2945 2962 	 * and Modules/_io/stringio.c) demonstrate.
2946 2963 	 */
2947 2964 return NULL;
2948 2965 }
2949 2966 return PyInt_FromLong(res);
2950 2967 }
2951 2968
2952 2969 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2953 2970 {
2954 2971 long lrev;
2955 2972 if (!pylong_to_long(rev, &lrev)) {
2956 2973 PyErr_Clear();
2957 2974 return 0;
2958 2975 }
2959 2976 return rustlazyancestors_contains(self->iter, lrev);
2960 2977 }
2961 2978
2962 2979 static PySequenceMethods rustla_sequence_methods = {
2963 2980 0, /* sq_length */
2964 2981 0, /* sq_concat */
2965 2982 0, /* sq_repeat */
2966 2983 0, /* sq_item */
2967 2984 0, /* sq_slice */
2968 2985 0, /* sq_ass_item */
2969 2986 0, /* sq_ass_slice */
2970 2987 (objobjproc)rustla_contains, /* sq_contains */
2971 2988 };
2972 2989
2973 2990 static PyTypeObject rustlazyancestorsType = {
2974 2991 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2975 2992 "parsers.rustlazyancestors", /* tp_name */
2976 2993 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2977 2994 0, /* tp_itemsize */
2978 2995 (destructor)rustla_dealloc, /* tp_dealloc */
2979 2996 0, /* tp_print */
2980 2997 0, /* tp_getattr */
2981 2998 0, /* tp_setattr */
2982 2999 0, /* tp_compare */
2983 3000 0, /* tp_repr */
2984 3001 0, /* tp_as_number */
2985 3002 &rustla_sequence_methods, /* tp_as_sequence */
2986 3003 0, /* tp_as_mapping */
2987 3004 0, /* tp_hash */
2988 3005 0, /* tp_call */
2989 3006 0, /* tp_str */
2990 3007 0, /* tp_getattro */
2991 3008 0, /* tp_setattro */
2992 3009 0, /* tp_as_buffer */
2993 3010 Py_TPFLAGS_DEFAULT, /* tp_flags */
2994 3011 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2995 3012 0, /* tp_traverse */
2996 3013 0, /* tp_clear */
2997 3014 0, /* tp_richcompare */
2998 3015 0, /* tp_weaklistoffset */
2999 3016 0, /* tp_iter */
3000 3017 (iternextfunc)rustla_next, /* tp_iternext */
3001 3018 0, /* tp_methods */
3002 3019 0, /* tp_members */
3003 3020 0, /* tp_getset */
3004 3021 0, /* tp_base */
3005 3022 0, /* tp_dict */
3006 3023 0, /* tp_descr_get */
3007 3024 0, /* tp_descr_set */
3008 3025 0, /* tp_dictoffset */
3009 3026 (initproc)rustla_init, /* tp_init */
3010 3027 0, /* tp_alloc */
3011 3028 };
3012 3029 #endif /* WITH_RUST */
3013 3030
3014 3031 void revlog_module_init(PyObject *mod)
3015 3032 {
3016 3033 PyObject *caps = NULL;
3017 3034 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3018 3035 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3019 3036 return;
3020 3037 Py_INCREF(&HgRevlogIndex_Type);
3021 3038 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3022 3039
3023 3040 nodetreeType.tp_new = PyType_GenericNew;
3024 3041 if (PyType_Ready(&nodetreeType) < 0)
3025 3042 return;
3026 3043 Py_INCREF(&nodetreeType);
3027 3044 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3028 3045
3029 3046 if (!nullentry) {
3030 3047 nullentry =
3031 3048 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
3032 3049 -1, -1, -1, nullid, (Py_ssize_t)20);
3033 3050 }
3034 3051 if (nullentry)
3035 3052 PyObject_GC_UnTrack(nullentry);
3036 3053
3037 3054 caps = PyCapsule_New(HgRevlogIndex_GetParents,
3038 3055 "mercurial.cext.parsers.index_get_parents_CAPI",
3039 3056 NULL);
3040 3057 if (caps != NULL)
3041 3058 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
3042 3059
3043 3060 #ifdef WITH_RUST
3044 3061 rustlazyancestorsType.tp_new = PyType_GenericNew;
3045 3062 if (PyType_Ready(&rustlazyancestorsType) < 0)
3046 3063 return;
3047 3064 Py_INCREF(&rustlazyancestorsType);
3048 3065 PyModule_AddObject(mod, "rustlazyancestors",
3049 3066 (PyObject *)&rustlazyancestorsType);
3050 3067 #endif
3051 3068 }
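
As a rough illustration of the Python-level contract defined above (a sketch, not part of this change): under a WITH_RUST build the `parsers` C module also exposes `rustlazyancestors`, whose constructor arguments mirror `rustla_init` (an index object, a list of start revisions, a stop revision, and an inclusive flag) and which supports both iteration and membership tests. The index file path and the non-inline assumption below are hypothetical.

    from mercurial import policy

    parsers = policy.importmod('parsers')      # resolves to cext.parsers under a C policy

    with open('.hg/store/00manifest.i', 'rb') as f:    # hypothetical, non-inline index
        index, _cache = parsers.parse_index2(f.read(), False)

    if hasattr(parsers, 'rustlazyancestors'):  # only registered when compiled WITH_RUST
        tiprev = len(index) - 1
        ancs = parsers.rustlazyancestors(index, [tiprev], 0, True)
        print(list(ancs))                      # tp_iternext -> rustlazyancestors_next
        ancs2 = parsers.rustlazyancestors(index, [tiprev], 0, True)
        print(tiprev in ancs2)                 # sq_contains -> rustlazyancestors_contains
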
@@ -1,157 +1,157 b''
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 from .pycompat import getattr
14 14
15 15 # Rules for how modules can be loaded. Values are:
16 16 #
17 17 # c - require C extensions
18 18 # rust+c - require Rust and C extensions
19 19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
20 20 # for each
21 21 # allow - allow pure Python implementation when C loading fails
22 22 # cffi - require cffi modules (implemented within the pure module)
23 23 # cffi-allow - allow pure Python implementation if cffi version is missing
24 24 # py - only load pure Python modules
25 25 #
26 26 # By default, fall back to the pure modules so the in-place build can
27 27 # run without recompiling the C extensions. This will be overridden by
28 28 # __modulepolicy__ generated by setup.py.
29 29 policy = b'allow'
30 30 _packageprefs = {
31 31 # policy: (versioned package, pure package)
32 32 b'c': ('cext', None),
33 33 b'allow': ('cext', 'pure'),
34 34 b'cffi': ('cffi', None),
35 35 b'cffi-allow': ('cffi', 'pure'),
36 36 b'py': (None, 'pure'),
37 37 # For now, rust policies impact importrust only
38 38 b'rust+c': ('cext', None),
39 39 b'rust+c-allow': ('cext', 'pure'),
40 40 }
41 41
42 42 try:
43 43 from . import __modulepolicy__
44 44
45 45 policy = __modulepolicy__.modulepolicy
46 46 except ImportError:
47 47 pass
48 48
49 49 # PyPy doesn't load C extensions.
50 50 #
51 51 # The canonical way to do this is to test platform.python_implementation().
52 52 # But we don't import platform here, to avoid the extra bloat.
53 53 if '__pypy__' in sys.builtin_module_names:
54 54 policy = b'cffi'
55 55
56 56 # Environment variable can always force settings.
57 57 if sys.version_info[0] >= 3:
58 58 if 'HGMODULEPOLICY' in os.environ:
59 59 policy = os.environ['HGMODULEPOLICY'].encode('utf-8')
60 60 else:
61 61 policy = os.environ.get('HGMODULEPOLICY', policy)
62 62
63 63
64 64 def _importfrom(pkgname, modname):
65 65 # from .<pkgname> import <modname> (where . is looked through this module)
66 66 fakelocals = {}
67 67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
68 68 try:
69 69 fakelocals[modname] = mod = getattr(pkg, modname)
70 70 except AttributeError:
71 71 raise ImportError('cannot import name %s' % modname)
72 72 # force import; fakelocals[modname] may be replaced with the real module
73 73 getattr(mod, '__doc__', None)
74 74 return fakelocals[modname]
75 75
76 76
77 77 # keep in sync with "version" in C modules
78 78 _cextversions = {
79 79 ('cext', 'base85'): 1,
80 80 ('cext', 'bdiff'): 3,
81 81 ('cext', 'mpatch'): 1,
82 82 ('cext', 'osutil'): 4,
83 ('cext', 'parsers'): 14,
83 ('cext', 'parsers'): 15,
84 84 }
85 85
86 86 # map import request to other package or module
87 87 _modredirects = {
88 88 ('cext', 'charencode'): ('cext', 'parsers'),
89 89 ('cffi', 'base85'): ('pure', 'base85'),
90 90 ('cffi', 'charencode'): ('pure', 'charencode'),
91 91 ('cffi', 'parsers'): ('pure', 'parsers'),
92 92 }
93 93
94 94
95 95 def _checkmod(pkgname, modname, mod):
96 96 expected = _cextversions.get((pkgname, modname))
97 97 actual = getattr(mod, 'version', None)
98 98 if actual != expected:
99 99 raise ImportError(
100 100 'cannot import module %s.%s '
101 101 '(expected version: %d, actual: %r)'
102 102 % (pkgname, modname, expected, actual)
103 103 )
104 104
105 105
106 106 def importmod(modname):
107 107 """Import module according to policy and check API version"""
108 108 try:
109 109 verpkg, purepkg = _packageprefs[policy]
110 110 except KeyError:
111 111 raise ImportError('invalid HGMODULEPOLICY %r' % policy)
112 112 assert verpkg or purepkg
113 113 if verpkg:
114 114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
115 115 try:
116 116 mod = _importfrom(pn, mn)
117 117 if pn == verpkg:
118 118 _checkmod(pn, mn, mod)
119 119 return mod
120 120 except ImportError:
121 121 if not purepkg:
122 122 raise
123 123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
124 124 return _importfrom(pn, mn)
125 125
126 126
127 127 def _isrustpermissive():
128 128 """Assuming the policy is a Rust one, tell if it's permissive."""
129 129 return policy.endswith(b'-allow')
130 130
131 131
132 132 def importrust(modname, member=None, default=None):
133 133 """Import Rust module according to policy and availability.
134 134
135 135 If policy isn't a Rust one, this returns `default`.
136 136
137 137 If either the module or its member is not available, this returns `default`
138 138 if policy is permissive and raises `ImportError` if not.
139 139 """
140 140 if not policy.startswith(b'rust'):
141 141 return default
142 142
143 143 try:
144 144 mod = _importfrom('rustext', modname)
145 145 except ImportError:
146 146 if _isrustpermissive():
147 147 return default
148 148 raise
149 149 if member is None:
150 150 return mod
151 151
152 152 try:
153 153 return getattr(mod, member)
154 154 except AttributeError:
155 155 if _isrustpermissive():
156 156 return default
157 157 raise ImportError("Cannot import name %s" % member)
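
For reference, this is how the rest of the tree consumes this module (revlog.py below does exactly this); a minimal standalone sketch:

    from mercurial import policy

    parsers = policy.importmod('parsers')         # cext, cffi or pure, per the policy
    rustancestor = policy.importrust('ancestor')  # None unless a rust+c* policy is active

    print(parsers.__name__)   # e.g. 'mercurial.cext.parsers' under the 'c' policy
    print(rustancestor)       # None under the default 'allow' policy
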
@@ -1,217 +1,223 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 revlogutils,
17 17 util,
18 18 )
19 19
20 20 stringio = pycompat.bytesio
21 21
22 22
23 23 _pack = struct.pack
24 24 _unpack = struct.unpack
25 25 _compress = zlib.compress
26 26 _decompress = zlib.decompress
27 27
28 28 # Some code below makes tuples directly because it's more convenient. However,
29 29 # code outside this module should always use dirstatetuple.
30 30 def dirstatetuple(*x):
31 31 # x is a tuple
32 32 return x
33 33
34 34
35 35 indexformatng = b">Qiiiiii20s12x"
36 36 indexfirst = struct.calcsize(b'Q')
37 37 sizeint = struct.calcsize(b'i')
38 38 indexsize = struct.calcsize(indexformatng)
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
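
For illustration, the first field of an index entry packs the data offset and the flag bits into a single integer; a tiny worked example using the two helpers above:

    packed = offset_type(1024, 1)    # offset 1024 bytes, flag value 1
    assert packed == (1024 << 16) | 1
    assert gettype(packed) == 1      # low 16 bits: flags
    assert packed >> 16 == 1024      # high bits: offset (revlog.py calls this getoffset)
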
48 48
49 49 class BaseIndexObject(object):
50 50 @util.propertycache
51 51 def nodemap(self):
52 52 nodemap = revlogutils.NodeMap({nullid: nullrev})
53 53 for r in range(0, len(self)):
54 54 n = self[r][7]
55 55 nodemap[n] = r
56 56 return nodemap
57 57
58 58 def has_node(self, node):
59 59         """return True if the node exists in the index"""
60 60 return node in self.nodemap
61 61
62 def rev(self, node):
63 """return a revision for a node
64
65 If the node is unknown, raise a RevlogError"""
66 return self.nodemap[node]
67
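
A minimal sketch of the API being added here, exercised through the pure-Python classes in this file on an empty, non-inline index (nullid and nullrev come from mercurial.node):

    from mercurial.node import nullid, nullrev
    from mercurial.pure import parsers as pureparsers

    index, cache = pureparsers.parse_index2(b'', False)  # empty IndexObject, cache is None
    assert index.has_node(nullid)        # nullid is always present in the nodemap
    assert index.rev(nullid) == nullrev  # the new rev() method: node -> revision number
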
62 68 def _stripnodes(self, start):
63 69 if 'nodemap' in vars(self):
64 70 for r in range(start, len(self)):
65 71 n = self[r][7]
66 72 del self.nodemap[n]
67 73
68 74 def clearcaches(self):
69 75 self.__dict__.pop('nodemap', None)
70 76
71 77 def __len__(self):
72 78 return self._lgt + len(self._extra)
73 79
74 80 def append(self, tup):
75 81 if 'nodemap' in vars(self):
76 82 self.nodemap[tup[7]] = len(self)
77 83 self._extra.append(tup)
78 84
79 85 def _check_index(self, i):
80 86 if not isinstance(i, int):
81 87 raise TypeError(b"expecting int indexes")
82 88 if i < 0 or i >= len(self):
83 89 raise IndexError
84 90
85 91 def __getitem__(self, i):
86 92 if i == -1:
87 93 return (0, 0, 0, -1, -1, -1, -1, nullid)
88 94 self._check_index(i)
89 95 if i >= self._lgt:
90 96 return self._extra[i - self._lgt]
91 97 index = self._calculate_index(i)
92 98 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
93 99 if i == 0:
94 100 e = list(r)
95 101 type = gettype(e[0])
96 102 e[0] = offset_type(0, type)
97 103 return tuple(e)
98 104 return r
99 105
100 106
101 107 class IndexObject(BaseIndexObject):
102 108 def __init__(self, data):
103 109 assert len(data) % indexsize == 0
104 110 self._data = data
105 111 self._lgt = len(data) // indexsize
106 112 self._extra = []
107 113
108 114 def _calculate_index(self, i):
109 115 return i * indexsize
110 116
111 117 def __delitem__(self, i):
112 118 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
113 119 raise ValueError(b"deleting slices only supports a:-1 with step 1")
114 120 i = i.start
115 121 self._check_index(i)
116 122 self._stripnodes(i)
117 123 if i < self._lgt:
118 124 self._data = self._data[: i * indexsize]
119 125 self._lgt = i
120 126 self._extra = []
121 127 else:
122 128 self._extra = self._extra[: i - self._lgt]
123 129
124 130
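
As a sketch of the 8-tuple layout __getitem__ returns, one v1 entry can be hand-built with the indexformatng layout above and read back through IndexObject (all values below are made up):

    import struct
    from mercurial.pure.parsers import IndexObject

    node = b'\x99' * 20
    entry = struct.pack(b'>Qiiiiii20s12x',
                        (0 << 16) | 0,  # offset << 16 | flags
                        11, 12,         # compressed / uncompressed length
                        0, 0,           # delta base rev, link rev
                        -1, -1,         # parent revisions (none)
                        node)
    idx = IndexObject(entry)
    assert idx[0] == (0, 11, 12, 0, 0, -1, -1, node)
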
125 131 class InlinedIndexObject(BaseIndexObject):
126 132 def __init__(self, data, inline=0):
127 133 self._data = data
128 134 self._lgt = self._inline_scan(None)
129 135 self._inline_scan(self._lgt)
130 136 self._extra = []
131 137
132 138 def _inline_scan(self, lgt):
133 139 off = 0
134 140 if lgt is not None:
135 141 self._offsets = [0] * lgt
136 142 count = 0
137 143 while off <= len(self._data) - indexsize:
138 144 (s,) = struct.unpack(
139 145 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
140 146 )
141 147 if lgt is not None:
142 148 self._offsets[count] = off
143 149 count += 1
144 150 off += indexsize + s
145 151 if off != len(self._data):
146 152 raise ValueError(b"corrupted data")
147 153 return count
148 154
149 155 def __delitem__(self, i):
150 156 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
151 157 raise ValueError(b"deleting slices only supports a:-1 with step 1")
152 158 i = i.start
153 159 self._check_index(i)
154 160 self._stripnodes(i)
155 161 if i < self._lgt:
156 162 self._offsets = self._offsets[:i]
157 163 self._lgt = i
158 164 self._extra = []
159 165 else:
160 166 self._extra = self._extra[: i - self._lgt]
161 167
162 168 def _calculate_index(self, i):
163 169 return self._offsets[i]
164 170
165 171
166 172 def parse_index2(data, inline):
167 173 if not inline:
168 174 return IndexObject(data), None
169 175 return InlinedIndexObject(data, inline), (0, data)
170 176
171 177
172 178 def parse_dirstate(dmap, copymap, st):
173 179 parents = [st[:20], st[20:40]]
174 180 # dereference fields so they will be local in loop
175 181 format = b">cllll"
176 182 e_size = struct.calcsize(format)
177 183 pos1 = 40
178 184 l = len(st)
179 185
180 186 # the inner loop
181 187 while pos1 < l:
182 188 pos2 = pos1 + e_size
183 189 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
184 190 pos1 = pos2 + e[4]
185 191 f = st[pos2:pos1]
186 192 if b'\0' in f:
187 193 f, c = f.split(b'\0')
188 194 copymap[f] = c
189 195 dmap[f] = e[:4]
190 196 return parents
191 197
192 198
193 199 def pack_dirstate(dmap, copymap, pl, now):
194 200 now = int(now)
195 201 cs = stringio()
196 202 write = cs.write
197 203 write(b"".join(pl))
198 204 for f, e in pycompat.iteritems(dmap):
199 205 if e[0] == b'n' and e[3] == now:
200 206 # The file was last modified "simultaneously" with the current
201 207 # write to dirstate (i.e. within the same second for file-
202 208 # systems with a granularity of 1 sec). This commonly happens
203 209 # for at least a couple of files on 'update'.
204 210 # The user could change the file without changing its size
205 211 # within the same second. Invalidate the file's mtime in
206 212 # dirstate, forcing future 'status' calls to compare the
207 213 # contents of the file if the size is the same. This prevents
208 214 # mistakenly treating such files as clean.
209 215 e = dirstatetuple(e[0], e[1], e[2], -1)
210 216 dmap[f] = e
211 217
212 218 if f in copymap:
213 219 f = b"%s\0%s" % (f, copymap[f])
214 220 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
215 221 write(e)
216 222 write(f)
217 223 return cs.getvalue()
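
A hedged round-trip sketch for the two dirstate helpers above, again going through the pure module (the file name, mode and timestamps are made up):

    from mercurial.pure import parsers as pureparsers

    pl = [b'\x11' * 20, b'\x22' * 20]           # two fake parent nodes
    dmap = {b'a.txt': (b'n', 0o644, 5, 1000)}   # state, mode, size, mtime
    # now (2000) differs from the mtime, so no entry is invalidated on pack
    blob = pureparsers.pack_dirstate(dict(dmap), {}, pl, 2000)

    out, copies = {}, {}
    assert pureparsers.parse_dirstate(out, copies, blob) == pl
    assert out == dmap and copies == {}
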
@@ -1,2964 +1,2970 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes warnings
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated to the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
206 206
207 207 class revlogoldindex(list):
208 208 @util.propertycache
209 209 def nodemap(self):
210 210 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 211 for r in range(0, len(self)):
212 212 n = self[r][7]
213 213 nodemap[n] = r
214 214 return nodemap
215 215
216 216 def has_node(self, node):
217 217         """return True if the node exists in the index"""
218 218 return node in self.nodemap
219 219
220 def rev(self, node):
221 """return a revision for a node
222
223 If the node is unknown, raise a RevlogError"""
224 return self.nodemap[node]
225
220 226 def append(self, tup):
221 227 self.nodemap[tup[7]] = len(self)
222 228 super(revlogoldindex, self).append(tup)
223 229
224 230 def __delitem__(self, i):
225 231 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
226 232 raise ValueError(b"deleting slices only supports a:-1 with step 1")
227 233 for r in pycompat.xrange(i.start, len(self)):
228 234 del self.nodemap[self[r][7]]
229 235 super(revlogoldindex, self).__delitem__(i)
230 236
231 237 def clearcaches(self):
232 238 self.__dict__.pop('nodemap', None)
233 239
234 240 def __getitem__(self, i):
235 241 if i == -1:
236 242 return (0, 0, 0, -1, -1, -1, -1, nullid)
237 243 return list.__getitem__(self, i)
238 244
239 245
240 246 class revlogoldio(object):
241 247 def __init__(self):
242 248 self.size = indexformatv0.size
243 249
244 250 def parseindex(self, data, inline):
245 251 s = self.size
246 252 index = []
247 253 nodemap = revlogutils.NodeMap({nullid: nullrev})
248 254 n = off = 0
249 255 l = len(data)
250 256 while off + s <= l:
251 257 cur = data[off : off + s]
252 258 off += s
253 259 e = indexformatv0_unpack(cur)
254 260 # transform to revlogv1 format
255 261 e2 = (
256 262 offset_type(e[0], 0),
257 263 e[1],
258 264 -1,
259 265 e[2],
260 266 e[3],
261 267 nodemap.get(e[4], nullrev),
262 268 nodemap.get(e[5], nullrev),
263 269 e[6],
264 270 )
265 271 index.append(e2)
266 272 nodemap[e[6]] = n
267 273 n += 1
268 274
269 275 index = revlogoldindex(index)
270 276 return index, None
271 277
272 278 def packentry(self, entry, node, version, rev):
273 279 if gettype(entry[0]):
274 280 raise error.RevlogError(
275 281 _(b'index entry flags need revlog version 1')
276 282 )
277 283 e2 = (
278 284 getoffset(entry[0]),
279 285 entry[1],
280 286 entry[3],
281 287 entry[4],
282 288 node(entry[5]),
283 289 node(entry[6]),
284 290 entry[7],
285 291 )
286 292 return indexformatv0_pack(*e2)
287 293
288 294
289 295 # index ng:
290 296 # 6 bytes: offset
291 297 # 2 bytes: flags
292 298 # 4 bytes: compressed length
293 299 # 4 bytes: uncompressed length
294 300 # 4 bytes: base rev
295 301 # 4 bytes: link rev
296 302 # 4 bytes: parent 1 rev
297 303 # 4 bytes: parent 2 rev
298 304 # 32 bytes: nodeid
299 305 indexformatng = struct.Struct(b">Qiiiiii20s12x")
300 306 indexformatng_pack = indexformatng.pack
301 307 versionformat = struct.Struct(b">I")
302 308 versionformat_pack = versionformat.pack
303 309 versionformat_unpack = versionformat.unpack
304 310
305 311 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
306 312 # signed integer)
307 313 _maxentrysize = 0x7FFFFFFF
308 314
309 315
310 316 class revlogio(object):
311 317 def __init__(self):
312 318 self.size = indexformatng.size
313 319
314 320 def parseindex(self, data, inline):
315 321 # call the C implementation to parse the index data
316 322 index, cache = parsers.parse_index2(data, inline)
317 323 return index, cache
318 324
319 325 def packentry(self, entry, node, version, rev):
320 326 p = indexformatng_pack(*entry)
321 327 if rev == 0:
322 328 p = versionformat_pack(version) + p[4:]
323 329 return p
324 330
325 331
326 332 class revlog(object):
327 333 """
328 334 the underlying revision storage object
329 335
330 336 A revlog consists of two parts, an index and the revision data.
331 337
332 338 The index is a file with a fixed record size containing
333 339 information on each revision, including its nodeid (hash), the
334 340 nodeids of its parents, the position and offset of its data within
335 341 the data file, and the revision it's based on. Finally, each entry
336 342 contains a linkrev entry that can serve as a pointer to external
337 343 data.
338 344
339 345 The revision data itself is a linear collection of data chunks.
340 346 Each chunk represents a revision and is usually represented as a
341 347 delta against the previous chunk. To bound lookup time, runs of
342 348 deltas are limited to about 2 times the length of the original
343 349 version data. This makes retrieval of a version proportional to
344 350 its size, or O(1) relative to the number of revisions.
345 351
346 352 Both pieces of the revlog are written to in an append-only
347 353 fashion, which means we never need to rewrite a file to insert or
348 354 remove data, and can use some simple techniques to avoid the need
349 355 for locking while reading.
350 356
351 357 If checkambig, indexfile is opened with checkambig=True at
352 358 writing, to avoid file stat ambiguity.
353 359
354 360 If mmaplargeindex is True, and an mmapindexthreshold is set, the
355 361 index will be mmapped rather than read if it is larger than the
356 362 configured threshold.
357 363
358 364 If censorable is True, the revlog can have censored revisions.
359 365
360 366 If `upperboundcomp` is not None, this is the expected maximal gain from
361 367 compression for the data content.
362 368 """
363 369
364 370 _flagserrorclass = error.RevlogError
365 371
366 372 def __init__(
367 373 self,
368 374 opener,
369 375 indexfile,
370 376 datafile=None,
371 377 checkambig=False,
372 378 mmaplargeindex=False,
373 379 censorable=False,
374 380 upperboundcomp=None,
375 381 ):
376 382 """
377 383 create a revlog object
378 384
379 385 opener is a function that abstracts the file opening operation
380 386 and can be used to implement COW semantics or the like.
381 387
382 388 """
383 389 self.upperboundcomp = upperboundcomp
384 390 self.indexfile = indexfile
385 391 self.datafile = datafile or (indexfile[:-2] + b".d")
386 392 self.opener = opener
387 393 # When True, indexfile is opened with checkambig=True at writing, to
388 394 # avoid file stat ambiguity.
389 395 self._checkambig = checkambig
390 396 self._mmaplargeindex = mmaplargeindex
391 397 self._censorable = censorable
392 398 # 3-tuple of (node, rev, text) for a raw revision.
393 399 self._revisioncache = None
394 400 # Maps rev to chain base rev.
395 401 self._chainbasecache = util.lrucachedict(100)
396 402 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
397 403 self._chunkcache = (0, b'')
398 404 # How much data to read and cache into the raw revlog data cache.
399 405 self._chunkcachesize = 65536
400 406 self._maxchainlen = None
401 407 self._deltabothparents = True
402 408 self.index = None
403 409 # Mapping of partial identifiers to full nodes.
404 410 self._pcache = {}
405 411 # Mapping of revision integer to full node.
406 412 self._nodepos = None
407 413 self._compengine = b'zlib'
408 414 self._compengineopts = {}
409 415 self._maxdeltachainspan = -1
410 416 self._withsparseread = False
411 417 self._sparserevlog = False
412 418 self._srdensitythreshold = 0.50
413 419 self._srmingapsize = 262144
414 420
415 421 # Make copy of flag processors so each revlog instance can support
416 422 # custom flags.
417 423 self._flagprocessors = dict(flagutil.flagprocessors)
418 424
419 425 # 2-tuple of file handles being used for active writing.
420 426 self._writinghandles = None
421 427
422 428 self._loadindex()
423 429
424 430 def _loadindex(self):
425 431 mmapindexthreshold = None
426 432 opts = self.opener.options
427 433
428 434 if b'revlogv2' in opts:
429 435 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
430 436 elif b'revlogv1' in opts:
431 437 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
432 438 if b'generaldelta' in opts:
433 439 newversionflags |= FLAG_GENERALDELTA
434 440 elif b'revlogv0' in self.opener.options:
435 441 newversionflags = REVLOGV0
436 442 else:
437 443 newversionflags = REVLOG_DEFAULT_VERSION
438 444
439 445 if b'chunkcachesize' in opts:
440 446 self._chunkcachesize = opts[b'chunkcachesize']
441 447 if b'maxchainlen' in opts:
442 448 self._maxchainlen = opts[b'maxchainlen']
443 449 if b'deltabothparents' in opts:
444 450 self._deltabothparents = opts[b'deltabothparents']
445 451 self._lazydelta = bool(opts.get(b'lazydelta', True))
446 452 self._lazydeltabase = False
447 453 if self._lazydelta:
448 454 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
449 455 if b'compengine' in opts:
450 456 self._compengine = opts[b'compengine']
451 457 if b'zlib.level' in opts:
452 458 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
453 459 if b'zstd.level' in opts:
454 460 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
455 461 if b'maxdeltachainspan' in opts:
456 462 self._maxdeltachainspan = opts[b'maxdeltachainspan']
457 463 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
458 464 mmapindexthreshold = opts[b'mmapindexthreshold']
459 465 self.hassidedata = bool(opts.get(b'side-data', False))
460 466 if self.hassidedata:
461 467 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
462 468 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
463 469 withsparseread = bool(opts.get(b'with-sparse-read', False))
464 470 # sparse-revlog forces sparse-read
465 471 self._withsparseread = self._sparserevlog or withsparseread
466 472 if b'sparse-read-density-threshold' in opts:
467 473 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
468 474 if b'sparse-read-min-gap-size' in opts:
469 475 self._srmingapsize = opts[b'sparse-read-min-gap-size']
470 476 if opts.get(b'enableellipsis'):
471 477 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
472 478
473 479 # revlog v0 doesn't have flag processors
474 480 for flag, processor in pycompat.iteritems(
475 481 opts.get(b'flagprocessors', {})
476 482 ):
477 483 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
478 484
479 485 if self._chunkcachesize <= 0:
480 486 raise error.RevlogError(
481 487 _(b'revlog chunk cache size %r is not greater than 0')
482 488 % self._chunkcachesize
483 489 )
484 490 elif self._chunkcachesize & (self._chunkcachesize - 1):
485 491 raise error.RevlogError(
486 492 _(b'revlog chunk cache size %r is not a power of 2')
487 493 % self._chunkcachesize
488 494 )
489 495
490 496 indexdata = b''
491 497 self._initempty = True
492 498 try:
493 499 with self._indexfp() as f:
494 500 if (
495 501 mmapindexthreshold is not None
496 502 and self.opener.fstat(f).st_size >= mmapindexthreshold
497 503 ):
498 504 # TODO: should .close() to release resources without
499 505 # relying on Python GC
500 506 indexdata = util.buffer(util.mmapread(f))
501 507 else:
502 508 indexdata = f.read()
503 509 if len(indexdata) > 0:
504 510 versionflags = versionformat_unpack(indexdata[:4])[0]
505 511 self._initempty = False
506 512 else:
507 513 versionflags = newversionflags
508 514 except IOError as inst:
509 515 if inst.errno != errno.ENOENT:
510 516 raise
511 517
512 518 versionflags = newversionflags
513 519
514 520 self.version = versionflags
515 521
516 522 flags = versionflags & ~0xFFFF
517 523 fmt = versionflags & 0xFFFF
518 524
519 525 if fmt == REVLOGV0:
520 526 if flags:
521 527 raise error.RevlogError(
522 528 _(b'unknown flags (%#04x) in version %d revlog %s')
523 529 % (flags >> 16, fmt, self.indexfile)
524 530 )
525 531
526 532 self._inline = False
527 533 self._generaldelta = False
528 534
529 535 elif fmt == REVLOGV1:
530 536 if flags & ~REVLOGV1_FLAGS:
531 537 raise error.RevlogError(
532 538 _(b'unknown flags (%#04x) in version %d revlog %s')
533 539 % (flags >> 16, fmt, self.indexfile)
534 540 )
535 541
536 542 self._inline = versionflags & FLAG_INLINE_DATA
537 543 self._generaldelta = versionflags & FLAG_GENERALDELTA
538 544
539 545 elif fmt == REVLOGV2:
540 546 if flags & ~REVLOGV2_FLAGS:
541 547 raise error.RevlogError(
542 548 _(b'unknown flags (%#04x) in version %d revlog %s')
543 549 % (flags >> 16, fmt, self.indexfile)
544 550 )
545 551
546 552 self._inline = versionflags & FLAG_INLINE_DATA
547 553 # generaldelta implied by version 2 revlogs.
548 554 self._generaldelta = True
549 555
550 556 else:
551 557 raise error.RevlogError(
552 558 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
553 559 )
554 560 # sparse-revlog can't be on without general-delta (issue6056)
555 561 if not self._generaldelta:
556 562 self._sparserevlog = False
557 563
558 564 self._storedeltachains = True
559 565
560 566 self._io = revlogio()
561 567 if self.version == REVLOGV0:
562 568 self._io = revlogoldio()
563 569 try:
564 570 d = self._io.parseindex(indexdata, self._inline)
565 571 except (ValueError, IndexError):
566 572 raise error.RevlogError(
567 573 _(b"index %s is corrupted") % self.indexfile
568 574 )
569 575 self.index, self._chunkcache = d
570 576 self.nodemap = self.index.nodemap
571 577 if not self._chunkcache:
572 578 self._chunkclear()
573 579 # revnum -> (chain-length, sum-delta-length)
574 580 self._chaininfocache = {}
575 581 # revlog header -> revlog compressor
576 582 self._decompressors = {}
577 583
578 584 @util.propertycache
579 585 def _compressor(self):
580 586 engine = util.compengines[self._compengine]
581 587 return engine.revlogcompressor(self._compengineopts)
582 588
583 589 def _indexfp(self, mode=b'r'):
584 590 """file object for the revlog's index file"""
585 591 args = {'mode': mode}
586 592 if mode != b'r':
587 593 args['checkambig'] = self._checkambig
588 594 if mode == b'w':
589 595 args['atomictemp'] = True
590 596 return self.opener(self.indexfile, **args)
591 597
592 598 def _datafp(self, mode=b'r'):
593 599 """file object for the revlog's data file"""
594 600 return self.opener(self.datafile, mode=mode)
595 601
596 602 @contextlib.contextmanager
597 603 def _datareadfp(self, existingfp=None):
598 604 """file object suitable to read data"""
599 605 # Use explicit file handle, if given.
600 606 if existingfp is not None:
601 607 yield existingfp
602 608
603 609 # Use a file handle being actively used for writes, if available.
604 610 # There is some danger to doing this because reads will seek the
605 611 # file. However, _writeentry() performs a SEEK_END before all writes,
606 612 # so we should be safe.
607 613 elif self._writinghandles:
608 614 if self._inline:
609 615 yield self._writinghandles[0]
610 616 else:
611 617 yield self._writinghandles[1]
612 618
613 619 # Otherwise open a new file handle.
614 620 else:
615 621 if self._inline:
616 622 func = self._indexfp
617 623 else:
618 624 func = self._datafp
619 625 with func() as fp:
620 626 yield fp
621 627
622 628 def tiprev(self):
623 629 return len(self.index) - 1
624 630
625 631 def tip(self):
626 632 return self.node(self.tiprev())
627 633
628 634 def __contains__(self, rev):
629 635 return 0 <= rev < len(self)
630 636
631 637 def __len__(self):
632 638 return len(self.index)
633 639
634 640 def __iter__(self):
635 641 return iter(pycompat.xrange(len(self)))
636 642
637 643 def revs(self, start=0, stop=None):
638 644 """iterate over all rev in this revlog (from start to stop)"""
639 645 return storageutil.iterrevs(len(self), start=start, stop=stop)
640 646
641 647 @util.propertycache
642 648 def nodemap(self):
643 649 if self.index:
644 650 # populate mapping down to the initial node
645 651 node0 = self.index[0][7] # get around changelog filtering
646 652 self.rev(node0)
647 653 return self.index.nodemap
648 654
649 655 @property
650 656 def _nodecache(self):
651 657 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
652 658 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
653 659 return self.index.nodemap
654 660
655 661 def hasnode(self, node):
656 662 try:
657 663 self.rev(node)
658 664 return True
659 665 except KeyError:
660 666 return False
661 667
662 668 def candelta(self, baserev, rev):
663 669 """whether two revisions (baserev, rev) can be delta-ed or not"""
664 670 # Disable delta if either rev requires a content-changing flag
665 671 # processor (ex. LFS). This is because such flag processor can alter
666 672 # the rawtext content that the delta will be based on, and two clients
667 673 # could have a same revlog node with different flags (i.e. different
668 674 # rawtext contents) and the delta could be incompatible.
669 675 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
670 676 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
671 677 ):
672 678 return False
673 679 return True
674 680
675 681 def clearcaches(self):
676 682 self._revisioncache = None
677 683 self._chainbasecache.clear()
678 684 self._chunkcache = (0, b'')
679 685 self._pcache = {}
680 686 self.index.clearcaches()
681 687
682 688 def rev(self, node):
683 689 try:
684 690 return self.index.nodemap[node]
685 691 except TypeError:
686 692 raise
687 693 except error.RevlogError:
688 694 # parsers.c radix tree lookup failed
689 695 if node == wdirid or node in wdirfilenodeids:
690 696 raise error.WdirUnsupported
691 697 raise error.LookupError(node, self.indexfile, _(b'no node'))
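
A hedged usage sketch (assuming an existing, non-empty repository in the current directory; the path is hypothetical): the changelog is a revlog, so rev() and node() round-trip:

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')  # assumption: cwd is a Mercurial repo
    cl = repo.changelog
    node0 = cl.node(0)
    assert cl.rev(node0) == 0                    # node -> revision number
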
692 698
693 699 # Accessors for index entries.
694 700
695 701 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
696 702 # are flags.
697 703 def start(self, rev):
698 704 return int(self.index[rev][0] >> 16)
699 705
700 706 def flags(self, rev):
701 707 return self.index[rev][0] & 0xFFFF
702 708
703 709 def length(self, rev):
704 710 return self.index[rev][1]
705 711
706 712 def rawsize(self, rev):
707 713 """return the length of the uncompressed text for a given revision"""
708 714 l = self.index[rev][2]
709 715 if l >= 0:
710 716 return l
711 717
712 718 t = self.rawdata(rev)
713 719 return len(t)
714 720
715 721 def size(self, rev):
716 722 """length of non-raw text (processed by a "read" flag processor)"""
717 723 # fast path: if no "read" flag processor could change the content,
718 724 # size is rawsize. note: ELLIPSIS is known to not change the content.
719 725 flags = self.flags(rev)
720 726 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
721 727 return self.rawsize(rev)
722 728
723 729 return len(self.revision(rev, raw=False))
724 730
725 731 def chainbase(self, rev):
726 732 base = self._chainbasecache.get(rev)
727 733 if base is not None:
728 734 return base
729 735
730 736 index = self.index
731 737 iterrev = rev
732 738 base = index[iterrev][3]
733 739 while base != iterrev:
734 740 iterrev = base
735 741 base = index[iterrev][3]
736 742
737 743 self._chainbasecache[rev] = base
738 744 return base
739 745
740 746 def linkrev(self, rev):
741 747 return self.index[rev][4]
742 748
743 749 def parentrevs(self, rev):
744 750 try:
745 751 entry = self.index[rev]
746 752 except IndexError:
747 753 if rev == wdirrev:
748 754 raise error.WdirUnsupported
749 755 raise
750 756
751 757 return entry[5], entry[6]
752 758
753 759 # fast parentrevs(rev) where rev isn't filtered
754 760 _uncheckedparentrevs = parentrevs
755 761
756 762 def node(self, rev):
757 763 try:
758 764 return self.index[rev][7]
759 765 except IndexError:
760 766 if rev == wdirrev:
761 767 raise error.WdirUnsupported
762 768 raise
763 769
764 770 # Derived from index values.
765 771
766 772 def end(self, rev):
767 773 return self.start(rev) + self.length(rev)
768 774
769 775 def parents(self, node):
770 776 i = self.index
771 777 d = i[self.rev(node)]
772 778 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
773 779
774 780 def chainlen(self, rev):
775 781 return self._chaininfo(rev)[0]
776 782
777 783 def _chaininfo(self, rev):
778 784 chaininfocache = self._chaininfocache
779 785 if rev in chaininfocache:
780 786 return chaininfocache[rev]
781 787 index = self.index
782 788 generaldelta = self._generaldelta
783 789 iterrev = rev
784 790 e = index[iterrev]
785 791 clen = 0
786 792 compresseddeltalen = 0
787 793 while iterrev != e[3]:
788 794 clen += 1
789 795 compresseddeltalen += e[1]
790 796 if generaldelta:
791 797 iterrev = e[3]
792 798 else:
793 799 iterrev -= 1
794 800 if iterrev in chaininfocache:
795 801 t = chaininfocache[iterrev]
796 802 clen += t[0]
797 803 compresseddeltalen += t[1]
798 804 break
799 805 e = index[iterrev]
800 806 else:
801 807 # Add text length of base since decompressing that also takes
802 808 # work. For cache hits the length is already included.
803 809 compresseddeltalen += e[1]
804 810 r = (clen, compresseddeltalen)
805 811 chaininfocache[rev] = r
806 812 return r
807 813
808 814 def _deltachain(self, rev, stoprev=None):
809 815 """Obtain the delta chain for a revision.
810 816
811 817 ``stoprev`` specifies a revision to stop at. If not specified, we
812 818 stop at the base of the chain.
813 819
814 820 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
815 821 revs in ascending order and ``stopped`` is a bool indicating whether
816 822 ``stoprev`` was hit.
817 823 """
818 824 # Try C implementation.
819 825 try:
820 826 return self.index.deltachain(rev, stoprev, self._generaldelta)
821 827 except AttributeError:
822 828 pass
823 829
824 830 chain = []
825 831
826 832 # Alias to prevent attribute lookup in tight loop.
827 833 index = self.index
828 834 generaldelta = self._generaldelta
829 835
830 836 iterrev = rev
831 837 e = index[iterrev]
832 838 while iterrev != e[3] and iterrev != stoprev:
833 839 chain.append(iterrev)
834 840 if generaldelta:
835 841 iterrev = e[3]
836 842 else:
837 843 iterrev -= 1
838 844 e = index[iterrev]
839 845
840 846 if iterrev == stoprev:
841 847 stopped = True
842 848 else:
843 849 chain.append(iterrev)
844 850 stopped = False
845 851
846 852 chain.reverse()
847 853 return chain, stopped
848 854
849 855 def ancestors(self, revs, stoprev=0, inclusive=False):
850 856 """Generate the ancestors of 'revs' in reverse revision order.
851 857 Does not generate revs lower than stoprev.
852 858
853 859 See the documentation for ancestor.lazyancestors for more details."""
854 860
855 861 # first, make sure start revisions aren't filtered
856 862 revs = list(revs)
857 863 checkrev = self.node
858 864 for r in revs:
859 865 checkrev(r)
860 866 # and we're sure ancestors aren't filtered as well
861 867
862 868 if rustancestor is not None:
863 869 lazyancestors = rustancestor.LazyAncestors
864 870 arg = self.index
865 871 elif util.safehasattr(parsers, b'rustlazyancestors'):
866 872 lazyancestors = ancestor.rustlazyancestors
867 873 arg = self.index
868 874 else:
869 875 lazyancestors = ancestor.lazyancestors
870 876 arg = self._uncheckedparentrevs
871 877 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
872 878
873 879 def descendants(self, revs):
874 880 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
875 881
876 882 def findcommonmissing(self, common=None, heads=None):
877 883 """Return a tuple of the ancestors of common and the ancestors of heads
878 884 that are not ancestors of common. In revset terminology, we return the
879 885 tuple:
880 886
881 887 ::common, (::heads) - (::common)
882 888
883 889 The list is sorted by revision number, meaning it is
884 890 topologically sorted.
885 891
886 892 'heads' and 'common' are both lists of node IDs. If heads is
887 893 not supplied, uses all of the revlog's heads. If common is not
888 894 supplied, uses nullid."""
889 895 if common is None:
890 896 common = [nullid]
891 897 if heads is None:
892 898 heads = self.heads()
893 899
894 900 common = [self.rev(n) for n in common]
895 901 heads = [self.rev(n) for n in heads]
896 902
897 903 # we want the ancestors, but inclusive
898 904 class lazyset(object):
899 905 def __init__(self, lazyvalues):
900 906 self.addedvalues = set()
901 907 self.lazyvalues = lazyvalues
902 908
903 909 def __contains__(self, value):
904 910 return value in self.addedvalues or value in self.lazyvalues
905 911
906 912 def __iter__(self):
907 913 added = self.addedvalues
908 914 for r in added:
909 915 yield r
910 916 for r in self.lazyvalues:
911 917                 if r not in added:
912 918 yield r
913 919
914 920 def add(self, value):
915 921 self.addedvalues.add(value)
916 922
917 923 def update(self, values):
918 924 self.addedvalues.update(values)
919 925
920 926 has = lazyset(self.ancestors(common))
921 927 has.add(nullrev)
922 928 has.update(common)
923 929
924 930 # take all ancestors from heads that aren't in has
925 931 missing = set()
926 932 visit = collections.deque(r for r in heads if r not in has)
927 933 while visit:
928 934 r = visit.popleft()
929 935 if r in missing:
930 936 continue
931 937 else:
932 938 missing.add(r)
933 939 for p in self.parentrevs(r):
934 940 if p not in has:
935 941 visit.append(p)
936 942 missing = list(missing)
937 943 missing.sort()
938 944 return has, [self.node(miss) for miss in missing]
939 945
940 946 def incrementalmissingrevs(self, common=None):
941 947 """Return an object that can be used to incrementally compute the
942 948 revision numbers of the ancestors of arbitrary sets that are not
943 949 ancestors of common. This is an ancestor.incrementalmissingancestors
944 950 object.
945 951
946 952 'common' is a list of revision numbers. If common is not supplied, uses
947 953 nullrev.
948 954 """
949 955 if common is None:
950 956 common = [nullrev]
951 957
952 958 if rustancestor is not None:
953 959 return rustancestor.MissingAncestors(self.index, common)
954 960 return ancestor.incrementalmissingancestors(self.parentrevs, common)
955 961
956 962 def findmissingrevs(self, common=None, heads=None):
957 963 """Return the revision numbers of the ancestors of heads that
958 964 are not ancestors of common.
959 965
960 966 More specifically, return a list of revision numbers corresponding to
961 967 nodes N such that every N satisfies the following constraints:
962 968
963 969 1. N is an ancestor of some node in 'heads'
964 970 2. N is not an ancestor of any node in 'common'
965 971
966 972 The list is sorted by revision number, meaning it is
967 973 topologically sorted.
968 974
969 975 'heads' and 'common' are both lists of revision numbers. If heads is
970 976 not supplied, uses all of the revlog's heads. If common is not
971 977 supplied, uses nullid."""
972 978 if common is None:
973 979 common = [nullrev]
974 980 if heads is None:
975 981 heads = self.headrevs()
976 982
977 983 inc = self.incrementalmissingrevs(common=common)
978 984 return inc.missingancestors(heads)
979 985
980 986 def findmissing(self, common=None, heads=None):
981 987 """Return the ancestors of heads that are not ancestors of common.
982 988
983 989 More specifically, return a list of nodes N such that every N
984 990 satisfies the following constraints:
985 991
986 992 1. N is an ancestor of some node in 'heads'
987 993 2. N is not an ancestor of any node in 'common'
988 994
989 995 The list is sorted by revision number, meaning it is
990 996 topologically sorted.
991 997
992 998 'heads' and 'common' are both lists of node IDs. If heads is
993 999 not supplied, uses all of the revlog's heads. If common is not
994 1000 supplied, uses nullid."""
995 1001 if common is None:
996 1002 common = [nullid]
997 1003 if heads is None:
998 1004 heads = self.heads()
999 1005
1000 1006 common = [self.rev(n) for n in common]
1001 1007 heads = [self.rev(n) for n in heads]
1002 1008
1003 1009 inc = self.incrementalmissingrevs(common=common)
1004 1010 return [self.node(r) for r in inc.missingancestors(heads)]
1005 1011
1006 1012 def nodesbetween(self, roots=None, heads=None):
1007 1013 """Return a topological path from 'roots' to 'heads'.
1008 1014
1009 1015 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1010 1016 topologically sorted list of all nodes N that satisfy both of
1011 1017 these constraints:
1012 1018
1013 1019 1. N is a descendant of some node in 'roots'
1014 1020 2. N is an ancestor of some node in 'heads'
1015 1021
1016 1022 Every node is considered to be both a descendant and an ancestor
1017 1023 of itself, so every reachable node in 'roots' and 'heads' will be
1018 1024 included in 'nodes'.
1019 1025
1020 1026 'outroots' is the list of reachable nodes in 'roots', i.e., the
1021 1027 subset of 'roots' that is returned in 'nodes'. Likewise,
1022 1028 'outheads' is the subset of 'heads' that is also in 'nodes'.
1023 1029
1024 1030 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1025 1031 unspecified, uses nullid as the only root. If 'heads' is
1026 1032 unspecified, uses list of all of the revlog's heads."""
1027 1033 nonodes = ([], [], [])
1028 1034 if roots is not None:
1029 1035 roots = list(roots)
1030 1036 if not roots:
1031 1037 return nonodes
1032 1038 lowestrev = min([self.rev(n) for n in roots])
1033 1039 else:
1034 1040 roots = [nullid] # Everybody's a descendant of nullid
1035 1041 lowestrev = nullrev
1036 1042 if (lowestrev == nullrev) and (heads is None):
1037 1043 # We want _all_ the nodes!
1038 1044 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1039 1045 if heads is None:
1040 1046 # All nodes are ancestors, so the latest ancestor is the last
1041 1047 # node.
1042 1048 highestrev = len(self) - 1
1043 1049 # Set ancestors to None to signal that every node is an ancestor.
1044 1050 ancestors = None
1045 1051 # Set heads to an empty dictionary for later discovery of heads
1046 1052 heads = {}
1047 1053 else:
1048 1054 heads = list(heads)
1049 1055 if not heads:
1050 1056 return nonodes
1051 1057 ancestors = set()
1052 1058 # Turn heads into a dictionary so we can remove 'fake' heads.
1053 1059 # Also, later we will be using it to filter out the heads we can't
1054 1060 # find from roots.
1055 1061 heads = dict.fromkeys(heads, False)
1056 1062 # Start at the top and keep marking parents until we're done.
1057 1063 nodestotag = set(heads)
1058 1064 # Remember where the top was so we can use it as a limit later.
1059 1065 highestrev = max([self.rev(n) for n in nodestotag])
1060 1066 while nodestotag:
1061 1067 # grab a node to tag
1062 1068 n = nodestotag.pop()
1063 1069 # Never tag nullid
1064 1070 if n == nullid:
1065 1071 continue
1066 1072 # A node's revision number represents its place in a
1067 1073 # topologically sorted list of nodes.
1068 1074 r = self.rev(n)
1069 1075 if r >= lowestrev:
1070 1076 if n not in ancestors:
1071 1077 # If we are possibly a descendant of one of the roots
1072 1078 # and we haven't already been marked as an ancestor
1073 1079 ancestors.add(n) # Mark as ancestor
1074 1080 # Add non-nullid parents to list of nodes to tag.
1075 1081 nodestotag.update(
1076 1082 [p for p in self.parents(n) if p != nullid]
1077 1083 )
1078 1084 elif n in heads: # We've seen it before, is it a fake head?
1079 1085 # So it is, real heads should not be the ancestors of
1080 1086 # any other heads.
1081 1087 heads.pop(n)
1082 1088 if not ancestors:
1083 1089 return nonodes
1084 1090 # Now that we have our set of ancestors, we want to remove any
1085 1091 # roots that are not ancestors.
1086 1092
1087 1093 # If one of the roots was nullid, everything is included anyway.
1088 1094 if lowestrev > nullrev:
1089 1095 # But, since we weren't, let's recompute the lowest rev to not
1090 1096 # include roots that aren't ancestors.
1091 1097
1092 1098 # Filter out roots that aren't ancestors of heads
1093 1099 roots = [root for root in roots if root in ancestors]
1094 1100 # Recompute the lowest revision
1095 1101 if roots:
1096 1102 lowestrev = min([self.rev(root) for root in roots])
1097 1103 else:
1098 1104 # No more roots? Return empty list
1099 1105 return nonodes
1100 1106 else:
1101 1107 # We are descending from nullid, and don't need to care about
1102 1108 # any other roots.
1103 1109 lowestrev = nullrev
1104 1110 roots = [nullid]
1105 1111 # Transform our roots list into a set.
1106 1112 descendants = set(roots)
1107 1113 # Also, keep the original roots so we can filter out roots that aren't
1108 1114 # 'real' roots (i.e. are descended from other roots).
1109 1115 roots = descendants.copy()
1110 1116 # Our topologically sorted list of output nodes.
1111 1117 orderedout = []
1112 1118 # Don't start at nullid since we don't want nullid in our output list,
1113 1119 # and if nullid shows up in descendants, empty parents will look like
1114 1120 # they're descendants.
1115 1121 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1116 1122 n = self.node(r)
1117 1123 isdescendant = False
1118 1124 if lowestrev == nullrev: # Everybody is a descendant of nullid
1119 1125 isdescendant = True
1120 1126 elif n in descendants:
1121 1127 # n is already a descendant
1122 1128 isdescendant = True
1123 1129 # This check only needs to be done here because all the roots
1124 1130                 # will start being marked as descendants before the loop.
1125 1131 if n in roots:
1126 1132 # If n was a root, check if it's a 'real' root.
1127 1133 p = tuple(self.parents(n))
1128 1134 # If any of its parents are descendants, it's not a root.
1129 1135 if (p[0] in descendants) or (p[1] in descendants):
1130 1136 roots.remove(n)
1131 1137 else:
1132 1138 p = tuple(self.parents(n))
1133 1139 # A node is a descendant if either of its parents are
1134 1140                 # descendants. (We seeded the descendants set with the roots
1135 1141 # up there, remember?)
1136 1142 if (p[0] in descendants) or (p[1] in descendants):
1137 1143 descendants.add(n)
1138 1144 isdescendant = True
1139 1145 if isdescendant and ((ancestors is None) or (n in ancestors)):
1140 1146 # Only include nodes that are both descendants and ancestors.
1141 1147 orderedout.append(n)
1142 1148 if (ancestors is not None) and (n in heads):
1143 1149 # We're trying to figure out which heads are reachable
1144 1150 # from roots.
1145 1151 # Mark this head as having been reached
1146 1152 heads[n] = True
1147 1153 elif ancestors is None:
1148 1154 # Otherwise, we're trying to discover the heads.
1149 1155 # Assume this is a head because if it isn't, the next step
1150 1156 # will eventually remove it.
1151 1157 heads[n] = True
1152 1158 # But, obviously its parents aren't.
1153 1159 for p in self.parents(n):
1154 1160 heads.pop(p, None)
1155 1161 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1156 1162 roots = list(roots)
1157 1163 assert orderedout
1158 1164 assert roots
1159 1165 assert heads
1160 1166 return (orderedout, roots, heads)
1161 1167
1162 1168 def headrevs(self, revs=None):
1163 1169 if revs is None:
1164 1170 try:
1165 1171 return self.index.headrevs()
1166 1172 except AttributeError:
1167 1173 return self._headrevs()
1168 1174 if rustdagop is not None:
1169 1175 return rustdagop.headrevs(self.index, revs)
1170 1176 return dagop.headrevs(revs, self._uncheckedparentrevs)
1171 1177
1172 1178 def computephases(self, roots):
1173 1179 return self.index.computephasesmapsets(roots)
1174 1180
1175 1181 def _headrevs(self):
1176 1182 count = len(self)
1177 1183 if not count:
1178 1184 return [nullrev]
1179 1185 # we won't iter over filtered rev so nobody is a head at start
1180 1186 ishead = [0] * (count + 1)
1181 1187 index = self.index
1182 1188 for r in self:
1183 1189 ishead[r] = 1 # I may be a head
1184 1190 e = index[r]
1185 1191 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1186 1192 return [r for r, val in enumerate(ishead) if val]
1187 1193
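# Editorial sketch (not part of revlog.py): the pure-Python fallback above
# boils down to "a revision is a head unless a later revision names it as a
# parent".  A minimal, self-contained illustration over a plain list of
# (p1, p2) parent pairs, with -1 standing in for nullrev:
def example_headrevs(parentrevs):
    """parentrevs[r] is the (p1, p2) pair of revision r; return head revs."""
    ishead = [True] * len(parentrevs)
    for p1, p2 in parentrevs:
        if p1 != -1:
            ishead[p1] = False
        if p2 != -1:
            ishead[p2] = False
    return [r for r, flag in enumerate(ishead) if flag]

# a small DAG 0 <- 1 <- 2 plus 0 <- 3 has heads 2 and 3
assert example_headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]
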
1188 1194 def heads(self, start=None, stop=None):
1189 1195 """return the list of all nodes that have no children
1190 1196
1191 1197 if start is specified, only heads that are descendants of
1192 1198 start will be returned
1193 1199 if stop is specified, it will consider all the revs from stop
1194 1200 as if they had no children
1195 1201 """
1196 1202 if start is None and stop is None:
1197 1203 if not len(self):
1198 1204 return [nullid]
1199 1205 return [self.node(r) for r in self.headrevs()]
1200 1206
1201 1207 if start is None:
1202 1208 start = nullrev
1203 1209 else:
1204 1210 start = self.rev(start)
1205 1211
1206 1212 stoprevs = set(self.rev(n) for n in stop or [])
1207 1213
1208 1214 revs = dagop.headrevssubset(
1209 1215 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1210 1216 )
1211 1217
1212 1218 return [self.node(rev) for rev in revs]
1213 1219
1214 1220 def children(self, node):
1215 1221 """find the children of a given node"""
1216 1222 c = []
1217 1223 p = self.rev(node)
1218 1224 for r in self.revs(start=p + 1):
1219 1225 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1220 1226 if prevs:
1221 1227 for pr in prevs:
1222 1228 if pr == p:
1223 1229 c.append(self.node(r))
1224 1230 elif p == nullrev:
1225 1231 c.append(self.node(r))
1226 1232 return c
1227 1233
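# Editorial sketch (not part of revlog.py): children() above scans every
# revision after the target and keeps those naming it as a parent.  The same
# idea over plain (p1, p2) pairs, with -1 standing in for nullrev and the
# special nullrev handling of the real method left out:
def example_children(parentrevs, p):
    return [r for r in range(p + 1, len(parentrevs)) if p in parentrevs[r]]

assert example_children([(-1, -1), (0, -1), (0, 1)], 0) == [1, 2]
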
1228 1234 def commonancestorsheads(self, a, b):
1229 1235 """calculate all the heads of the common ancestors of nodes a and b"""
1230 1236 a, b = self.rev(a), self.rev(b)
1231 1237 ancs = self._commonancestorsheads(a, b)
1232 1238 return pycompat.maplist(self.node, ancs)
1233 1239
1234 1240 def _commonancestorsheads(self, *revs):
1235 1241 """calculate all the heads of the common ancestors of revs"""
1236 1242 try:
1237 1243 ancs = self.index.commonancestorsheads(*revs)
1238 1244 except (AttributeError, OverflowError): # C implementation failed
1239 1245 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1240 1246 return ancs
1241 1247
1242 1248 def isancestor(self, a, b):
1243 1249 """return True if node a is an ancestor of node b
1244 1250
1245 1251 A revision is considered an ancestor of itself."""
1246 1252 a, b = self.rev(a), self.rev(b)
1247 1253 return self.isancestorrev(a, b)
1248 1254
1249 1255 def isancestorrev(self, a, b):
1250 1256 """return True if revision a is an ancestor of revision b
1251 1257
1252 1258 A revision is considered an ancestor of itself.
1253 1259
1254 1260 The implementation of this is trivial but the use of
1255 1261 reachableroots is not."""
1256 1262 if a == nullrev:
1257 1263 return True
1258 1264 elif a == b:
1259 1265 return True
1260 1266 elif a > b:
1261 1267 return False
1262 1268 return bool(self.reachableroots(a, [b], [a], includepath=False))
1263 1269
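# Editorial sketch (not part of revlog.py): isancestorrev() leans on the fact
# that revision numbers are topologically ordered, so an ancestor can never
# carry a higher number than its descendant.  A plain ancestry walk showing
# the same property, with -1 standing in for nullrev:
def example_isancestorrev(parentrevs, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False
    stack, seen = [b], set()
    while stack:
        for p in parentrevs[stack.pop()]:
            if p == a:
                return True
            if p > a and p not in seen:  # parents numbered below a cannot reach it
                seen.add(p)
                stack.append(p)
    return False

_parents = [(-1, -1), (0, -1), (1, -1), (0, -1)]
assert example_isancestorrev(_parents, 0, 2)
assert not example_isancestorrev(_parents, 3, 2)
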
1264 1270 def reachableroots(self, minroot, heads, roots, includepath=False):
1265 1271 """return (heads(::<roots> and <roots>::<heads>))
1266 1272
1267 1273 If includepath is True, return (<roots>::<heads>)."""
1268 1274 try:
1269 1275 return self.index.reachableroots2(
1270 1276 minroot, heads, roots, includepath
1271 1277 )
1272 1278 except AttributeError:
1273 1279 return dagop._reachablerootspure(
1274 1280 self.parentrevs, minroot, roots, heads, includepath
1275 1281 )
1276 1282
1277 1283 def ancestor(self, a, b):
1278 1284 """calculate the "best" common ancestor of nodes a and b"""
1279 1285
1280 1286 a, b = self.rev(a), self.rev(b)
1281 1287 try:
1282 1288 ancs = self.index.ancestors(a, b)
1283 1289 except (AttributeError, OverflowError):
1284 1290 ancs = ancestor.ancestors(self.parentrevs, a, b)
1285 1291 if ancs:
1286 1292 # choose a consistent winner when there's a tie
1287 1293 return min(map(self.node, ancs))
1288 1294 return nullid
1289 1295
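# Editorial sketch (not part of revlog.py): "best" here means a greatest
# common ancestor, with the smaller node picked on ties for determinism.  A
# naive version over plain (p1, p2) pairs that intersects full ancestor sets
# (the production code uses a much faster incremental algorithm):
def example_ancestorset(parentrevs, r):
    seen, stack = {r}, [r]
    while stack:
        for p in parentrevs[stack.pop()]:
            if p != -1 and p not in seen:
                seen.add(p)
                stack.append(p)
    return seen

def example_gca(parentrevs, a, b):
    common = example_ancestorset(parentrevs, a) & example_ancestorset(parentrevs, b)
    return max(common) if common else -1

# diamond: 0 <- 1, 0 <- 2, (1, 2) <- 3; the best common ancestor of 1 and 2 is 0
assert example_gca([(-1, -1), (0, -1), (0, -1), (1, 2)], 1, 2) == 0
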
1290 1296 def _match(self, id):
1291 1297 if isinstance(id, int):
1292 1298 # rev
1293 1299 return self.node(id)
1294 1300 if len(id) == 20:
1295 1301 # possibly a binary node
1296 1302 # odds of a binary node being all hex in ASCII are 1 in 10**25
1297 1303 try:
1298 1304 node = id
1299 1305 self.rev(node) # quick search the index
1300 1306 return node
1301 1307 except error.LookupError:
1302 1308 pass # may be partial hex id
1303 1309 try:
1304 1310 # str(rev)
1305 1311 rev = int(id)
1306 1312 if b"%d" % rev != id:
1307 1313 raise ValueError
1308 1314 if rev < 0:
1309 1315 rev = len(self) + rev
1310 1316 if rev < 0 or rev >= len(self):
1311 1317 raise ValueError
1312 1318 return self.node(rev)
1313 1319 except (ValueError, OverflowError):
1314 1320 pass
1315 1321 if len(id) == 40:
1316 1322 try:
1317 1323 # a full hex nodeid?
1318 1324 node = bin(id)
1319 1325 self.rev(node)
1320 1326 return node
1321 1327 except (TypeError, error.LookupError):
1322 1328 pass
1323 1329
1324 1330 def _partialmatch(self, id):
1325 1331 # we don't care about wdirfilenodeids as they should always be full hashes
1326 1332 maybewdir = wdirhex.startswith(id)
1327 1333 try:
1328 1334 partial = self.index.partialmatch(id)
1329 1335 if partial and self.hasnode(partial):
1330 1336 if maybewdir:
1331 1337 # single 'ff...' match in radix tree, ambiguous with wdir
1332 1338 raise error.RevlogError
1333 1339 return partial
1334 1340 if maybewdir:
1335 1341 # no 'ff...' match in radix tree, wdir identified
1336 1342 raise error.WdirUnsupported
1337 1343 return None
1338 1344 except error.RevlogError:
1339 1345 # parsers.c radix tree lookup gave multiple matches
1340 1346 # fast path: for unfiltered changelog, radix tree is accurate
1341 1347 if not getattr(self, 'filteredrevs', None):
1342 1348 raise error.AmbiguousPrefixLookupError(
1343 1349 id, self.indexfile, _(b'ambiguous identifier')
1344 1350 )
1345 1351 # fall through to slow path that filters hidden revisions
1346 1352 except (AttributeError, ValueError):
1347 1353 # we are pure python, or key was too short to search radix tree
1348 1354 pass
1349 1355
1350 1356 if id in self._pcache:
1351 1357 return self._pcache[id]
1352 1358
1353 1359 if len(id) <= 40:
1354 1360 try:
1355 1361 # hex(node)[:...]
1356 1362 l = len(id) // 2 # grab an even number of digits
1357 1363 prefix = bin(id[: l * 2])
1358 1364 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1359 1365 nl = [
1360 1366 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1361 1367 ]
1362 1368 if nullhex.startswith(id):
1363 1369 nl.append(nullid)
1364 1370 if len(nl) > 0:
1365 1371 if len(nl) == 1 and not maybewdir:
1366 1372 self._pcache[id] = nl[0]
1367 1373 return nl[0]
1368 1374 raise error.AmbiguousPrefixLookupError(
1369 1375 id, self.indexfile, _(b'ambiguous identifier')
1370 1376 )
1371 1377 if maybewdir:
1372 1378 raise error.WdirUnsupported
1373 1379 return None
1374 1380 except TypeError:
1375 1381 pass
1376 1382
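# Editorial sketch (not part of revlog.py): the slow path of _partialmatch()
# amounts to "collect every node whose hex form starts with the prefix and
# insist there is exactly one".  A self-contained version over plain hex
# strings (the fast path uses a radix tree implemented in C):
def example_partialmatch(hexnodes, prefix):
    matches = [n for n in hexnodes if n.startswith(prefix)]
    if len(matches) > 1:
        raise LookupError('ambiguous identifier: %s' % prefix)
    return matches[0] if matches else None

_hexnodes = ['aa17c0', 'ab99f2', 'cd0012']
assert example_partialmatch(_hexnodes, 'cd') == 'cd0012'
assert example_partialmatch(_hexnodes, 'ff') is None
# example_partialmatch(_hexnodes, 'a') would raise LookupError (ambiguous)
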
1377 1383 def lookup(self, id):
1378 1384 """locate a node based on:
1379 1385 - revision number or str(revision number)
1380 1386 - nodeid or subset of hex nodeid
1381 1387 """
1382 1388 n = self._match(id)
1383 1389 if n is not None:
1384 1390 return n
1385 1391 n = self._partialmatch(id)
1386 1392 if n:
1387 1393 return n
1388 1394
1389 1395 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1390 1396
1391 1397 def shortest(self, node, minlength=1):
1392 1398 """Find the shortest unambiguous prefix that matches node."""
1393 1399
1394 1400 def isvalid(prefix):
1395 1401 try:
1396 1402 matchednode = self._partialmatch(prefix)
1397 1403 except error.AmbiguousPrefixLookupError:
1398 1404 return False
1399 1405 except error.WdirUnsupported:
1400 1406 # single 'ff...' match
1401 1407 return True
1402 1408 if matchednode is None:
1403 1409 raise error.LookupError(node, self.indexfile, _(b'no node'))
1404 1410 return True
1405 1411
1406 1412 def maybewdir(prefix):
1407 1413 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1408 1414
1409 1415 hexnode = hex(node)
1410 1416
1411 1417 def disambiguate(hexnode, minlength):
1412 1418 """Disambiguate against wdirid."""
1413 1419 for length in range(minlength, 41):
1414 1420 prefix = hexnode[:length]
1415 1421 if not maybewdir(prefix):
1416 1422 return prefix
1417 1423
1418 1424 if not getattr(self, 'filteredrevs', None):
1419 1425 try:
1420 1426 length = max(self.index.shortest(node), minlength)
1421 1427 return disambiguate(hexnode, length)
1422 1428 except error.RevlogError:
1423 1429 if node != wdirid:
1424 1430 raise error.LookupError(node, self.indexfile, _(b'no node'))
1425 1431 except AttributeError:
1426 1432 # Fall through to pure code
1427 1433 pass
1428 1434
1429 1435 if node == wdirid:
1430 1436 for length in range(minlength, 41):
1431 1437 prefix = hexnode[:length]
1432 1438 if isvalid(prefix):
1433 1439 return prefix
1434 1440
1435 1441 for length in range(minlength, 41):
1436 1442 prefix = hexnode[:length]
1437 1443 if isvalid(prefix):
1438 1444 return disambiguate(hexnode, length)
1439 1445
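# Editorial sketch (not part of revlog.py): shortest() returns the smallest
# hex prefix that still matches a single node.  The same search in a
# self-contained form, ignoring the wdir and hidden-revision subtleties:
def example_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(n.startswith(prefix) for n in hexnodes) == 1:
            return prefix
    return hexnode

_hexnodes = ['aa17c0', 'ab99f2', 'cd0012']
assert example_shortest(_hexnodes, 'aa17c0') == 'aa'
assert example_shortest(_hexnodes, 'cd0012') == 'c'
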
1440 1446 def cmp(self, node, text):
1441 1447 """compare text with a given file revision
1442 1448
1443 1449 returns True if text is different than what is stored.
1444 1450 """
1445 1451 p1, p2 = self.parents(node)
1446 1452 return storageutil.hashrevisionsha1(text, p1, p2) != node
1447 1453
1448 1454 def _cachesegment(self, offset, data):
1449 1455 """Add a segment to the revlog cache.
1450 1456
1451 1457 Accepts an absolute offset and the data that is at that location.
1452 1458 """
1453 1459 o, d = self._chunkcache
1454 1460 # try to add to existing cache
1455 1461 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1456 1462 self._chunkcache = o, d + data
1457 1463 else:
1458 1464 self._chunkcache = offset, data
1459 1465
1460 1466 def _readsegment(self, offset, length, df=None):
1461 1467 """Load a segment of raw data from the revlog.
1462 1468
1463 1469 Accepts an absolute offset, length to read, and an optional existing
1464 1470 file handle to read from.
1465 1471
1466 1472 If an existing file handle is passed, it will be seeked and the
1467 1473 original seek position will NOT be restored.
1468 1474
1469 1475 Returns a str or buffer of raw byte data.
1470 1476
1471 1477 Raises if the requested number of bytes could not be read.
1472 1478 """
1473 1479 # Cache data both forward and backward around the requested
1474 1480 # data, in a fixed size window. This helps speed up operations
1475 1481 # involving reading the revlog backwards.
1476 1482 cachesize = self._chunkcachesize
1477 1483 realoffset = offset & ~(cachesize - 1)
1478 1484 reallength = (
1479 1485 (offset + length + cachesize) & ~(cachesize - 1)
1480 1486 ) - realoffset
1481 1487 with self._datareadfp(df) as df:
1482 1488 df.seek(realoffset)
1483 1489 d = df.read(reallength)
1484 1490
1485 1491 self._cachesegment(realoffset, d)
1486 1492 if offset != realoffset or reallength != length:
1487 1493 startoffset = offset - realoffset
1488 1494 if len(d) - startoffset < length:
1489 1495 raise error.RevlogError(
1490 1496 _(
1491 1497 b'partial read of revlog %s; expected %d bytes from '
1492 1498 b'offset %d, got %d'
1493 1499 )
1494 1500 % (
1495 1501 self.indexfile if self._inline else self.datafile,
1496 1502 length,
1497 1503 realoffset,
1498 1504 len(d) - startoffset,
1499 1505 )
1500 1506 )
1501 1507
1502 1508 return util.buffer(d, startoffset, length)
1503 1509
1504 1510 if len(d) < length:
1505 1511 raise error.RevlogError(
1506 1512 _(
1507 1513 b'partial read of revlog %s; expected %d bytes from offset '
1508 1514 b'%d, got %d'
1509 1515 )
1510 1516 % (
1511 1517 self.indexfile if self._inline else self.datafile,
1512 1518 length,
1513 1519 offset,
1514 1520 len(d),
1515 1521 )
1516 1522 )
1517 1523
1518 1524 return d
1519 1525
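# Editorial sketch (not part of revlog.py): the read above is widened to a
# window aligned on the chunk-cache size, which has to be a power of two for
# the bit masking to work.  The alignment arithmetic in isolation:
def example_cachewindow(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)  # round the start down
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# a 10-byte read at offset 70000 with a 64KiB cache becomes one aligned
# 64KiB read starting at 65536, which fully covers the requested range
assert example_cachewindow(70000, 10) == (65536, 65536)
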
1520 1526 def _getsegment(self, offset, length, df=None):
1521 1527 """Obtain a segment of raw data from the revlog.
1522 1528
1523 1529 Accepts an absolute offset, length of bytes to obtain, and an
1524 1530 optional file handle to the already-opened revlog. If the file
1525 1531 handle is used, its original seek position will not be preserved.
1526 1532
1527 1533 Requests for data may be returned from a cache.
1528 1534
1529 1535 Returns a str or a buffer instance of raw byte data.
1530 1536 """
1531 1537 o, d = self._chunkcache
1532 1538 l = len(d)
1533 1539
1534 1540 # is it in the cache?
1535 1541 cachestart = offset - o
1536 1542 cacheend = cachestart + length
1537 1543 if cachestart >= 0 and cacheend <= l:
1538 1544 if cachestart == 0 and cacheend == l:
1539 1545 return d # avoid a copy
1540 1546 return util.buffer(d, cachestart, cacheend - cachestart)
1541 1547
1542 1548 return self._readsegment(offset, length, df=df)
1543 1549
1544 1550 def _getsegmentforrevs(self, startrev, endrev, df=None):
1545 1551 """Obtain a segment of raw data corresponding to a range of revisions.
1546 1552
1547 1553 Accepts the start and end revisions and an optional already-open
1548 1554 file handle to be used for reading. If the file handle is used, its
1549 1555 seek position will not be preserved.
1550 1556
1551 1557 Requests for data may be satisfied by a cache.
1552 1558
1553 1559 Returns a 2-tuple of (offset, data) for the requested range of
1554 1560 revisions. Offset is the integer offset from the beginning of the
1555 1561 revlog and data is a str or buffer of the raw byte data.
1556 1562
1557 1563 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1558 1564 to determine where each revision's data begins and ends.
1559 1565 """
1560 1566 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1561 1567 # (functions are expensive).
1562 1568 index = self.index
1563 1569 istart = index[startrev]
1564 1570 start = int(istart[0] >> 16)
1565 1571 if startrev == endrev:
1566 1572 end = start + istart[1]
1567 1573 else:
1568 1574 iend = index[endrev]
1569 1575 end = int(iend[0] >> 16) + iend[1]
1570 1576
1571 1577 if self._inline:
1572 1578 start += (startrev + 1) * self._io.size
1573 1579 end += (endrev + 1) * self._io.size
1574 1580 length = end - start
1575 1581
1576 1582 return start, self._getsegment(start, length, df=df)
1577 1583
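# Editorial sketch (not part of revlog.py): in an inline revlog the data of
# revision r is shifted by the r + 1 index entries written before it, which
# is what the "(rev + 1) * self._io.size" adjustments above account for.  In
# isolation, assuming a fixed 64-byte entry size:
def example_inlineoffset(dataoffset, rev, entrysize=64):
    return dataoffset + (rev + 1) * entrysize

# the chunk of revision 2, whose data-space offset is 300, starts three index
# entries further into the inline file
assert example_inlineoffset(300, 2) == 300 + 3 * 64
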
1578 1584 def _chunk(self, rev, df=None):
1579 1585 """Obtain a single decompressed chunk for a revision.
1580 1586
1581 1587 Accepts an integer revision and an optional already-open file handle
1582 1588 to be used for reading. If used, the seek position of the file will not
1583 1589 be preserved.
1584 1590
1585 1591 Returns a str holding uncompressed data for the requested revision.
1586 1592 """
1587 1593 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1588 1594
1589 1595 def _chunks(self, revs, df=None, targetsize=None):
1590 1596 """Obtain decompressed chunks for the specified revisions.
1591 1597
1592 1598 Accepts an iterable of numeric revisions that are assumed to be in
1593 1599 ascending order. Also accepts an optional already-open file handle
1594 1600 to be used for reading. If used, the seek position of the file will
1595 1601 not be preserved.
1596 1602
1597 1603 This function is similar to calling ``self._chunk()`` multiple times,
1598 1604 but is faster.
1599 1605
1600 1606 Returns a list with decompressed data for each requested revision.
1601 1607 """
1602 1608 if not revs:
1603 1609 return []
1604 1610 start = self.start
1605 1611 length = self.length
1606 1612 inline = self._inline
1607 1613 iosize = self._io.size
1608 1614 buffer = util.buffer
1609 1615
1610 1616 l = []
1611 1617 ladd = l.append
1612 1618
1613 1619 if not self._withsparseread:
1614 1620 slicedchunks = (revs,)
1615 1621 else:
1616 1622 slicedchunks = deltautil.slicechunk(
1617 1623 self, revs, targetsize=targetsize
1618 1624 )
1619 1625
1620 1626 for revschunk in slicedchunks:
1621 1627 firstrev = revschunk[0]
1622 1628 # Skip trailing revisions with empty diff
1623 1629 for lastrev in revschunk[::-1]:
1624 1630 if length(lastrev) != 0:
1625 1631 break
1626 1632
1627 1633 try:
1628 1634 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1629 1635 except OverflowError:
1630 1636 # issue4215 - we can't cache a run of chunks greater than
1631 1637 # 2G on Windows
1632 1638 return [self._chunk(rev, df=df) for rev in revschunk]
1633 1639
1634 1640 decomp = self.decompress
1635 1641 for rev in revschunk:
1636 1642 chunkstart = start(rev)
1637 1643 if inline:
1638 1644 chunkstart += (rev + 1) * iosize
1639 1645 chunklength = length(rev)
1640 1646 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1641 1647
1642 1648 return l
1643 1649
1644 1650 def _chunkclear(self):
1645 1651 """Clear the raw chunk cache."""
1646 1652 self._chunkcache = (0, b'')
1647 1653
1648 1654 def deltaparent(self, rev):
1649 1655 """return deltaparent of the given revision"""
1650 1656 base = self.index[rev][3]
1651 1657 if base == rev:
1652 1658 return nullrev
1653 1659 elif self._generaldelta:
1654 1660 return base
1655 1661 else:
1656 1662 return rev - 1
1657 1663
1658 1664 def issnapshot(self, rev):
1659 1665 """tells whether rev is a snapshot
1660 1666 """
1661 1667 if not self._sparserevlog:
1662 1668 return self.deltaparent(rev) == nullrev
1663 1669 elif util.safehasattr(self.index, b'issnapshot'):
1664 1670 # directly assign the method to cache the testing and access
1665 1671 self.issnapshot = self.index.issnapshot
1666 1672 return self.issnapshot(rev)
1667 1673 if rev == nullrev:
1668 1674 return True
1669 1675 entry = self.index[rev]
1670 1676 base = entry[3]
1671 1677 if base == rev:
1672 1678 return True
1673 1679 if base == nullrev:
1674 1680 return True
1675 1681 p1 = entry[5]
1676 1682 p2 = entry[6]
1677 1683 if base == p1 or base == p2:
1678 1684 return False
1679 1685 return self.issnapshot(base)
1680 1686
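# Editorial sketch (not part of revlog.py): in a sparse revlog a revision is
# a snapshot when it is a full text (delta base is nullrev) or when its delta
# base is neither parent and is itself a snapshot.  The recursion in
# isolation, over plain (deltabase, p1, p2) tuples with -1 for nullrev:
def example_issnapshot(entries, rev):
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True
    if base == p1 or base == p2:
        return False  # an ordinary delta against a parent
    return example_issnapshot(entries, base)

# rev 0 is a full text, rev 1 a plain delta against its parent, and rev 2 an
# intermediate snapshot based on rev 0 even though rev 0 is not its parent
_entries = [(-1, -1, -1), (0, 0, -1), (0, 1, -1)]
assert example_issnapshot(_entries, 0)
assert not example_issnapshot(_entries, 1)
assert example_issnapshot(_entries, 2)
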
1681 1687 def snapshotdepth(self, rev):
1682 1688 """number of snapshots in the chain before this one"""
1683 1689 if not self.issnapshot(rev):
1684 1690 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1685 1691 return len(self._deltachain(rev)[0]) - 1
1686 1692
1687 1693 def revdiff(self, rev1, rev2):
1688 1694 """return or calculate a delta between two revisions
1689 1695
1690 1696 The delta calculated is in binary form and is intended to be written to
1691 1697 revlog data directly. So this function needs raw revision data.
1692 1698 """
1693 1699 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1694 1700 return bytes(self._chunk(rev2))
1695 1701
1696 1702 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1697 1703
1698 1704 def _processflags(self, text, flags, operation, raw=False):
1699 1705 """deprecated entry point to access flag processors"""
1700 1706 msg = b'_processflag(...) use the specialized variant'
1701 1707 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1702 1708 if raw:
1703 1709 return text, flagutil.processflagsraw(self, text, flags)
1704 1710 elif operation == b'read':
1705 1711 return flagutil.processflagsread(self, text, flags)
1706 1712 else: # write operation
1707 1713 return flagutil.processflagswrite(self, text, flags)
1708 1714
1709 1715 def revision(self, nodeorrev, _df=None, raw=False):
1710 1716 """return an uncompressed revision of a given node or revision
1711 1717 number.
1712 1718
1713 1719 _df - an existing file handle to read from. (internal-only)
1714 1720 raw - an optional argument specifying if the revision data is to be
1715 1721 treated as raw data when applying flag transforms. 'raw' should be set
1716 1722 to True when generating changegroups or in debug commands.
1717 1723 """
1718 1724 if raw:
1719 1725 msg = (
1720 1726 b'revlog.revision(..., raw=True) is deprecated, '
1721 1727 b'use revlog.rawdata(...)'
1722 1728 )
1723 1729 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1724 1730 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1725 1731
1726 1732 def sidedata(self, nodeorrev, _df=None):
1727 1733 """a map of extra data related to the changeset but not part of the hash
1728 1734
1729 1735 This function currently returns a dictionary. However, a more advanced
1730 1736 mapping object will likely be used in the future for more
1731 1737 efficient/lazy code.
1732 1738 """
1733 1739 return self._revisiondata(nodeorrev, _df)[1]
1734 1740
1735 1741 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1736 1742 # deal with <nodeorrev> argument type
1737 1743 if isinstance(nodeorrev, int):
1738 1744 rev = nodeorrev
1739 1745 node = self.node(rev)
1740 1746 else:
1741 1747 node = nodeorrev
1742 1748 rev = None
1743 1749
1744 1750 # fast path the special `nullid` rev
1745 1751 if node == nullid:
1746 1752 return b"", {}
1747 1753
1748 1754 # The text as stored inside the revlog. Might be the revision or might
1749 1755 # need to be processed to retrieve the revision.
1750 1756 rawtext = None
1751 1757
1752 1758 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1753 1759
1754 1760 if raw and validated:
1755 1761 # if we don't want to process the raw text and that raw
1756 1762 # text is cached, we can exit early.
1757 1763 return rawtext, {}
1758 1764 if rev is None:
1759 1765 rev = self.rev(node)
1760 1766 # the revlog's flags for this revision
1761 1767 # (these usually alter its state or content)
1762 1768 flags = self.flags(rev)
1763 1769
1764 1770 if validated and flags == REVIDX_DEFAULT_FLAGS:
1765 1771 # no extra flags set, no flag processor runs, text = rawtext
1766 1772 return rawtext, {}
1767 1773
1768 1774 sidedata = {}
1769 1775 if raw:
1770 1776 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1771 1777 text = rawtext
1772 1778 else:
1773 1779 try:
1774 1780 r = flagutil.processflagsread(self, rawtext, flags)
1775 1781 except error.SidedataHashError as exc:
1776 1782 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1777 1783 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1778 1784 raise error.RevlogError(msg)
1779 1785 text, validatehash, sidedata = r
1780 1786 if validatehash:
1781 1787 self.checkhash(text, node, rev=rev)
1782 1788 if not validated:
1783 1789 self._revisioncache = (node, rev, rawtext)
1784 1790
1785 1791 return text, sidedata
1786 1792
1787 1793 def _rawtext(self, node, rev, _df=None):
1788 1794 """return the possibly unvalidated rawtext for a revision
1789 1795
1790 1796 returns (rev, rawtext, validated)
1791 1797 """
1792 1798
1793 1799 # revision in the cache (could be useful to apply delta)
1794 1800 cachedrev = None
1795 1801 # An intermediate text to apply deltas to
1796 1802 basetext = None
1797 1803
1798 1804 # Check if we have the entry in cache
1799 1805 # The cache entry looks like (node, rev, rawtext)
1800 1806 if self._revisioncache:
1801 1807 if self._revisioncache[0] == node:
1802 1808 return (rev, self._revisioncache[2], True)
1803 1809 cachedrev = self._revisioncache[1]
1804 1810
1805 1811 if rev is None:
1806 1812 rev = self.rev(node)
1807 1813
1808 1814 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1809 1815 if stopped:
1810 1816 basetext = self._revisioncache[2]
1811 1817
1812 1818 # drop cache to save memory, the caller is expected to
1813 1819 # update self._revisioncache after validating the text
1814 1820 self._revisioncache = None
1815 1821
1816 1822 targetsize = None
1817 1823 rawsize = self.index[rev][2]
1818 1824 if 0 <= rawsize:
1819 1825 targetsize = 4 * rawsize
1820 1826
1821 1827 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1822 1828 if basetext is None:
1823 1829 basetext = bytes(bins[0])
1824 1830 bins = bins[1:]
1825 1831
1826 1832 rawtext = mdiff.patches(basetext, bins)
1827 1833 del basetext # let us have a chance to free memory early
1828 1834 return (rev, rawtext, False)
1829 1835
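# Editorial sketch (not part of revlog.py): reconstruction above is "take the
# base text, then apply every delta in the chain in order".  A toy version in
# which each delta is a list of (start, end, replacement) hunks against the
# previous text (the real encoding is the binary mdiff format):
def example_applydelta(text, hunks):
    out, pos = [], 0
    for start, end, data in hunks:
        out.append(text[pos:start])
        out.append(data)
        pos = end
    out.append(text[pos:])
    return b''.join(out)

def example_applychain(basetext, deltas):
    for hunks in deltas:
        basetext = example_applydelta(basetext, hunks)
    return basetext

assert example_applychain(
    b'hello world', [[(6, 11, b'there')], [(0, 5, b'HELLO')]]
) == b'HELLO there'
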
1830 1836 def rawdata(self, nodeorrev, _df=None):
1831 1837 """return an uncompressed raw data of a given node or revision number.
1832 1838
1833 1839 _df - an existing file handle to read from. (internal-only)
1834 1840 """
1835 1841 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1836 1842
1837 1843 def hash(self, text, p1, p2):
1838 1844 """Compute a node hash.
1839 1845
1840 1846 Available as a function so that subclasses can replace the hash
1841 1847 as needed.
1842 1848 """
1843 1849 return storageutil.hashrevisionsha1(text, p1, p2)
1844 1850
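# Editorial sketch (not part of revlog.py): the conventional revlog node hash
# is the SHA-1 of the two parent nodeids in sorted order followed by the
# text.  A self-contained rendition of that convention (the production
# implementation lives in storageutil.hashrevisionsha1):
import hashlib

def example_hashrevision(text, p1, p2):
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

_null20 = b'\0' * 20
assert len(example_hashrevision(b'file content', _null20, _null20)) == 20
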
1845 1851 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1846 1852 """Check node hash integrity.
1847 1853
1848 1854 Available as a function so that subclasses can extend hash mismatch
1849 1855 behaviors as needed.
1850 1856 """
1851 1857 try:
1852 1858 if p1 is None and p2 is None:
1853 1859 p1, p2 = self.parents(node)
1854 1860 if node != self.hash(text, p1, p2):
1855 1861 # Clear the revision cache on hash failure. The revision cache
1856 1862 # only stores the raw revision and clearing the cache does have
1857 1863 # the side-effect that we won't have a cache hit when the raw
1858 1864 # revision data is accessed. But this case should be rare and
1859 1865 # it is extra work to teach the cache about the hash
1860 1866 # verification state.
1861 1867 if self._revisioncache and self._revisioncache[0] == node:
1862 1868 self._revisioncache = None
1863 1869
1864 1870 revornode = rev
1865 1871 if revornode is None:
1866 1872 revornode = templatefilters.short(hex(node))
1867 1873 raise error.RevlogError(
1868 1874 _(b"integrity check failed on %s:%s")
1869 1875 % (self.indexfile, pycompat.bytestr(revornode))
1870 1876 )
1871 1877 except error.RevlogError:
1872 1878 if self._censorable and storageutil.iscensoredtext(text):
1873 1879 raise error.CensoredNodeError(self.indexfile, node, text)
1874 1880 raise
1875 1881
1876 1882 def _enforceinlinesize(self, tr, fp=None):
1877 1883 """Check if the revlog is too big for inline and convert if so.
1878 1884
1879 1885 This should be called after revisions are added to the revlog. If the
1880 1886 revlog has grown too large to be an inline revlog, it will convert it
1881 1887 to use multiple index and data files.
1882 1888 """
1883 1889 tiprev = len(self) - 1
1884 1890 if (
1885 1891 not self._inline
1886 1892 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1887 1893 ):
1888 1894 return
1889 1895
1890 1896 trinfo = tr.find(self.indexfile)
1891 1897 if trinfo is None:
1892 1898 raise error.RevlogError(
1893 1899 _(b"%s not found in the transaction") % self.indexfile
1894 1900 )
1895 1901
1896 1902 trindex = trinfo[2]
1897 1903 if trindex is not None:
1898 1904 dataoff = self.start(trindex)
1899 1905 else:
1900 1906 # revlog was stripped at start of transaction, use all leftover data
1901 1907 trindex = len(self) - 1
1902 1908 dataoff = self.end(tiprev)
1903 1909
1904 1910 tr.add(self.datafile, dataoff)
1905 1911
1906 1912 if fp:
1907 1913 fp.flush()
1908 1914 fp.close()
1909 1915 # We can't use the cached file handle after close(). So prevent
1910 1916 # its usage.
1911 1917 self._writinghandles = None
1912 1918
1913 1919 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1914 1920 for r in self:
1915 1921 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1916 1922
1917 1923 with self._indexfp(b'w') as fp:
1918 1924 self.version &= ~FLAG_INLINE_DATA
1919 1925 self._inline = False
1920 1926 io = self._io
1921 1927 for i in self:
1922 1928 e = io.packentry(self.index[i], self.node, self.version, i)
1923 1929 fp.write(e)
1924 1930
1925 1931 # the temp file replaces the real index when we exit the context
1926 1932 # manager
1927 1933
1928 1934 tr.replace(self.indexfile, trindex * self._io.size)
1929 1935 self._chunkclear()
1930 1936
1931 1937 def _nodeduplicatecallback(self, transaction, node):
1932 1938 """called when trying to add a node already stored.
1933 1939 """
1934 1940
1935 1941 def addrevision(
1936 1942 self,
1937 1943 text,
1938 1944 transaction,
1939 1945 link,
1940 1946 p1,
1941 1947 p2,
1942 1948 cachedelta=None,
1943 1949 node=None,
1944 1950 flags=REVIDX_DEFAULT_FLAGS,
1945 1951 deltacomputer=None,
1946 1952 sidedata=None,
1947 1953 ):
1948 1954 """add a revision to the log
1949 1955
1950 1956 text - the revision data to add
1951 1957 transaction - the transaction object used for rollback
1952 1958 link - the linkrev data to add
1953 1959 p1, p2 - the parent nodeids of the revision
1954 1960 cachedelta - an optional precomputed delta
1955 1961 node - nodeid of revision; typically node is not specified, and it is
1956 1962 computed by default as hash(text, p1, p2), however subclasses might
1957 1963 use a different hashing method (and override checkhash() in that case)
1958 1964 flags - the known flags to set on the revision
1959 1965 deltacomputer - an optional deltacomputer instance shared between
1960 1966 multiple calls
1961 1967 """
1962 1968 if link == nullrev:
1963 1969 raise error.RevlogError(
1964 1970 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1965 1971 )
1966 1972
1967 1973 if sidedata is None:
1968 1974 sidedata = {}
1969 1975 flags = flags & ~REVIDX_SIDEDATA
1970 1976 elif not self.hassidedata:
1971 1977 raise error.ProgrammingError(
1972 1978 _(b"trying to add sidedata to a revlog that does not support them")
1973 1979 )
1974 1980 else:
1975 1981 flags |= REVIDX_SIDEDATA
1976 1982
1977 1983 if flags:
1978 1984 node = node or self.hash(text, p1, p2)
1979 1985
1980 1986 rawtext, validatehash = flagutil.processflagswrite(
1981 1987 self, text, flags, sidedata=sidedata
1982 1988 )
1983 1989
1984 1990 # If the flag processor modifies the revision data, ignore any provided
1985 1991 # cachedelta.
1986 1992 if rawtext != text:
1987 1993 cachedelta = None
1988 1994
1989 1995 if len(rawtext) > _maxentrysize:
1990 1996 raise error.RevlogError(
1991 1997 _(
1992 1998 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1993 1999 )
1994 2000 % (self.indexfile, len(rawtext))
1995 2001 )
1996 2002
1997 2003 node = node or self.hash(rawtext, p1, p2)
1998 2004 if self.index.has_node(node):
1999 2005 return node
2000 2006
2001 2007 if validatehash:
2002 2008 self.checkhash(rawtext, node, p1=p1, p2=p2)
2003 2009
2004 2010 return self.addrawrevision(
2005 2011 rawtext,
2006 2012 transaction,
2007 2013 link,
2008 2014 p1,
2009 2015 p2,
2010 2016 node,
2011 2017 flags,
2012 2018 cachedelta=cachedelta,
2013 2019 deltacomputer=deltacomputer,
2014 2020 )
2015 2021
2016 2022 def addrawrevision(
2017 2023 self,
2018 2024 rawtext,
2019 2025 transaction,
2020 2026 link,
2021 2027 p1,
2022 2028 p2,
2023 2029 node,
2024 2030 flags,
2025 2031 cachedelta=None,
2026 2032 deltacomputer=None,
2027 2033 ):
2028 2034 """add a raw revision with known flags, node and parents
2029 2035 useful when reusing a revision not stored in this revlog (e.g. received
2030 2036 over the wire, or read from an external bundle).
2031 2037 """
2032 2038 dfh = None
2033 2039 if not self._inline:
2034 2040 dfh = self._datafp(b"a+")
2035 2041 ifh = self._indexfp(b"a+")
2036 2042 try:
2037 2043 return self._addrevision(
2038 2044 node,
2039 2045 rawtext,
2040 2046 transaction,
2041 2047 link,
2042 2048 p1,
2043 2049 p2,
2044 2050 flags,
2045 2051 cachedelta,
2046 2052 ifh,
2047 2053 dfh,
2048 2054 deltacomputer=deltacomputer,
2049 2055 )
2050 2056 finally:
2051 2057 if dfh:
2052 2058 dfh.close()
2053 2059 ifh.close()
2054 2060
2055 2061 def compress(self, data):
2056 2062 """Generate a possibly-compressed representation of data."""
2057 2063 if not data:
2058 2064 return b'', data
2059 2065
2060 2066 compressed = self._compressor.compress(data)
2061 2067
2062 2068 if compressed:
2063 2069 # The revlog compressor added the header in the returned data.
2064 2070 return b'', compressed
2065 2071
2066 2072 if data[0:1] == b'\0':
2067 2073 return b'', data
2068 2074 return b'u', data
2069 2075
2070 2076 def decompress(self, data):
2071 2077 """Decompress a revlog chunk.
2072 2078
2073 2079 The chunk is expected to begin with a header identifying the
2074 2080 format type so it can be routed to an appropriate decompressor.
2075 2081 """
2076 2082 if not data:
2077 2083 return data
2078 2084
2079 2085 # Revlogs are read much more frequently than they are written and many
2080 2086 # chunks only take microseconds to decompress, so performance is
2081 2087 # important here.
2082 2088 #
2083 2089 # We can make a few assumptions about revlogs:
2084 2090 #
2085 2091 # 1) the majority of chunks will be compressed (as opposed to inline
2086 2092 # raw data).
2087 2093 # 2) decompressing *any* data will likely be at least 10x slower than
2088 2094 # returning raw inline data.
2089 2095 # 3) we want to prioritize common and officially supported compression
2090 2096 # engines
2091 2097 #
2092 2098 # It follows that we want to optimize for "decompress compressed data
2093 2099 # when encoded with common and officially supported compression engines"
2094 2100 # case over "raw data" and "data encoded by less common or non-official
2095 2101 # compression engines." That is why we have the inline lookup first
2096 2102 # followed by the compengines lookup.
2097 2103 #
2098 2104 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2099 2105 # compressed chunks. And this matters for changelog and manifest reads.
2100 2106 t = data[0:1]
2101 2107
2102 2108 if t == b'x':
2103 2109 try:
2104 2110 return _zlibdecompress(data)
2105 2111 except zlib.error as e:
2106 2112 raise error.RevlogError(
2107 2113 _(b'revlog decompress error: %s')
2108 2114 % stringutil.forcebytestr(e)
2109 2115 )
2110 2116 # '\0' is more common than 'u' so it goes first.
2111 2117 elif t == b'\0':
2112 2118 return data
2113 2119 elif t == b'u':
2114 2120 return util.buffer(data, 1)
2115 2121
2116 2122 try:
2117 2123 compressor = self._decompressors[t]
2118 2124 except KeyError:
2119 2125 try:
2120 2126 engine = util.compengines.forrevlogheader(t)
2121 2127 compressor = engine.revlogcompressor(self._compengineopts)
2122 2128 self._decompressors[t] = compressor
2123 2129 except KeyError:
2124 2130 raise error.RevlogError(_(b'unknown compression type %r') % t)
2125 2131
2126 2132 return compressor.decompress(data)
2127 2133
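# Editorial sketch (not part of revlog.py): the chunk header convention used
# by compress()/decompress() above -- b'u' marks data stored uncompressed, a
# leading b'\0' passes through as-is, and any other first byte (b'x' for
# zlib, other letters for pluggable engines) names a compression engine.  A
# toy zlib-only round trip under that convention (the real compress() keeps
# the header separate from the payload):
import zlib

def example_compress(data):
    if not data:
        return data
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp  # zlib output already starts with b'x'
    if data[0:1] == b'\0':
        return data
    return b'u' + data

def example_decompress(chunk):
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression type %r' % t)

_payload = b'some revision text ' * 50
assert example_decompress(example_compress(_payload)) == _payload
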
2128 2134 def _addrevision(
2129 2135 self,
2130 2136 node,
2131 2137 rawtext,
2132 2138 transaction,
2133 2139 link,
2134 2140 p1,
2135 2141 p2,
2136 2142 flags,
2137 2143 cachedelta,
2138 2144 ifh,
2139 2145 dfh,
2140 2146 alwayscache=False,
2141 2147 deltacomputer=None,
2142 2148 ):
2143 2149 """internal function to add revisions to the log
2144 2150
2145 2151 see addrevision for argument descriptions.
2146 2152
2147 2153 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2148 2154
2149 2155 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2150 2156 be used.
2151 2157
2152 2158 invariants:
2153 2159 - rawtext is optional (can be None); if not set, cachedelta must be set.
2154 2160 if both are set, they must correspond to each other.
2155 2161 """
2156 2162 if node == nullid:
2157 2163 raise error.RevlogError(
2158 2164 _(b"%s: attempt to add null revision") % self.indexfile
2159 2165 )
2160 2166 if node == wdirid or node in wdirfilenodeids:
2161 2167 raise error.RevlogError(
2162 2168 _(b"%s: attempt to add wdir revision") % self.indexfile
2163 2169 )
2164 2170
2165 2171 if self._inline:
2166 2172 fh = ifh
2167 2173 else:
2168 2174 fh = dfh
2169 2175
2170 2176 btext = [rawtext]
2171 2177
2172 2178 curr = len(self)
2173 2179 prev = curr - 1
2174 2180 offset = self.end(prev)
2175 2181 p1r, p2r = self.rev(p1), self.rev(p2)
2176 2182
2177 2183 # full versions are inserted when the needed deltas
2178 2184 # become comparable to the uncompressed text
2179 2185 if rawtext is None:
2180 2186 # need the rawtext size before it is changed by flag processors,
2181 2187 # which is the non-raw size. use revlog explicitly to avoid filelog's
2182 2188 # extra logic that might remove metadata size.
2183 2189 textlen = mdiff.patchedsize(
2184 2190 revlog.size(self, cachedelta[0]), cachedelta[1]
2185 2191 )
2186 2192 else:
2187 2193 textlen = len(rawtext)
2188 2194
2189 2195 if deltacomputer is None:
2190 2196 deltacomputer = deltautil.deltacomputer(self)
2191 2197
2192 2198 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2193 2199
2194 2200 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2195 2201
2196 2202 e = (
2197 2203 offset_type(offset, flags),
2198 2204 deltainfo.deltalen,
2199 2205 textlen,
2200 2206 deltainfo.base,
2201 2207 link,
2202 2208 p1r,
2203 2209 p2r,
2204 2210 node,
2205 2211 )
2206 2212 self.index.append(e)
2207 2213
2208 2214 # Reset the pure node cache start lookup offset to account for new
2209 2215 # revision.
2210 2216 if self._nodepos is not None:
2211 2217 self._nodepos = curr
2212 2218
2213 2219 entry = self._io.packentry(e, self.node, self.version, curr)
2214 2220 self._writeentry(
2215 2221 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2216 2222 )
2217 2223
2218 2224 rawtext = btext[0]
2219 2225
2220 2226 if alwayscache and rawtext is None:
2221 2227 rawtext = deltacomputer.buildtext(revinfo, fh)
2222 2228
2223 2229 if type(rawtext) == bytes: # only accept immutable objects
2224 2230 self._revisioncache = (node, curr, rawtext)
2225 2231 self._chainbasecache[curr] = deltainfo.chainbase
2226 2232 return node
2227 2233
2228 2234 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2229 2235 # Files opened in a+ mode have inconsistent behavior on various
2230 2236 # platforms. Windows requires that a file positioning call be made
2231 2237 # when the file handle transitions between reads and writes. See
2232 2238 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2233 2239 # platforms, Python or the platform itself can be buggy. Some versions
2234 2240 # of Solaris have been observed to not append at the end of the file
2235 2241 # if the file was seeked to before the end. See issue4943 for more.
2236 2242 #
2237 2243 # We work around this issue by inserting a seek() before writing.
2238 2244 # Note: This is likely not necessary on Python 3. However, because
2239 2245 # the file handle is reused for reads and may be seeked there, we need
2240 2246 # to be careful before changing this.
2241 2247 ifh.seek(0, os.SEEK_END)
2242 2248 if dfh:
2243 2249 dfh.seek(0, os.SEEK_END)
2244 2250
2245 2251 curr = len(self) - 1
2246 2252 if not self._inline:
2247 2253 transaction.add(self.datafile, offset)
2248 2254 transaction.add(self.indexfile, curr * len(entry))
2249 2255 if data[0]:
2250 2256 dfh.write(data[0])
2251 2257 dfh.write(data[1])
2252 2258 ifh.write(entry)
2253 2259 else:
2254 2260 offset += curr * self._io.size
2255 2261 transaction.add(self.indexfile, offset, curr)
2256 2262 ifh.write(entry)
2257 2263 ifh.write(data[0])
2258 2264 ifh.write(data[1])
2259 2265 self._enforceinlinesize(transaction, ifh)
2260 2266
2261 2267 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2262 2268 """
2263 2269 add a delta group
2264 2270
2265 2271 given a set of deltas, add them to the revision log. the
2266 2272 first delta is against its parent, which should be in our
2267 2273 log, the rest are against the previous delta.
2268 2274
2269 2275 If ``addrevisioncb`` is defined, it will be called with arguments of
2270 2276 this revlog and the node that was added.
2271 2277 """
2272 2278
2273 2279 if self._writinghandles:
2274 2280 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2275 2281
2276 2282 nodes = []
2277 2283
2278 2284 r = len(self)
2279 2285 end = 0
2280 2286 if r:
2281 2287 end = self.end(r - 1)
2282 2288 ifh = self._indexfp(b"a+")
2283 2289 isize = r * self._io.size
2284 2290 if self._inline:
2285 2291 transaction.add(self.indexfile, end + isize, r)
2286 2292 dfh = None
2287 2293 else:
2288 2294 transaction.add(self.indexfile, isize, r)
2289 2295 transaction.add(self.datafile, end)
2290 2296 dfh = self._datafp(b"a+")
2291 2297
2292 2298 def flush():
2293 2299 if dfh:
2294 2300 dfh.flush()
2295 2301 ifh.flush()
2296 2302
2297 2303 self._writinghandles = (ifh, dfh)
2298 2304
2299 2305 try:
2300 2306 deltacomputer = deltautil.deltacomputer(self)
2301 2307 # loop through our set of deltas
2302 2308 for data in deltas:
2303 2309 node, p1, p2, linknode, deltabase, delta, flags = data
2304 2310 link = linkmapper(linknode)
2305 2311 flags = flags or REVIDX_DEFAULT_FLAGS
2306 2312
2307 2313 nodes.append(node)
2308 2314
2309 2315 if self.index.has_node(node):
2310 2316 self._nodeduplicatecallback(transaction, node)
2311 2317 # this can happen if two branches make the same change
2312 2318 continue
2313 2319
2314 2320 for p in (p1, p2):
2315 2321 if not self.index.has_node(p):
2316 2322 raise error.LookupError(
2317 2323 p, self.indexfile, _(b'unknown parent')
2318 2324 )
2319 2325
2320 2326 if not self.index.has_node(deltabase):
2321 2327 raise error.LookupError(
2322 2328 deltabase, self.indexfile, _(b'unknown delta base')
2323 2329 )
2324 2330
2325 2331 baserev = self.rev(deltabase)
2326 2332
2327 2333 if baserev != nullrev and self.iscensored(baserev):
2328 2334 # if base is censored, delta must be full replacement in a
2329 2335 # single patch operation
2330 2336 hlen = struct.calcsize(b">lll")
2331 2337 oldlen = self.rawsize(baserev)
2332 2338 newlen = len(delta) - hlen
2333 2339 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2334 2340 raise error.CensoredBaseError(
2335 2341 self.indexfile, self.node(baserev)
2336 2342 )
2337 2343
2338 2344 if not flags and self._peek_iscensored(baserev, delta, flush):
2339 2345 flags |= REVIDX_ISCENSORED
2340 2346
2341 2347 # We assume consumers of addrevisioncb will want to retrieve
2342 2348 # the added revision, which will require a call to
2343 2349 # revision(). revision() will fast path if there is a cache
2344 2350 # hit. So, we tell _addrevision() to always cache in this case.
2345 2351 # We're only using addgroup() in the context of changegroup
2346 2352 # generation so the revision data can always be handled as raw
2347 2353 # by the flagprocessor.
2348 2354 self._addrevision(
2349 2355 node,
2350 2356 None,
2351 2357 transaction,
2352 2358 link,
2353 2359 p1,
2354 2360 p2,
2355 2361 flags,
2356 2362 (baserev, delta),
2357 2363 ifh,
2358 2364 dfh,
2359 2365 alwayscache=bool(addrevisioncb),
2360 2366 deltacomputer=deltacomputer,
2361 2367 )
2362 2368
2363 2369 if addrevisioncb:
2364 2370 addrevisioncb(self, node)
2365 2371
2366 2372 if not dfh and not self._inline:
2367 2373 # addrevision switched from inline to conventional
2368 2374 # reopen the index
2369 2375 ifh.close()
2370 2376 dfh = self._datafp(b"a+")
2371 2377 ifh = self._indexfp(b"a+")
2372 2378 self._writinghandles = (ifh, dfh)
2373 2379 finally:
2374 2380 self._writinghandles = None
2375 2381
2376 2382 if dfh:
2377 2383 dfh.close()
2378 2384 ifh.close()
2379 2385
2380 2386 return nodes
2381 2387
2382 2388 def iscensored(self, rev):
2383 2389 """Check if a file revision is censored."""
2384 2390 if not self._censorable:
2385 2391 return False
2386 2392
2387 2393 return self.flags(rev) & REVIDX_ISCENSORED
2388 2394
2389 2395 def _peek_iscensored(self, baserev, delta, flush):
2390 2396 """Quickly check if a delta produces a censored revision."""
2391 2397 if not self._censorable:
2392 2398 return False
2393 2399
2394 2400 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2395 2401
2396 2402 def getstrippoint(self, minlink):
2397 2403 """find the minimum rev that must be stripped to strip the linkrev
2398 2404
2399 2405 Returns a tuple containing the minimum rev and a set of all revs that
2400 2406 have linkrevs that will be broken by this strip.
2401 2407 """
2402 2408 return storageutil.resolvestripinfo(
2403 2409 minlink,
2404 2410 len(self) - 1,
2405 2411 self.headrevs(),
2406 2412 self.linkrev,
2407 2413 self.parentrevs,
2408 2414 )
2409 2415
2410 2416 def strip(self, minlink, transaction):
2411 2417 """truncate the revlog on the first revision with a linkrev >= minlink
2412 2418
2413 2419 This function is called when we're stripping revision minlink and
2414 2420 its descendants from the repository.
2415 2421
2416 2422 We have to remove all revisions with linkrev >= minlink, because
2417 2423 the equivalent changelog revisions will be renumbered after the
2418 2424 strip.
2419 2425
2420 2426 So we truncate the revlog on the first of these revisions, and
2421 2427 trust that the caller has saved the revisions that shouldn't be
2422 2428 removed and that it'll re-add them after this truncation.
2423 2429 """
2424 2430 if len(self) == 0:
2425 2431 return
2426 2432
2427 2433 rev, _ = self.getstrippoint(minlink)
2428 2434 if rev == len(self):
2429 2435 return
2430 2436
2431 2437 # first truncate the files on disk
2432 2438 end = self.start(rev)
2433 2439 if not self._inline:
2434 2440 transaction.add(self.datafile, end)
2435 2441 end = rev * self._io.size
2436 2442 else:
2437 2443 end += rev * self._io.size
2438 2444
2439 2445 transaction.add(self.indexfile, end)
2440 2446
2441 2447 # then reset internal state in memory to forget those revisions
2442 2448 self._revisioncache = None
2443 2449 self._chaininfocache = {}
2444 2450 self._chunkclear()
2445 2451
2446 2452 del self.index[rev:-1]
2447 2453 self._nodepos = None
2448 2454
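# Editorial sketch (not part of revlog.py): getstrippoint() picks the first
# revision whose linkrev reaches minlink -- everything from there on gets
# truncated by strip() -- and also reports later revisions that are removed
# even though their own linkrevs are below minlink.  A simplified version of
# the idea over a plain list of linkrevs (the real computation in storageutil
# also takes the revision graph into account):
def example_strippoint(linkrevs, minlink):
    striprev = len(linkrevs)
    broken = set()
    for rev, linkrev in enumerate(linkrevs):
        if linkrev >= minlink:
            striprev = min(striprev, rev)
        elif rev > striprev:
            broken.add(rev)  # stripped although its linkrev survives
    return striprev, broken

assert example_strippoint([0, 2, 1, 3], minlink=2) == (1, {2})
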
2449 2455 def checksize(self):
2450 2456 """Check size of index and data files
2451 2457
2452 2458 return a (dd, di) tuple.
2453 2459 - dd: extra bytes for the "data" file
2454 2460 - di: extra bytes for the "index" file
2455 2461
2456 2462 A healthy revlog will return (0, 0).
2457 2463 """
2458 2464 expected = 0
2459 2465 if len(self):
2460 2466 expected = max(0, self.end(len(self) - 1))
2461 2467
2462 2468 try:
2463 2469 with self._datafp() as f:
2464 2470 f.seek(0, io.SEEK_END)
2465 2471 actual = f.tell()
2466 2472 dd = actual - expected
2467 2473 except IOError as inst:
2468 2474 if inst.errno != errno.ENOENT:
2469 2475 raise
2470 2476 dd = 0
2471 2477
2472 2478 try:
2473 2479 f = self.opener(self.indexfile)
2474 2480 f.seek(0, io.SEEK_END)
2475 2481 actual = f.tell()
2476 2482 f.close()
2477 2483 s = self._io.size
2478 2484 i = max(0, actual // s)
2479 2485 di = actual - (i * s)
2480 2486 if self._inline:
2481 2487 databytes = 0
2482 2488 for r in self:
2483 2489 databytes += max(0, self.length(r))
2484 2490 dd = 0
2485 2491 di = actual - len(self) * s - databytes
2486 2492 except IOError as inst:
2487 2493 if inst.errno != errno.ENOENT:
2488 2494 raise
2489 2495 di = 0
2490 2496
2491 2497 return (dd, di)
2492 2498
2493 2499 def files(self):
2494 2500 res = [self.indexfile]
2495 2501 if not self._inline:
2496 2502 res.append(self.datafile)
2497 2503 return res
2498 2504
2499 2505 def emitrevisions(
2500 2506 self,
2501 2507 nodes,
2502 2508 nodesorder=None,
2503 2509 revisiondata=False,
2504 2510 assumehaveparentrevisions=False,
2505 2511 deltamode=repository.CG_DELTAMODE_STD,
2506 2512 ):
2507 2513 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2508 2514 raise error.ProgrammingError(
2509 2515 b'unhandled value for nodesorder: %s' % nodesorder
2510 2516 )
2511 2517
2512 2518 if nodesorder is None and not self._generaldelta:
2513 2519 nodesorder = b'storage'
2514 2520
2515 2521 if (
2516 2522 not self._storedeltachains
2517 2523 and deltamode != repository.CG_DELTAMODE_PREV
2518 2524 ):
2519 2525 deltamode = repository.CG_DELTAMODE_FULL
2520 2526
2521 2527 return storageutil.emitrevisions(
2522 2528 self,
2523 2529 nodes,
2524 2530 nodesorder,
2525 2531 revlogrevisiondelta,
2526 2532 deltaparentfn=self.deltaparent,
2527 2533 candeltafn=self.candelta,
2528 2534 rawsizefn=self.rawsize,
2529 2535 revdifffn=self.revdiff,
2530 2536 flagsfn=self.flags,
2531 2537 deltamode=deltamode,
2532 2538 revisiondata=revisiondata,
2533 2539 assumehaveparentrevisions=assumehaveparentrevisions,
2534 2540 )
2535 2541
2536 2542 DELTAREUSEALWAYS = b'always'
2537 2543 DELTAREUSESAMEREVS = b'samerevs'
2538 2544 DELTAREUSENEVER = b'never'
2539 2545
2540 2546 DELTAREUSEFULLADD = b'fulladd'
2541 2547
2542 2548 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2543 2549
2544 2550 def clone(
2545 2551 self,
2546 2552 tr,
2547 2553 destrevlog,
2548 2554 addrevisioncb=None,
2549 2555 deltareuse=DELTAREUSESAMEREVS,
2550 2556 forcedeltabothparents=None,
2551 2557 sidedatacompanion=None,
2552 2558 ):
2553 2559 """Copy this revlog to another, possibly with format changes.
2554 2560
2555 2561 The destination revlog will contain the same revisions and nodes.
2556 2562 However, it may not be bit-for-bit identical due to e.g. delta encoding
2557 2563 differences.
2558 2564
2559 2565 The ``deltareuse`` argument controls how deltas from the existing revlog
2560 2566 are preserved in the destination revlog. The argument can have the
2561 2567 following values:
2562 2568
2563 2569 DELTAREUSEALWAYS
2564 2570 Deltas will always be reused (if possible), even if the destination
2565 2571 revlog would not select the same revisions for the delta. This is the
2566 2572 fastest mode of operation.
2567 2573 DELTAREUSESAMEREVS
2568 2574 Deltas will be reused if the destination revlog would pick the same
2569 2575 revisions for the delta. This mode strikes a balance between speed
2570 2576 and optimization.
2571 2577 DELTAREUSENEVER
2572 2578 Deltas will never be reused. This is the slowest mode of execution.
2573 2579 This mode can be used to recompute deltas (e.g. if the diff/delta
2574 2580 algorithm changes).
2575 2581 DELTAREUSEFULLADD
2576 2582 Revisions will be re-added as if they were new content. This is
2577 2583 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2578 2584 e.g. large-file detection and handling.
2579 2585
2580 2586 Delta computation can be slow, so the choice of delta reuse policy can
2581 2587 significantly affect run time.
2582 2588
2583 2589 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2584 2590 two extremes. Deltas will be reused if they are appropriate. But if the
2585 2591 delta could choose a better revision, it will do so. This means if you
2586 2592 are converting a non-generaldelta revlog to a generaldelta revlog,
2587 2593 deltas will be recomputed if the delta's parent isn't a parent of the
2588 2594 revision.
2589 2595
2590 2596 In addition to the delta policy, the ``forcedeltabothparents``
2591 2597 argument controls whether to force computing deltas against both parents
2592 2598 for merges. If None, the destination revlog's current setting is kept.
2593 2599
2594 2600 If not None, the `sidedatacompanion` is a callable that accepts two
2595 2601 arguments:
2596 2602
2597 2603 (srcrevlog, rev)
2598 2604
2599 2605 and returns a triplet that controls changes to sidedata content from the
2600 2606 old revision to the new clone result:
2601 2607
2602 2608 (dropall, filterout, update)
2603 2609
2604 2610 * if `dropall` is True, all sidedata should be dropped
2605 2611 * `filterout` is a set of sidedata keys that should be dropped
2606 2612 * `update` is a mapping of additional/new key -> value
2607 2613 """
2608 2614 if deltareuse not in self.DELTAREUSEALL:
2609 2615 raise ValueError(
2610 2616 _(b'value for deltareuse invalid: %s') % deltareuse
2611 2617 )
2612 2618
2613 2619 if len(destrevlog):
2614 2620 raise ValueError(_(b'destination revlog is not empty'))
2615 2621
2616 2622 if getattr(self, 'filteredrevs', None):
2617 2623 raise ValueError(_(b'source revlog has filtered revisions'))
2618 2624 if getattr(destrevlog, 'filteredrevs', None):
2619 2625 raise ValueError(_(b'destination revlog has filtered revisions'))
2620 2626
2621 2627 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2622 2628 # if possible.
2623 2629 oldlazydelta = destrevlog._lazydelta
2624 2630 oldlazydeltabase = destrevlog._lazydeltabase
2625 2631 oldamd = destrevlog._deltabothparents
2626 2632
2627 2633 try:
2628 2634 if deltareuse == self.DELTAREUSEALWAYS:
2629 2635 destrevlog._lazydeltabase = True
2630 2636 destrevlog._lazydelta = True
2631 2637 elif deltareuse == self.DELTAREUSESAMEREVS:
2632 2638 destrevlog._lazydeltabase = False
2633 2639 destrevlog._lazydelta = True
2634 2640 elif deltareuse == self.DELTAREUSENEVER:
2635 2641 destrevlog._lazydeltabase = False
2636 2642 destrevlog._lazydelta = False
2637 2643
2638 2644 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2639 2645
2640 2646 self._clone(
2641 2647 tr,
2642 2648 destrevlog,
2643 2649 addrevisioncb,
2644 2650 deltareuse,
2645 2651 forcedeltabothparents,
2646 2652 sidedatacompanion,
2647 2653 )
2648 2654
2649 2655 finally:
2650 2656 destrevlog._lazydelta = oldlazydelta
2651 2657 destrevlog._lazydeltabase = oldlazydeltabase
2652 2658 destrevlog._deltabothparents = oldamd
2653 2659
2654 2660 def _clone(
2655 2661 self,
2656 2662 tr,
2657 2663 destrevlog,
2658 2664 addrevisioncb,
2659 2665 deltareuse,
2660 2666 forcedeltabothparents,
2661 2667 sidedatacompanion,
2662 2668 ):
2663 2669 """perform the core duty of `revlog.clone` after parameter processing"""
2664 2670 deltacomputer = deltautil.deltacomputer(destrevlog)
2665 2671 index = self.index
2666 2672 for rev in self:
2667 2673 entry = index[rev]
2668 2674
2669 2675 # Some classes override linkrev to take filtered revs into
2670 2676 # account. Use raw entry from index.
2671 2677 flags = entry[0] & 0xFFFF
2672 2678 linkrev = entry[4]
2673 2679 p1 = index[entry[5]][7]
2674 2680 p2 = index[entry[6]][7]
2675 2681 node = entry[7]
2676 2682
2677 2683 sidedataactions = (False, [], {})
2678 2684 if sidedatacompanion is not None:
2679 2685 sidedataactions = sidedatacompanion(self, rev)
2680 2686
2681 2687 # (Possibly) reuse the delta from the revlog if allowed and
2682 2688 # the revlog chunk is a delta.
2683 2689 cachedelta = None
2684 2690 rawtext = None
2685 2691 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2686 2692 dropall, filterout, update = sidedataactions
2687 2693 text, sidedata = self._revisiondata(rev)
2688 2694 if dropall:
2689 2695 sidedata = {}
2690 2696 for key in filterout:
2691 2697 sidedata.pop(key, None)
2692 2698 sidedata.update(update)
2693 2699 if not sidedata:
2694 2700 sidedata = None
2695 2701 destrevlog.addrevision(
2696 2702 text,
2697 2703 tr,
2698 2704 linkrev,
2699 2705 p1,
2700 2706 p2,
2701 2707 cachedelta=cachedelta,
2702 2708 node=node,
2703 2709 flags=flags,
2704 2710 deltacomputer=deltacomputer,
2705 2711 sidedata=sidedata,
2706 2712 )
2707 2713 else:
2708 2714 if destrevlog._lazydelta:
2709 2715 dp = self.deltaparent(rev)
2710 2716 if dp != nullrev:
2711 2717 cachedelta = (dp, bytes(self._chunk(rev)))
2712 2718
2713 2719 if not cachedelta:
2714 2720 rawtext = self.rawdata(rev)
2715 2721
2716 2722 ifh = destrevlog.opener(
2717 2723 destrevlog.indexfile, b'a+', checkambig=False
2718 2724 )
2719 2725 dfh = None
2720 2726 if not destrevlog._inline:
2721 2727 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2722 2728 try:
2723 2729 destrevlog._addrevision(
2724 2730 node,
2725 2731 rawtext,
2726 2732 tr,
2727 2733 linkrev,
2728 2734 p1,
2729 2735 p2,
2730 2736 flags,
2731 2737 cachedelta,
2732 2738 ifh,
2733 2739 dfh,
2734 2740 deltacomputer=deltacomputer,
2735 2741 )
2736 2742 finally:
2737 2743 if dfh:
2738 2744 dfh.close()
2739 2745 ifh.close()
2740 2746
2741 2747 if addrevisioncb:
2742 2748 addrevisioncb(self, rev, node)
2743 2749
2744 2750 def censorrevision(self, tr, censornode, tombstone=b''):
2745 2751 if (self.version & 0xFFFF) == REVLOGV0:
2746 2752 raise error.RevlogError(
2747 2753 _(b'cannot censor with version %d revlogs') % self.version
2748 2754 )
2749 2755
2750 2756 censorrev = self.rev(censornode)
2751 2757 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2752 2758
2753 2759 if len(tombstone) > self.rawsize(censorrev):
2754 2760 raise error.Abort(
2755 2761 _(b'censor tombstone must be no longer than censored data')
2756 2762 )
2757 2763
2758 2764 # Rewriting the revlog in place is hard. Our strategy for censoring is
2759 2765 # to create a new revlog, copy all revisions to it, then replace the
2760 2766 # revlogs on transaction close.
2761 2767
2762 2768 newindexfile = self.indexfile + b'.tmpcensored'
2763 2769 newdatafile = self.datafile + b'.tmpcensored'
2764 2770
2765 2771 # This is a bit dangerous. We could easily have a mismatch of state.
2766 2772 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2767 2773 newrl.version = self.version
2768 2774 newrl._generaldelta = self._generaldelta
2769 2775 newrl._io = self._io
2770 2776
2771 2777 for rev in self.revs():
2772 2778 node = self.node(rev)
2773 2779 p1, p2 = self.parents(node)
2774 2780
2775 2781 if rev == censorrev:
2776 2782 newrl.addrawrevision(
2777 2783 tombstone,
2778 2784 tr,
2779 2785 self.linkrev(censorrev),
2780 2786 p1,
2781 2787 p2,
2782 2788 censornode,
2783 2789 REVIDX_ISCENSORED,
2784 2790 )
2785 2791
2786 2792 if newrl.deltaparent(rev) != nullrev:
2787 2793 raise error.Abort(
2788 2794 _(
2789 2795 b'censored revision stored as delta; '
2790 2796 b'cannot censor'
2791 2797 ),
2792 2798 hint=_(
2793 2799 b'censoring of revlogs is not '
2794 2800 b'fully implemented; please report '
2795 2801 b'this bug'
2796 2802 ),
2797 2803 )
2798 2804 continue
2799 2805
2800 2806 if self.iscensored(rev):
2801 2807 if self.deltaparent(rev) != nullrev:
2802 2808 raise error.Abort(
2803 2809 _(
2804 2810 b'cannot censor due to censored '
2805 2811 b'revision having delta stored'
2806 2812 )
2807 2813 )
2808 2814 rawtext = self._chunk(rev)
2809 2815 else:
2810 2816 rawtext = self.rawdata(rev)
2811 2817
2812 2818 newrl.addrawrevision(
2813 2819 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2814 2820 )
2815 2821
2816 2822 tr.addbackup(self.indexfile, location=b'store')
2817 2823 if not self._inline:
2818 2824 tr.addbackup(self.datafile, location=b'store')
2819 2825
2820 2826 self.opener.rename(newrl.indexfile, self.indexfile)
2821 2827 if not self._inline:
2822 2828 self.opener.rename(newrl.datafile, self.datafile)
2823 2829
2824 2830 self.clearcaches()
2825 2831 self._loadindex()
2826 2832
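# Hedged usage sketch for the copy-and-swap strategy implemented above; the
# repository, file path and node below are illustrative assumptions.  The
# tombstone is packed as ``{b'censored': tombstone}`` metadata and, per the
# check at the top of ``censorrevision``, must be no longer than the raw
# data it replaces.
def example_censor(repo, path, badnode):
    # run inside a store lock and an open transaction so the rewritten
    # revlog files are backed up and swapped in when the transaction closes
    with repo.lock(), repo.transaction(b'censor') as tr:
        fl = repo.file(path)  # a filelog wraps a censorable revlog
        fl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')
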
2827 2833 def verifyintegrity(self, state):
2828 2834 """Verifies the integrity of the revlog.
2829 2835
2830 2836 Yields ``revlogproblem`` instances describing problems that are
2831 2837 found.
2832 2838 """
2833 2839 dd, di = self.checksize()
2834 2840 if dd:
2835 2841 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2836 2842 if di:
2837 2843 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2838 2844
2839 2845 version = self.version & 0xFFFF
2840 2846
2841 2847 # The verifier tells us what version revlog we should be.
2842 2848 if version != state[b'expectedversion']:
2843 2849 yield revlogproblem(
2844 2850 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2845 2851 % (self.indexfile, version, state[b'expectedversion'])
2846 2852 )
2847 2853
2848 2854 state[b'skipread'] = set()
2849 2855
2850 2856 for rev in self:
2851 2857 node = self.node(rev)
2852 2858
2853 2859 # Verify contents. 4 cases to care about:
2854 2860 #
2855 2861 # common: the most common case
2856 2862 # rename: with a rename
2857 2863 # meta: file content starts with b'\1\n', the metadata
2858 2864 # header defined in filelog.py, but without a rename
2859 2865 # ext: content stored externally
2860 2866 #
2861 2867 # More formally, their differences are shown below:
2862 2868 #
2863 2869 # | common | rename | meta | ext
2864 2870 # -------------------------------------------------------
2865 2871 # flags() | 0 | 0 | 0 | not 0
2866 2872 # renamed() | False | True | False | ?
2867 2873 # rawtext[0:2]=='\1\n'| False | True | True | ?
2868 2874 #
2869 2875 # "rawtext" means the raw text stored in revlog data, which
2870 2876 # could be retrieved by "rawdata(rev)". "text"
2871 2877 # mentioned below is "revision(rev)".
2872 2878 #
2873 2879 # There are 3 different lengths stored physically:
2874 2880 # 1. L1: rawsize, stored in revlog index
2875 2881 # 2. L2: len(rawtext), stored in revlog data
2876 2882 # 3. L3: len(text), stored in revlog data if flags==0, or
2877 2883 # possibly somewhere else if flags!=0
2878 2884 #
2879 2885 # L1 should be equal to L2. L3 could be different from them.
2880 2886 # "text" may or may not affect commit hash depending on flag
2881 2887 # processors (see flagutil.addflagprocessor).
2882 2888 #
2883 2889 # | common | rename | meta | ext
2884 2890 # -------------------------------------------------
2885 2891 # rawsize() | L1 | L1 | L1 | L1
2886 2892 # size() | L1 | L2-LM | L1(*) | L1 (?)
2887 2893 # len(rawtext) | L2 | L2 | L2 | L2
2888 2894 # len(text) | L2 | L2 | L2 | L3
2889 2895 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2890 2896 #
2891 2897 # LM: length of metadata, depending on rawtext
2892 2898 # (*): not ideal, see comment in filelog.size
2893 2899 # (?): could be "- len(meta)" if the resolved content has
2894 2900 # rename metadata
2895 2901 #
2896 2902 # Checks needed to be done:
2897 2903 # 1. length check: L1 == L2, in all cases.
2898 2904 # 2. hash check: depending on flag processor, we may need to
2899 2905 # use either "text" (external), or "rawtext" (in revlog).
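# Worked example with illustrative numbers: for a rename revision whose
# resolved content ("text") is 10 bytes and whose copy-metadata block,
# including its two b'\1\n' delimiters, is 40 bytes (LM == 40), the stored
# rawtext is 50 bytes, so L1 == L2 == 50 while len(read()) == L2 - LM == 10.
# The length check below therefore compares 50 with 50 and passes even
# though the user-visible content is much shorter.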
2900 2906
2901 2907 try:
2902 2908 skipflags = state.get(b'skipflags', 0)
2903 2909 if skipflags:
2904 2910 skipflags &= self.flags(rev)
2905 2911
2906 2912 if skipflags:
2907 2913 state[b'skipread'].add(node)
2908 2914 else:
2909 2915 # Side-effect: read content and verify hash.
2910 2916 self.revision(node)
2911 2917
2912 2918 l1 = self.rawsize(rev)
2913 2919 l2 = len(self.rawdata(node))
2914 2920
2915 2921 if l1 != l2:
2916 2922 yield revlogproblem(
2917 2923 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2918 2924 node=node,
2919 2925 )
2920 2926
2921 2927 except error.CensoredNodeError:
2922 2928 if state[b'erroroncensored']:
2923 2929 yield revlogproblem(
2924 2930 error=_(b'censored file data'), node=node
2925 2931 )
2926 2932 state[b'skipread'].add(node)
2927 2933 except Exception as e:
2928 2934 yield revlogproblem(
2929 2935 error=_(b'unpacking %s: %s')
2930 2936 % (short(node), stringutil.forcebytestr(e)),
2931 2937 node=node,
2932 2938 )
2933 2939 state[b'skipread'].add(node)
2934 2940
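# Hedged sketch (not part of this module) of how a verifier could drive
# ``verifyintegrity``; the state keys mirror the ones read above and ``rl``
# is an assumed revlog instance.
def example_verify(rl, expectedversion):
    state = {
        b'expectedversion': expectedversion,
        b'erroroncensored': True,  # treat censored file data as an error
        b'skipflags': 0,           # do not skip hash-checking any revision
    }
    problems = list(rl.verifyintegrity(state))
    for problem in problems:
        # each revlogproblem carries an optional warning, error and node
        print(problem.warning or problem.error)
    # nodes whose content could not (or should not) be read by later stages
    return problems, state[b'skipread']
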
2935 2941 def storageinfo(
2936 2942 self,
2937 2943 exclusivefiles=False,
2938 2944 sharedfiles=False,
2939 2945 revisionscount=False,
2940 2946 trackedsize=False,
2941 2947 storedsize=False,
2942 2948 ):
2943 2949 d = {}
2944 2950
2945 2951 if exclusivefiles:
2946 2952 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2947 2953 if not self._inline:
2948 2954 d[b'exclusivefiles'].append((self.opener, self.datafile))
2949 2955
2950 2956 if sharedfiles:
2951 2957 d[b'sharedfiles'] = []
2952 2958
2953 2959 if revisionscount:
2954 2960 d[b'revisionscount'] = len(self)
2955 2961
2956 2962 if trackedsize:
2957 2963 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2958 2964
2959 2965 if storedsize:
2960 2966 d[b'storedsize'] = sum(
2961 2967 self.opener.stat(path).st_size for path in self.files()
2962 2968 )
2963 2969
2964 2970 return d
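
# Hedged usage sketch: asking a revlog how much data it tracks.  ``rl`` is an
# assumed revlog instance; only the keys requested as True are present in the
# returned dict, matching the conditionals above.
def example_storage_report(rl):
    info = rl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    return (
        info[b'revisionscount'],  # number of revisions stored
        info[b'trackedsize'],     # sum of rawsize() over every revision
        info[b'storedsize'],      # on-disk size of the revlog's files
    )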