##// END OF EJS Templates
index: add a `get_rev` method (API)...
marmoute -
r43861:9db20dcf default draft
parent child Browse files
Show More
@@ -1,762 +1,762 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 33 {
34 34 Py_ssize_t expected_size;
35 35
36 36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 37 return NULL;
38 38 }
39 39
40 40 return _dict_new_presized(expected_size);
41 41 }
42 42
43 43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 44 int size, int mtime)
45 45 {
46 46 dirstateTupleObject *t =
47 47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 48 if (!t) {
49 49 return NULL;
50 50 }
51 51 t->state = state;
52 52 t->mode = mode;
53 53 t->size = size;
54 54 t->mtime = mtime;
55 55 return t;
56 56 }
57 57
58 58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 59 PyObject *kwds)
60 60 {
61 61 /* We do all the initialization here and not a tp_init function because
62 62 * dirstate_tuple is immutable. */
63 63 dirstateTupleObject *t;
64 64 char state;
65 65 int size, mode, mtime;
66 66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 67 return NULL;
68 68 }
69 69
70 70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 71 if (!t) {
72 72 return NULL;
73 73 }
74 74 t->state = state;
75 75 t->mode = mode;
76 76 t->size = size;
77 77 t->mtime = mtime;
78 78
79 79 return (PyObject *)t;
80 80 }
81 81
82 82 static void dirstate_tuple_dealloc(PyObject *o)
83 83 {
84 84 PyObject_Del(o);
85 85 }
86 86
87 87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
88 88 {
89 89 return 4;
90 90 }
91 91
92 92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 93 {
94 94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 95 switch (i) {
96 96 case 0:
97 97 return PyBytes_FromStringAndSize(&t->state, 1);
98 98 case 1:
99 99 return PyInt_FromLong(t->mode);
100 100 case 2:
101 101 return PyInt_FromLong(t->size);
102 102 case 3:
103 103 return PyInt_FromLong(t->mtime);
104 104 default:
105 105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 106 return NULL;
107 107 }
108 108 }
109 109
110 110 static PySequenceMethods dirstate_tuple_sq = {
111 111 dirstate_tuple_length, /* sq_length */
112 112 0, /* sq_concat */
113 113 0, /* sq_repeat */
114 114 dirstate_tuple_item, /* sq_item */
115 115 0, /* sq_ass_item */
116 116 0, /* sq_contains */
117 117 0, /* sq_inplace_concat */
118 118 0 /* sq_inplace_repeat */
119 119 };
120 120
121 121 PyTypeObject dirstateTupleType = {
122 122 PyVarObject_HEAD_INIT(NULL, 0) /* header */
123 123 "dirstate_tuple", /* tp_name */
124 124 sizeof(dirstateTupleObject), /* tp_basicsize */
125 125 0, /* tp_itemsize */
126 126 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
127 127 0, /* tp_print */
128 128 0, /* tp_getattr */
129 129 0, /* tp_setattr */
130 130 0, /* tp_compare */
131 131 0, /* tp_repr */
132 132 0, /* tp_as_number */
133 133 &dirstate_tuple_sq, /* tp_as_sequence */
134 134 0, /* tp_as_mapping */
135 135 0, /* tp_hash */
136 136 0, /* tp_call */
137 137 0, /* tp_str */
138 138 0, /* tp_getattro */
139 139 0, /* tp_setattro */
140 140 0, /* tp_as_buffer */
141 141 Py_TPFLAGS_DEFAULT, /* tp_flags */
142 142 "dirstate tuple", /* tp_doc */
143 143 0, /* tp_traverse */
144 144 0, /* tp_clear */
145 145 0, /* tp_richcompare */
146 146 0, /* tp_weaklistoffset */
147 147 0, /* tp_iter */
148 148 0, /* tp_iternext */
149 149 0, /* tp_methods */
150 150 0, /* tp_members */
151 151 0, /* tp_getset */
152 152 0, /* tp_base */
153 153 0, /* tp_dict */
154 154 0, /* tp_descr_get */
155 155 0, /* tp_descr_set */
156 156 0, /* tp_dictoffset */
157 157 0, /* tp_init */
158 158 0, /* tp_alloc */
159 159 dirstate_tuple_new, /* tp_new */
160 160 };
161 161
162 162 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
163 163 {
164 164 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
165 165 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
166 166 char state, *cur, *str, *cpos;
167 167 int mode, size, mtime;
168 168 unsigned int flen, pos = 40;
169 169 Py_ssize_t len = 40;
170 170 Py_ssize_t readlen;
171 171
172 172 if (!PyArg_ParseTuple(
173 173 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
174 174 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
175 175 goto quit;
176 176 }
177 177
178 178 len = readlen;
179 179
180 180 /* read parents */
181 181 if (len < 40) {
182 182 PyErr_SetString(PyExc_ValueError,
183 183 "too little data for parents");
184 184 goto quit;
185 185 }
186 186
187 187 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
188 188 str + 20, (Py_ssize_t)20);
189 189 if (!parents) {
190 190 goto quit;
191 191 }
192 192
193 193 /* read filenames */
194 194 while (pos >= 40 && pos < len) {
195 195 if (pos + 17 > len) {
196 196 PyErr_SetString(PyExc_ValueError,
197 197 "overflow in dirstate");
198 198 goto quit;
199 199 }
200 200 cur = str + pos;
201 201 /* unpack header */
202 202 state = *cur;
203 203 mode = getbe32(cur + 1);
204 204 size = getbe32(cur + 5);
205 205 mtime = getbe32(cur + 9);
206 206 flen = getbe32(cur + 13);
207 207 pos += 17;
208 208 cur += 17;
209 209 if (flen > len - pos) {
210 210 PyErr_SetString(PyExc_ValueError,
211 211 "overflow in dirstate");
212 212 goto quit;
213 213 }
214 214
215 215 entry =
216 216 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
217 217 cpos = memchr(cur, 0, flen);
218 218 if (cpos) {
219 219 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
220 220 cname = PyBytes_FromStringAndSize(
221 221 cpos + 1, flen - (cpos - cur) - 1);
222 222 if (!fname || !cname ||
223 223 PyDict_SetItem(cmap, fname, cname) == -1 ||
224 224 PyDict_SetItem(dmap, fname, entry) == -1) {
225 225 goto quit;
226 226 }
227 227 Py_DECREF(cname);
228 228 } else {
229 229 fname = PyBytes_FromStringAndSize(cur, flen);
230 230 if (!fname ||
231 231 PyDict_SetItem(dmap, fname, entry) == -1) {
232 232 goto quit;
233 233 }
234 234 }
235 235 Py_DECREF(fname);
236 236 Py_DECREF(entry);
237 237 fname = cname = entry = NULL;
238 238 pos += flen;
239 239 }
240 240
241 241 ret = parents;
242 242 Py_INCREF(ret);
243 243 quit:
244 244 Py_XDECREF(fname);
245 245 Py_XDECREF(cname);
246 246 Py_XDECREF(entry);
247 247 Py_XDECREF(parents);
248 248 return ret;
249 249 }
250 250
251 251 /*
252 252 * Build a set of non-normal and other parent entries from the dirstate dmap
253 253 */
254 254 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
255 255 {
256 256 PyObject *dmap, *fname, *v;
257 257 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
258 258 Py_ssize_t pos;
259 259
260 260 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
261 261 &dmap)) {
262 262 goto bail;
263 263 }
264 264
265 265 nonnset = PySet_New(NULL);
266 266 if (nonnset == NULL) {
267 267 goto bail;
268 268 }
269 269
270 270 otherpset = PySet_New(NULL);
271 271 if (otherpset == NULL) {
272 272 goto bail;
273 273 }
274 274
275 275 pos = 0;
276 276 while (PyDict_Next(dmap, &pos, &fname, &v)) {
277 277 dirstateTupleObject *t;
278 278 if (!dirstate_tuple_check(v)) {
279 279 PyErr_SetString(PyExc_TypeError,
280 280 "expected a dirstate tuple");
281 281 goto bail;
282 282 }
283 283 t = (dirstateTupleObject *)v;
284 284
285 285 if (t->state == 'n' && t->size == -2) {
286 286 if (PySet_Add(otherpset, fname) == -1) {
287 287 goto bail;
288 288 }
289 289 }
290 290
291 291 if (t->state == 'n' && t->mtime != -1) {
292 292 continue;
293 293 }
294 294 if (PySet_Add(nonnset, fname) == -1) {
295 295 goto bail;
296 296 }
297 297 }
298 298
299 299 result = Py_BuildValue("(OO)", nonnset, otherpset);
300 300 if (result == NULL) {
301 301 goto bail;
302 302 }
303 303 Py_DECREF(nonnset);
304 304 Py_DECREF(otherpset);
305 305 return result;
306 306 bail:
307 307 Py_XDECREF(nonnset);
308 308 Py_XDECREF(otherpset);
309 309 Py_XDECREF(result);
310 310 return NULL;
311 311 }
312 312
313 313 /*
314 314 * Efficiently pack a dirstate object into its on-disk format.
315 315 */
316 316 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
317 317 {
318 318 PyObject *packobj = NULL;
319 319 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
320 320 Py_ssize_t nbytes, pos, l;
321 321 PyObject *k, *v = NULL, *pn;
322 322 char *p, *s;
323 323 int now;
324 324
325 325 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
326 326 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
327 327 &now)) {
328 328 return NULL;
329 329 }
330 330
331 331 if (PyTuple_Size(pl) != 2) {
332 332 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
333 333 return NULL;
334 334 }
335 335
336 336 /* Figure out how much we need to allocate. */
337 337 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
338 338 PyObject *c;
339 339 if (!PyBytes_Check(k)) {
340 340 PyErr_SetString(PyExc_TypeError, "expected string key");
341 341 goto bail;
342 342 }
343 343 nbytes += PyBytes_GET_SIZE(k) + 17;
344 344 c = PyDict_GetItem(copymap, k);
345 345 if (c) {
346 346 if (!PyBytes_Check(c)) {
347 347 PyErr_SetString(PyExc_TypeError,
348 348 "expected string key");
349 349 goto bail;
350 350 }
351 351 nbytes += PyBytes_GET_SIZE(c) + 1;
352 352 }
353 353 }
354 354
355 355 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
356 356 if (packobj == NULL) {
357 357 goto bail;
358 358 }
359 359
360 360 p = PyBytes_AS_STRING(packobj);
361 361
362 362 pn = PyTuple_GET_ITEM(pl, 0);
363 363 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
364 364 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
365 365 goto bail;
366 366 }
367 367 memcpy(p, s, l);
368 368 p += 20;
369 369 pn = PyTuple_GET_ITEM(pl, 1);
370 370 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
371 371 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
372 372 goto bail;
373 373 }
374 374 memcpy(p, s, l);
375 375 p += 20;
376 376
377 377 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
378 378 dirstateTupleObject *tuple;
379 379 char state;
380 380 int mode, size, mtime;
381 381 Py_ssize_t len, l;
382 382 PyObject *o;
383 383 char *t;
384 384
385 385 if (!dirstate_tuple_check(v)) {
386 386 PyErr_SetString(PyExc_TypeError,
387 387 "expected a dirstate tuple");
388 388 goto bail;
389 389 }
390 390 tuple = (dirstateTupleObject *)v;
391 391
392 392 state = tuple->state;
393 393 mode = tuple->mode;
394 394 size = tuple->size;
395 395 mtime = tuple->mtime;
396 396 if (state == 'n' && mtime == now) {
397 397 /* See pure/parsers.py:pack_dirstate for why we do
398 398 * this. */
399 399 mtime = -1;
400 400 mtime_unset = (PyObject *)make_dirstate_tuple(
401 401 state, mode, size, mtime);
402 402 if (!mtime_unset) {
403 403 goto bail;
404 404 }
405 405 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
406 406 goto bail;
407 407 }
408 408 Py_DECREF(mtime_unset);
409 409 mtime_unset = NULL;
410 410 }
411 411 *p++ = state;
412 412 putbe32((uint32_t)mode, p);
413 413 putbe32((uint32_t)size, p + 4);
414 414 putbe32((uint32_t)mtime, p + 8);
415 415 t = p + 12;
416 416 p += 16;
417 417 len = PyBytes_GET_SIZE(k);
418 418 memcpy(p, PyBytes_AS_STRING(k), len);
419 419 p += len;
420 420 o = PyDict_GetItem(copymap, k);
421 421 if (o) {
422 422 *p++ = '\0';
423 423 l = PyBytes_GET_SIZE(o);
424 424 memcpy(p, PyBytes_AS_STRING(o), l);
425 425 p += l;
426 426 len += l + 1;
427 427 }
428 428 putbe32((uint32_t)len, t);
429 429 }
430 430
431 431 pos = p - PyBytes_AS_STRING(packobj);
432 432 if (pos != nbytes) {
433 433 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
434 434 (long)pos, (long)nbytes);
435 435 goto bail;
436 436 }
437 437
438 438 return packobj;
439 439 bail:
440 440 Py_XDECREF(mtime_unset);
441 441 Py_XDECREF(packobj);
442 442 Py_XDECREF(v);
443 443 return NULL;
444 444 }
445 445
446 446 #define BUMPED_FIX 1
447 447 #define USING_SHA_256 2
448 448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449 449
450 450 static PyObject *readshas(const char *source, unsigned char num,
451 451 Py_ssize_t hashwidth)
452 452 {
453 453 int i;
454 454 PyObject *list = PyTuple_New(num);
455 455 if (list == NULL) {
456 456 return NULL;
457 457 }
458 458 for (i = 0; i < num; i++) {
459 459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 460 if (hash == NULL) {
461 461 Py_DECREF(list);
462 462 return NULL;
463 463 }
464 464 PyTuple_SET_ITEM(list, i, hash);
465 465 source += hashwidth;
466 466 }
467 467 return list;
468 468 }
469 469
470 470 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
471 471 uint32_t *msize)
472 472 {
473 473 const char *data = databegin;
474 474 const char *meta;
475 475
476 476 double mtime;
477 477 int16_t tz;
478 478 uint16_t flags;
479 479 unsigned char nsuccs, nparents, nmetadata;
480 480 Py_ssize_t hashwidth = 20;
481 481
482 482 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
483 483 PyObject *metadata = NULL, *ret = NULL;
484 484 int i;
485 485
486 486 if (data + FM1_HEADER_SIZE > dataend) {
487 487 goto overflow;
488 488 }
489 489
490 490 *msize = getbe32(data);
491 491 data += 4;
492 492 mtime = getbefloat64(data);
493 493 data += 8;
494 494 tz = getbeint16(data);
495 495 data += 2;
496 496 flags = getbeuint16(data);
497 497 data += 2;
498 498
499 499 if (flags & USING_SHA_256) {
500 500 hashwidth = 32;
501 501 }
502 502
503 503 nsuccs = (unsigned char)(*data++);
504 504 nparents = (unsigned char)(*data++);
505 505 nmetadata = (unsigned char)(*data++);
506 506
507 507 if (databegin + *msize > dataend) {
508 508 goto overflow;
509 509 }
510 510 dataend = databegin + *msize; /* narrow down to marker size */
511 511
512 512 if (data + hashwidth > dataend) {
513 513 goto overflow;
514 514 }
515 515 prec = PyBytes_FromStringAndSize(data, hashwidth);
516 516 data += hashwidth;
517 517 if (prec == NULL) {
518 518 goto bail;
519 519 }
520 520
521 521 if (data + nsuccs * hashwidth > dataend) {
522 522 goto overflow;
523 523 }
524 524 succs = readshas(data, nsuccs, hashwidth);
525 525 if (succs == NULL) {
526 526 goto bail;
527 527 }
528 528 data += nsuccs * hashwidth;
529 529
530 530 if (nparents == 1 || nparents == 2) {
531 531 if (data + nparents * hashwidth > dataend) {
532 532 goto overflow;
533 533 }
534 534 parents = readshas(data, nparents, hashwidth);
535 535 if (parents == NULL) {
536 536 goto bail;
537 537 }
538 538 data += nparents * hashwidth;
539 539 } else {
540 540 parents = Py_None;
541 541 Py_INCREF(parents);
542 542 }
543 543
544 544 if (data + 2 * nmetadata > dataend) {
545 545 goto overflow;
546 546 }
547 547 meta = data + (2 * nmetadata);
548 548 metadata = PyTuple_New(nmetadata);
549 549 if (metadata == NULL) {
550 550 goto bail;
551 551 }
552 552 for (i = 0; i < nmetadata; i++) {
553 553 PyObject *tmp, *left = NULL, *right = NULL;
554 554 Py_ssize_t leftsize = (unsigned char)(*data++);
555 555 Py_ssize_t rightsize = (unsigned char)(*data++);
556 556 if (meta + leftsize + rightsize > dataend) {
557 557 goto overflow;
558 558 }
559 559 left = PyBytes_FromStringAndSize(meta, leftsize);
560 560 meta += leftsize;
561 561 right = PyBytes_FromStringAndSize(meta, rightsize);
562 562 meta += rightsize;
563 563 tmp = PyTuple_New(2);
564 564 if (!left || !right || !tmp) {
565 565 Py_XDECREF(left);
566 566 Py_XDECREF(right);
567 567 Py_XDECREF(tmp);
568 568 goto bail;
569 569 }
570 570 PyTuple_SET_ITEM(tmp, 0, left);
571 571 PyTuple_SET_ITEM(tmp, 1, right);
572 572 PyTuple_SET_ITEM(metadata, i, tmp);
573 573 }
574 574 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
575 575 (int)tz * 60, parents);
576 576 goto bail; /* return successfully */
577 577
578 578 overflow:
579 579 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
580 580 bail:
581 581 Py_XDECREF(prec);
582 582 Py_XDECREF(succs);
583 583 Py_XDECREF(metadata);
584 584 Py_XDECREF(parents);
585 585 return ret;
586 586 }
587 587
588 588 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
589 589 {
590 590 const char *data, *dataend;
591 591 Py_ssize_t datalen, offset, stop;
592 592 PyObject *markers = NULL;
593 593
594 594 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
595 595 &offset, &stop)) {
596 596 return NULL;
597 597 }
598 598 if (offset < 0) {
599 599 PyErr_SetString(PyExc_ValueError,
600 600 "invalid negative offset in fm1readmarkers");
601 601 return NULL;
602 602 }
603 603 if (stop > datalen) {
604 604 PyErr_SetString(
605 605 PyExc_ValueError,
606 606 "stop longer than data length in fm1readmarkers");
607 607 return NULL;
608 608 }
609 609 dataend = data + datalen;
610 610 data += offset;
611 611 markers = PyList_New(0);
612 612 if (!markers) {
613 613 return NULL;
614 614 }
615 615 while (offset < stop) {
616 616 uint32_t msize;
617 617 int error;
618 618 PyObject *record = fm1readmarker(data, dataend, &msize);
619 619 if (!record) {
620 620 goto bail;
621 621 }
622 622 error = PyList_Append(markers, record);
623 623 Py_DECREF(record);
624 624 if (error) {
625 625 goto bail;
626 626 }
627 627 data += msize;
628 628 offset += msize;
629 629 }
630 630 return markers;
631 631 bail:
632 632 Py_DECREF(markers);
633 633 return NULL;
634 634 }
635 635
636 636 static char parsers_doc[] = "Efficient content parsing.";
637 637
638 638 PyObject *encodedir(PyObject *self, PyObject *args);
639 639 PyObject *pathencode(PyObject *self, PyObject *args);
640 640 PyObject *lowerencode(PyObject *self, PyObject *args);
641 641 PyObject *parse_index2(PyObject *self, PyObject *args);
642 642
643 643 static PyMethodDef methods[] = {
644 644 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
645 645 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
646 646 "create a set containing non-normal and other parent entries of given "
647 647 "dirstate\n"},
648 648 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
649 649 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
650 650 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
651 651 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
652 652 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
653 653 {"dict_new_presized", dict_new_presized, METH_VARARGS,
654 654 "construct a dict with an expected size\n"},
655 655 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
656 656 "make file foldmap\n"},
657 657 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
658 658 "escape a UTF-8 byte string to JSON (fast path)\n"},
659 659 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
660 660 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
661 661 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
662 662 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
663 663 "parse v1 obsolete markers\n"},
664 664 {NULL, NULL}};
665 665
666 666 void dirs_module_init(PyObject *mod);
667 667 void manifest_module_init(PyObject *mod);
668 668 void revlog_module_init(PyObject *mod);
669 669
670 static const int version = 15;
670 static const int version = 16;
671 671
672 672 static void module_init(PyObject *mod)
673 673 {
674 674 PyObject *capsule = NULL;
675 675 PyModule_AddIntConstant(mod, "version", version);
676 676
677 677 /* This module constant has two purposes. First, it lets us unit test
678 678 * the ImportError raised without hard-coding any error text. This
679 679 * means we can change the text in the future without breaking tests,
680 680 * even across changesets without a recompile. Second, its presence
681 681 * can be used to determine whether the version-checking logic is
682 682 * present, which also helps in testing across changesets without a
683 683 * recompile. Note that this means the pure-Python version of parsers
684 684 * should not have this module constant. */
685 685 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
686 686
687 687 dirs_module_init(mod);
688 688 manifest_module_init(mod);
689 689 revlog_module_init(mod);
690 690
691 691 capsule = PyCapsule_New(
692 692 make_dirstate_tuple,
693 693 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
694 694 if (capsule != NULL)
695 695 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
696 696
697 697 if (PyType_Ready(&dirstateTupleType) < 0) {
698 698 return;
699 699 }
700 700 Py_INCREF(&dirstateTupleType);
701 701 PyModule_AddObject(mod, "dirstatetuple",
702 702 (PyObject *)&dirstateTupleType);
703 703 }
704 704
705 705 static int check_python_version(void)
706 706 {
707 707 PyObject *sys = PyImport_ImportModule("sys"), *ver;
708 708 long hexversion;
709 709 if (!sys) {
710 710 return -1;
711 711 }
712 712 ver = PyObject_GetAttrString(sys, "hexversion");
713 713 Py_DECREF(sys);
714 714 if (!ver) {
715 715 return -1;
716 716 }
717 717 hexversion = PyInt_AsLong(ver);
718 718 Py_DECREF(ver);
719 719 /* sys.hexversion is a 32-bit number by default, so the -1 case
720 720 * should only occur in unusual circumstances (e.g. if sys.hexversion
721 721 * is manually set to an invalid value). */
722 722 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
723 723 PyErr_Format(PyExc_ImportError,
724 724 "%s: The Mercurial extension "
725 725 "modules were compiled with Python " PY_VERSION
726 726 ", but "
727 727 "Mercurial is currently using Python with "
728 728 "sys.hexversion=%ld: "
729 729 "Python %s\n at: %s",
730 730 versionerrortext, hexversion, Py_GetVersion(),
731 731 Py_GetProgramFullPath());
732 732 return -1;
733 733 }
734 734 return 0;
735 735 }
736 736
737 737 #ifdef IS_PY3K
738 738 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
739 739 parsers_doc, -1, methods};
740 740
741 741 PyMODINIT_FUNC PyInit_parsers(void)
742 742 {
743 743 PyObject *mod;
744 744
745 745 if (check_python_version() == -1)
746 746 return NULL;
747 747 mod = PyModule_Create(&parsers_module);
748 748 module_init(mod);
749 749 return mod;
750 750 }
751 751 #else
752 752 PyMODINIT_FUNC initparsers(void)
753 753 {
754 754 PyObject *mod;
755 755
756 756 if (check_python_version() == -1) {
757 757 return;
758 758 }
759 759 mod = Py_InitModule3("parsers", methods, parsers_doc);
760 760 module_init(mod);
761 761 }
762 762 #endif
@@ -1,3068 +1,3070 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #include "bitmanipulation.h"
20 20 #include "charencode.h"
21 21 #include "revlog.h"
22 22 #include "util.h"
23 23
24 24 #ifdef IS_PY3K
25 25 /* The mapping of Python types is meant to be temporary to get Python
26 26 * 3 to compile. We should remove this once Python 3 support is fully
27 27 * supported and proper types are used in the extensions themselves. */
28 28 #define PyInt_Check PyLong_Check
29 29 #define PyInt_FromLong PyLong_FromLong
30 30 #define PyInt_FromSsize_t PyLong_FromSsize_t
31 31 #define PyInt_AsLong PyLong_AsLong
32 32 #endif
33 33
34 34 typedef struct indexObjectStruct indexObject;
35 35
36 36 typedef struct {
37 37 int children[16];
38 38 } nodetreenode;
39 39
40 40 /*
41 41 * A base-16 trie for fast node->rev mapping.
42 42 *
43 43 * Positive value is index of the next node in the trie
44 44 * Negative value is a leaf: -(rev + 2)
45 45 * Zero is empty
46 46 */
47 47 typedef struct {
48 48 indexObject *index;
49 49 nodetreenode *nodes;
50 50 unsigned length; /* # nodes in use */
51 51 unsigned capacity; /* # nodes allocated */
52 52 int depth; /* maximum depth of tree */
53 53 int splits; /* # splits performed */
54 54 } nodetree;
55 55
56 56 typedef struct {
57 57 PyObject_HEAD /* ; */
58 58 nodetree nt;
59 59 } nodetreeObject;
60 60
61 61 /*
62 62 * This class has two behaviors.
63 63 *
64 64 * When used in a list-like way (with integer keys), we decode an
65 65 * entry in a RevlogNG index file on demand. Our last entry is a
66 66 * sentinel, always a nullid. We have limited support for
67 67 * integer-keyed insert and delete, only at elements right before the
68 68 * sentinel.
69 69 *
70 70 * With string keys, we lazily perform a reverse mapping from node to
71 71 * rev, using a base-16 trie.
72 72 */
73 73 struct indexObjectStruct {
74 74 PyObject_HEAD
75 75 /* Type-specific fields go here. */
76 76 PyObject *data; /* raw bytes of index */
77 77 Py_buffer buf; /* buffer of data */
78 78 PyObject **cache; /* cached tuples */
79 79 const char **offsets; /* populated on demand */
80 80 Py_ssize_t raw_length; /* original number of elements */
81 81 Py_ssize_t length; /* current number of elements */
82 82 PyObject *added; /* populated on demand */
83 83 PyObject *headrevs; /* cache, invalidated on changes */
84 84 PyObject *filteredrevs; /* filtered revs set */
85 85 nodetree nt; /* base-16 trie */
86 86 int ntinitialized; /* 0 or 1 */
87 87 int ntrev; /* last rev scanned */
88 88 int ntlookups; /* # lookups */
89 89 int ntmisses; /* # lookups that miss the cache */
90 90 int inlined;
91 91 };
92 92
93 93 static Py_ssize_t index_length(const indexObject *self)
94 94 {
95 95 if (self->added == NULL)
96 96 return self->length;
97 97 return self->length + PyList_GET_SIZE(self->added);
98 98 }
99 99
100 100 static PyObject *nullentry = NULL;
101 101 static const char nullid[20] = {0};
102 102 static const Py_ssize_t nullrev = -1;
103 103
104 104 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
105 105
106 106 #if LONG_MAX == 0x7fffffffL
107 107 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
108 108 #else
109 109 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
110 110 #endif
111 111
112 112 /* A RevlogNG v1 index entry is 64 bytes long. */
113 113 static const long v1_hdrsize = 64;
114 114
115 115 static void raise_revlog_error(void)
116 116 {
117 117 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
118 118
119 119 mod = PyImport_ImportModule("mercurial.error");
120 120 if (mod == NULL) {
121 121 goto cleanup;
122 122 }
123 123
124 124 dict = PyModule_GetDict(mod);
125 125 if (dict == NULL) {
126 126 goto cleanup;
127 127 }
128 128 Py_INCREF(dict);
129 129
130 130 errclass = PyDict_GetItemString(dict, "RevlogError");
131 131 if (errclass == NULL) {
132 132 PyErr_SetString(PyExc_SystemError,
133 133 "could not find RevlogError");
134 134 goto cleanup;
135 135 }
136 136
137 137 /* value of exception is ignored by callers */
138 138 PyErr_SetString(errclass, "RevlogError");
139 139
140 140 cleanup:
141 141 Py_XDECREF(dict);
142 142 Py_XDECREF(mod);
143 143 }
144 144
145 145 /*
146 146 * Return a pointer to the beginning of a RevlogNG record.
147 147 */
148 148 static const char *index_deref(indexObject *self, Py_ssize_t pos)
149 149 {
150 150 if (self->inlined && pos > 0) {
151 151 if (self->offsets == NULL) {
152 152 self->offsets = PyMem_Malloc(self->raw_length *
153 153 sizeof(*self->offsets));
154 154 if (self->offsets == NULL)
155 155 return (const char *)PyErr_NoMemory();
156 156 inline_scan(self, self->offsets);
157 157 }
158 158 return self->offsets[pos];
159 159 }
160 160
161 161 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
162 162 }
163 163
164 164 /*
165 165 * Get parents of the given rev.
166 166 *
167 167 * The specified rev must be valid and must not be nullrev. A returned
168 168 * parent revision may be nullrev, but is guaranteed to be in valid range.
169 169 */
170 170 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
171 171 int maxrev)
172 172 {
173 173 if (rev >= self->length) {
174 174 long tmp;
175 175 PyObject *tuple =
176 176 PyList_GET_ITEM(self->added, rev - self->length);
177 177 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
178 178 return -1;
179 179 }
180 180 ps[0] = (int)tmp;
181 181 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
182 182 return -1;
183 183 }
184 184 ps[1] = (int)tmp;
185 185 } else {
186 186 const char *data = index_deref(self, rev);
187 187 ps[0] = getbe32(data + 24);
188 188 ps[1] = getbe32(data + 28);
189 189 }
190 190 /* If index file is corrupted, ps[] may point to invalid revisions. So
191 191 * there is a risk of buffer overflow to trust them unconditionally. */
192 192 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
193 193 PyErr_SetString(PyExc_ValueError, "parent out of range");
194 194 return -1;
195 195 }
196 196 return 0;
197 197 }
198 198
199 199 /*
200 200 * Get parents of the given rev.
201 201 *
202 202 * If the specified rev is out of range, IndexError will be raised. If the
203 203 * revlog entry is corrupted, ValueError may be raised.
204 204 *
205 205 * Returns 0 on success or -1 on failure.
206 206 */
207 207 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
208 208 {
209 209 int tiprev;
210 210 if (!op || !HgRevlogIndex_Check(op) || !ps) {
211 211 PyErr_BadInternalCall();
212 212 return -1;
213 213 }
214 214 tiprev = (int)index_length((indexObject *)op) - 1;
215 215 if (rev < -1 || rev > tiprev) {
216 216 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
217 217 return -1;
218 218 } else if (rev == -1) {
219 219 ps[0] = ps[1] = -1;
220 220 return 0;
221 221 } else {
222 222 return index_get_parents((indexObject *)op, rev, ps, tiprev);
223 223 }
224 224 }
225 225
226 226 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
227 227 {
228 228 uint64_t offset;
229 229 if (rev == nullrev) {
230 230 return 0;
231 231 }
232 232 if (rev >= self->length) {
233 233 PyObject *tuple;
234 234 PyObject *pylong;
235 235 PY_LONG_LONG tmp;
236 236 tuple = PyList_GET_ITEM(self->added, rev - self->length);
237 237 pylong = PyTuple_GET_ITEM(tuple, 0);
238 238 tmp = PyLong_AsLongLong(pylong);
239 239 if (tmp == -1 && PyErr_Occurred()) {
240 240 return -1;
241 241 }
242 242 if (tmp < 0) {
243 243 PyErr_Format(PyExc_OverflowError,
244 244 "revlog entry size out of bound (%lld)",
245 245 (long long)tmp);
246 246 return -1;
247 247 }
248 248 offset = (uint64_t)tmp;
249 249 } else {
250 250 const char *data = index_deref(self, rev);
251 251 offset = getbe32(data + 4);
252 252 if (rev == 0) {
253 253 /* mask out version number for the first entry */
254 254 offset &= 0xFFFF;
255 255 } else {
256 256 uint32_t offset_high = getbe32(data);
257 257 offset |= ((uint64_t)offset_high) << 32;
258 258 }
259 259 }
260 260 return (int64_t)(offset >> 16);
261 261 }
262 262
263 263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 264 {
265 265 if (rev == nullrev) {
266 266 return 0;
267 267 }
268 268 if (rev >= self->length) {
269 269 PyObject *tuple;
270 270 PyObject *pylong;
271 271 long ret;
272 272 tuple = PyList_GET_ITEM(self->added, rev - self->length);
273 273 pylong = PyTuple_GET_ITEM(tuple, 1);
274 274 ret = PyInt_AsLong(pylong);
275 275 if (ret == -1 && PyErr_Occurred()) {
276 276 return -1;
277 277 }
278 278 if (ret < 0 || ret > (long)INT_MAX) {
279 279 PyErr_Format(PyExc_OverflowError,
280 280 "revlog entry size out of bound (%ld)",
281 281 ret);
282 282 return -1;
283 283 }
284 284 return (int)ret;
285 285 } else {
286 286 const char *data = index_deref(self, rev);
287 287 int tmp = (int)getbe32(data + 8);
288 288 if (tmp < 0) {
289 289 PyErr_Format(PyExc_OverflowError,
290 290 "revlog entry size out of bound (%d)",
291 291 tmp);
292 292 return -1;
293 293 }
294 294 return tmp;
295 295 }
296 296 }
297 297
298 298 /*
299 299 * RevlogNG format (all in big endian, data may be inlined):
300 300 * 6 bytes: offset
301 301 * 2 bytes: flags
302 302 * 4 bytes: compressed length
303 303 * 4 bytes: uncompressed length
304 304 * 4 bytes: base revision
305 305 * 4 bytes: link revision
306 306 * 4 bytes: parent 1 revision
307 307 * 4 bytes: parent 2 revision
308 308 * 32 bytes: nodeid (only 20 bytes used)
309 309 */
310 310 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
311 311 {
312 312 uint64_t offset_flags;
313 313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
314 314 const char *c_node_id;
315 315 const char *data;
316 316 Py_ssize_t length = index_length(self);
317 317 PyObject *entry;
318 318
319 319 if (pos == nullrev) {
320 320 Py_INCREF(nullentry);
321 321 return nullentry;
322 322 }
323 323
324 324 if (pos < 0 || pos >= length) {
325 325 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
326 326 return NULL;
327 327 }
328 328
329 329 if (pos >= self->length) {
330 330 PyObject *obj;
331 331 obj = PyList_GET_ITEM(self->added, pos - self->length);
332 332 Py_INCREF(obj);
333 333 return obj;
334 334 }
335 335
336 336 if (self->cache) {
337 337 if (self->cache[pos]) {
338 338 Py_INCREF(self->cache[pos]);
339 339 return self->cache[pos];
340 340 }
341 341 } else {
342 342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 343 if (self->cache == NULL)
344 344 return PyErr_NoMemory();
345 345 }
346 346
347 347 data = index_deref(self, pos);
348 348 if (data == NULL)
349 349 return NULL;
350 350
351 351 offset_flags = getbe32(data + 4);
352 352 if (pos == 0) /* mask out version number for the first entry */
353 353 offset_flags &= 0xFFFF;
354 354 else {
355 355 uint32_t offset_high = getbe32(data);
356 356 offset_flags |= ((uint64_t)offset_high) << 32;
357 357 }
358 358
359 359 comp_len = getbe32(data + 8);
360 360 uncomp_len = getbe32(data + 12);
361 361 base_rev = getbe32(data + 16);
362 362 link_rev = getbe32(data + 20);
363 363 parent_1 = getbe32(data + 24);
364 364 parent_2 = getbe32(data + 28);
365 365 c_node_id = data + 32;
366 366
367 367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
368 368 base_rev, link_rev, parent_1, parent_2, c_node_id,
369 369 (Py_ssize_t)20);
370 370
371 371 if (entry) {
372 372 PyObject_GC_UnTrack(entry);
373 373 Py_INCREF(entry);
374 374 }
375 375
376 376 self->cache[pos] = entry;
377 377
378 378 return entry;
379 379 }
380 380
381 381 /*
382 382 * Return the 20-byte SHA of the node corresponding to the given rev.
383 383 */
384 384 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 385 {
386 386 Py_ssize_t length = index_length(self);
387 387 const char *data;
388 388
389 389 if (pos == nullrev)
390 390 return nullid;
391 391
392 392 if (pos >= length)
393 393 return NULL;
394 394
395 395 if (pos >= self->length) {
396 396 PyObject *tuple, *str;
397 397 tuple = PyList_GET_ITEM(self->added, pos - self->length);
398 398 str = PyTuple_GetItem(tuple, 7);
399 399 return str ? PyBytes_AS_STRING(str) : NULL;
400 400 }
401 401
402 402 data = index_deref(self, pos);
403 403 return data ? data + 32 : NULL;
404 404 }
405 405
406 406 /*
407 407 * Return the 20-byte SHA of the node corresponding to the given rev. The
408 408 * rev is assumed to be existing. If not, an exception is set.
409 409 */
410 410 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
411 411 {
412 412 const char *node = index_node(self, pos);
413 413 if (node == NULL) {
414 414 PyErr_Format(PyExc_IndexError, "could not access rev %d",
415 415 (int)pos);
416 416 }
417 417 return node;
418 418 }
419 419
420 420 static int nt_insert(nodetree *self, const char *node, int rev);
421 421
422 422 static int node_check(PyObject *obj, char **node)
423 423 {
424 424 Py_ssize_t nodelen;
425 425 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
426 426 return -1;
427 427 if (nodelen == 20)
428 428 return 0;
429 429 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
430 430 return -1;
431 431 }
432 432
433 433 static PyObject *index_append(indexObject *self, PyObject *obj)
434 434 {
435 435 char *node;
436 436 Py_ssize_t len;
437 437
438 438 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
439 439 PyErr_SetString(PyExc_TypeError, "8-tuple required");
440 440 return NULL;
441 441 }
442 442
443 443 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
444 444 return NULL;
445 445
446 446 len = index_length(self);
447 447
448 448 if (self->added == NULL) {
449 449 self->added = PyList_New(0);
450 450 if (self->added == NULL)
451 451 return NULL;
452 452 }
453 453
454 454 if (PyList_Append(self->added, obj) == -1)
455 455 return NULL;
456 456
457 457 if (self->ntinitialized)
458 458 nt_insert(&self->nt, node, (int)len);
459 459
460 460 Py_CLEAR(self->headrevs);
461 461 Py_RETURN_NONE;
462 462 }
463 463
464 464 static PyObject *index_stats(indexObject *self)
465 465 {
466 466 PyObject *obj = PyDict_New();
467 467 PyObject *s = NULL;
468 468 PyObject *t = NULL;
469 469
470 470 if (obj == NULL)
471 471 return NULL;
472 472
473 473 #define istat(__n, __d) \
474 474 do { \
475 475 s = PyBytes_FromString(__d); \
476 476 t = PyInt_FromSsize_t(self->__n); \
477 477 if (!s || !t) \
478 478 goto bail; \
479 479 if (PyDict_SetItem(obj, s, t) == -1) \
480 480 goto bail; \
481 481 Py_CLEAR(s); \
482 482 Py_CLEAR(t); \
483 483 } while (0)
484 484
485 485 if (self->added) {
486 486 Py_ssize_t len = PyList_GET_SIZE(self->added);
487 487 s = PyBytes_FromString("index entries added");
488 488 t = PyInt_FromSsize_t(len);
489 489 if (!s || !t)
490 490 goto bail;
491 491 if (PyDict_SetItem(obj, s, t) == -1)
492 492 goto bail;
493 493 Py_CLEAR(s);
494 494 Py_CLEAR(t);
495 495 }
496 496
497 497 if (self->raw_length != self->length)
498 498 istat(raw_length, "revs on disk");
499 499 istat(length, "revs in memory");
500 500 istat(ntlookups, "node trie lookups");
501 501 istat(ntmisses, "node trie misses");
502 502 istat(ntrev, "node trie last rev scanned");
503 503 if (self->ntinitialized) {
504 504 istat(nt.capacity, "node trie capacity");
505 505 istat(nt.depth, "node trie depth");
506 506 istat(nt.length, "node trie count");
507 507 istat(nt.splits, "node trie splits");
508 508 }
509 509
510 510 #undef istat
511 511
512 512 return obj;
513 513
514 514 bail:
515 515 Py_XDECREF(obj);
516 516 Py_XDECREF(s);
517 517 Py_XDECREF(t);
518 518 return NULL;
519 519 }
520 520
521 521 /*
522 522 * When we cache a list, we want to be sure the caller can't mutate
523 523 * the cached copy.
524 524 */
525 525 static PyObject *list_copy(PyObject *list)
526 526 {
527 527 Py_ssize_t len = PyList_GET_SIZE(list);
528 528 PyObject *newlist = PyList_New(len);
529 529 Py_ssize_t i;
530 530
531 531 if (newlist == NULL)
532 532 return NULL;
533 533
534 534 for (i = 0; i < len; i++) {
535 535 PyObject *obj = PyList_GET_ITEM(list, i);
536 536 Py_INCREF(obj);
537 537 PyList_SET_ITEM(newlist, i, obj);
538 538 }
539 539
540 540 return newlist;
541 541 }
542 542
543 543 static int check_filter(PyObject *filter, Py_ssize_t arg)
544 544 {
545 545 if (filter) {
546 546 PyObject *arglist, *result;
547 547 int isfiltered;
548 548
549 549 arglist = Py_BuildValue("(n)", arg);
550 550 if (!arglist) {
551 551 return -1;
552 552 }
553 553
554 554 result = PyEval_CallObject(filter, arglist);
555 555 Py_DECREF(arglist);
556 556 if (!result) {
557 557 return -1;
558 558 }
559 559
560 560 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
561 561 * same as this function, so we can just return it directly.*/
562 562 isfiltered = PyObject_IsTrue(result);
563 563 Py_DECREF(result);
564 564 return isfiltered;
565 565 } else {
566 566 return 0;
567 567 }
568 568 }
569 569
570 570 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
571 571 Py_ssize_t marker, char *phases)
572 572 {
573 573 PyObject *iter = NULL;
574 574 PyObject *iter_item = NULL;
575 575 Py_ssize_t min_idx = index_length(self) + 2;
576 576 long iter_item_long;
577 577
578 578 if (PyList_GET_SIZE(list) != 0) {
579 579 iter = PyObject_GetIter(list);
580 580 if (iter == NULL)
581 581 return -2;
582 582 while ((iter_item = PyIter_Next(iter))) {
583 583 if (!pylong_to_long(iter_item, &iter_item_long)) {
584 584 Py_DECREF(iter_item);
585 585 return -2;
586 586 }
587 587 Py_DECREF(iter_item);
588 588 if (iter_item_long < min_idx)
589 589 min_idx = iter_item_long;
590 590 phases[iter_item_long] = (char)marker;
591 591 }
592 592 Py_DECREF(iter);
593 593 }
594 594
595 595 return min_idx;
596 596 }
597 597
598 598 static inline void set_phase_from_parents(char *phases, int parent_1,
599 599 int parent_2, Py_ssize_t i)
600 600 {
601 601 if (parent_1 >= 0 && phases[parent_1] > phases[i])
602 602 phases[i] = phases[parent_1];
603 603 if (parent_2 >= 0 && phases[parent_2] > phases[i])
604 604 phases[i] = phases[parent_2];
605 605 }
606 606
607 607 static PyObject *reachableroots2(indexObject *self, PyObject *args)
608 608 {
609 609
610 610 /* Input */
611 611 long minroot;
612 612 PyObject *includepatharg = NULL;
613 613 int includepath = 0;
614 614 /* heads and roots are lists */
615 615 PyObject *heads = NULL;
616 616 PyObject *roots = NULL;
617 617 PyObject *reachable = NULL;
618 618
619 619 PyObject *val;
620 620 Py_ssize_t len = index_length(self);
621 621 long revnum;
622 622 Py_ssize_t k;
623 623 Py_ssize_t i;
624 624 Py_ssize_t l;
625 625 int r;
626 626 int parents[2];
627 627
628 628 /* Internal data structure:
629 629 * tovisit: array of length len+1 (all revs + nullrev), filled upto
630 630 * lentovisit
631 631 *
632 632 * revstates: array of length len+1 (all revs + nullrev) */
633 633 int *tovisit = NULL;
634 634 long lentovisit = 0;
635 635 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
636 636 char *revstates = NULL;
637 637
638 638 /* Get arguments */
639 639 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
640 640 &PyList_Type, &roots, &PyBool_Type,
641 641 &includepatharg))
642 642 goto bail;
643 643
644 644 if (includepatharg == Py_True)
645 645 includepath = 1;
646 646
647 647 /* Initialize return set */
648 648 reachable = PyList_New(0);
649 649 if (reachable == NULL)
650 650 goto bail;
651 651
652 652 /* Initialize internal datastructures */
653 653 tovisit = (int *)malloc((len + 1) * sizeof(int));
654 654 if (tovisit == NULL) {
655 655 PyErr_NoMemory();
656 656 goto bail;
657 657 }
658 658
659 659 revstates = (char *)calloc(len + 1, 1);
660 660 if (revstates == NULL) {
661 661 PyErr_NoMemory();
662 662 goto bail;
663 663 }
664 664
665 665 l = PyList_GET_SIZE(roots);
666 666 for (i = 0; i < l; i++) {
667 667 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
668 668 if (revnum == -1 && PyErr_Occurred())
669 669 goto bail;
670 670 /* If root is out of range, e.g. wdir(), it must be unreachable
671 671 * from heads. So we can just ignore it. */
672 672 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
673 673 continue;
674 674 revstates[revnum + 1] |= RS_ROOT;
675 675 }
676 676
677 677 /* Populate tovisit with all the heads */
678 678 l = PyList_GET_SIZE(heads);
679 679 for (i = 0; i < l; i++) {
680 680 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
681 681 if (revnum == -1 && PyErr_Occurred())
682 682 goto bail;
683 683 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
684 684 PyErr_SetString(PyExc_IndexError, "head out of range");
685 685 goto bail;
686 686 }
687 687 if (!(revstates[revnum + 1] & RS_SEEN)) {
688 688 tovisit[lentovisit++] = (int)revnum;
689 689 revstates[revnum + 1] |= RS_SEEN;
690 690 }
691 691 }
692 692
693 693 /* Visit the tovisit list and find the reachable roots */
694 694 k = 0;
695 695 while (k < lentovisit) {
696 696 /* Add the node to reachable if it is a root*/
697 697 revnum = tovisit[k++];
698 698 if (revstates[revnum + 1] & RS_ROOT) {
699 699 revstates[revnum + 1] |= RS_REACHABLE;
700 700 val = PyInt_FromLong(revnum);
701 701 if (val == NULL)
702 702 goto bail;
703 703 r = PyList_Append(reachable, val);
704 704 Py_DECREF(val);
705 705 if (r < 0)
706 706 goto bail;
707 707 if (includepath == 0)
708 708 continue;
709 709 }
710 710
711 711 /* Add its parents to the list of nodes to visit */
712 712 if (revnum == nullrev)
713 713 continue;
714 714 r = index_get_parents(self, revnum, parents, (int)len - 1);
715 715 if (r < 0)
716 716 goto bail;
717 717 for (i = 0; i < 2; i++) {
718 718 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
719 719 parents[i] >= minroot) {
720 720 tovisit[lentovisit++] = parents[i];
721 721 revstates[parents[i] + 1] |= RS_SEEN;
722 722 }
723 723 }
724 724 }
725 725
726 726 /* Find all the nodes in between the roots we found and the heads
727 727 * and add them to the reachable set */
728 728 if (includepath == 1) {
729 729 long minidx = minroot;
730 730 if (minidx < 0)
731 731 minidx = 0;
732 732 for (i = minidx; i < len; i++) {
733 733 if (!(revstates[i + 1] & RS_SEEN))
734 734 continue;
735 735 r = index_get_parents(self, i, parents, (int)len - 1);
736 736 /* Corrupted index file, error is set from
737 737 * index_get_parents */
738 738 if (r < 0)
739 739 goto bail;
740 740 if (((revstates[parents[0] + 1] |
741 741 revstates[parents[1] + 1]) &
742 742 RS_REACHABLE) &&
743 743 !(revstates[i + 1] & RS_REACHABLE)) {
744 744 revstates[i + 1] |= RS_REACHABLE;
745 745 val = PyInt_FromSsize_t(i);
746 746 if (val == NULL)
747 747 goto bail;
748 748 r = PyList_Append(reachable, val);
749 749 Py_DECREF(val);
750 750 if (r < 0)
751 751 goto bail;
752 752 }
753 753 }
754 754 }
755 755
756 756 free(revstates);
757 757 free(tovisit);
758 758 return reachable;
759 759 bail:
760 760 Py_XDECREF(reachable);
761 761 free(revstates);
762 762 free(tovisit);
763 763 return NULL;
764 764 }
765 765
766 766 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
767 767 {
768 768 PyObject *roots = Py_None;
769 769 PyObject *ret = NULL;
770 770 PyObject *phasessize = NULL;
771 771 PyObject *phaseroots = NULL;
772 772 PyObject *phaseset = NULL;
773 773 PyObject *phasessetlist = NULL;
774 774 PyObject *rev = NULL;
775 775 Py_ssize_t len = index_length(self);
776 776 Py_ssize_t numphase = 0;
777 777 Py_ssize_t minrevallphases = 0;
778 778 Py_ssize_t minrevphase = 0;
779 779 Py_ssize_t i = 0;
780 780 char *phases = NULL;
781 781 long phase;
782 782
783 783 if (!PyArg_ParseTuple(args, "O", &roots))
784 784 goto done;
785 785 if (roots == NULL || !PyList_Check(roots)) {
786 786 PyErr_SetString(PyExc_TypeError, "roots must be a list");
787 787 goto done;
788 788 }
789 789
790 790 phases = calloc(
791 791 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
792 792 if (phases == NULL) {
793 793 PyErr_NoMemory();
794 794 goto done;
795 795 }
796 796 /* Put the phase information of all the roots in phases */
797 797 numphase = PyList_GET_SIZE(roots) + 1;
798 798 minrevallphases = len + 1;
799 799 phasessetlist = PyList_New(numphase);
800 800 if (phasessetlist == NULL)
801 801 goto done;
802 802
803 803 PyList_SET_ITEM(phasessetlist, 0, Py_None);
804 804 Py_INCREF(Py_None);
805 805
806 806 for (i = 0; i < numphase - 1; i++) {
807 807 phaseroots = PyList_GET_ITEM(roots, i);
808 808 phaseset = PySet_New(NULL);
809 809 if (phaseset == NULL)
810 810 goto release;
811 811 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
812 812 if (!PyList_Check(phaseroots)) {
813 813 PyErr_SetString(PyExc_TypeError,
814 814 "roots item must be a list");
815 815 goto release;
816 816 }
817 817 minrevphase =
818 818 add_roots_get_min(self, phaseroots, i + 1, phases);
819 819 if (minrevphase == -2) /* Error from add_roots_get_min */
820 820 goto release;
821 821 minrevallphases = MIN(minrevallphases, minrevphase);
822 822 }
823 823 /* Propagate the phase information from the roots to the revs */
824 824 if (minrevallphases != -1) {
825 825 int parents[2];
826 826 for (i = minrevallphases; i < len; i++) {
827 827 if (index_get_parents(self, i, parents, (int)len - 1) <
828 828 0)
829 829 goto release;
830 830 set_phase_from_parents(phases, parents[0], parents[1],
831 831 i);
832 832 }
833 833 }
834 834 /* Transform phase list to a python list */
835 835 phasessize = PyInt_FromSsize_t(len);
836 836 if (phasessize == NULL)
837 837 goto release;
838 838 for (i = 0; i < len; i++) {
839 839 phase = phases[i];
840 840 /* We only store the sets of phase for non public phase, the
841 841 * public phase is computed as a difference */
842 842 if (phase != 0) {
843 843 phaseset = PyList_GET_ITEM(phasessetlist, phase);
844 844 rev = PyInt_FromSsize_t(i);
845 845 if (rev == NULL)
846 846 goto release;
847 847 PySet_Add(phaseset, rev);
848 848 Py_XDECREF(rev);
849 849 }
850 850 }
851 851 ret = PyTuple_Pack(2, phasessize, phasessetlist);
852 852
853 853 release:
854 854 Py_XDECREF(phasessize);
855 855 Py_XDECREF(phasessetlist);
856 856 done:
857 857 free(phases);
858 858 return ret;
859 859 }
860 860
861 861 static PyObject *index_headrevs(indexObject *self, PyObject *args)
862 862 {
863 863 Py_ssize_t i, j, len;
864 864 char *nothead = NULL;
865 865 PyObject *heads = NULL;
866 866 PyObject *filter = NULL;
867 867 PyObject *filteredrevs = Py_None;
868 868
869 869 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
870 870 return NULL;
871 871 }
872 872
873 873 if (self->headrevs && filteredrevs == self->filteredrevs)
874 874 return list_copy(self->headrevs);
875 875
876 876 Py_DECREF(self->filteredrevs);
877 877 self->filteredrevs = filteredrevs;
878 878 Py_INCREF(filteredrevs);
879 879
880 880 if (filteredrevs != Py_None) {
881 881 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
882 882 if (!filter) {
883 883 PyErr_SetString(
884 884 PyExc_TypeError,
885 885 "filteredrevs has no attribute __contains__");
886 886 goto bail;
887 887 }
888 888 }
889 889
890 890 len = index_length(self);
891 891 heads = PyList_New(0);
892 892 if (heads == NULL)
893 893 goto bail;
894 894 if (len == 0) {
895 895 PyObject *nullid = PyInt_FromLong(-1);
896 896 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
897 897 Py_XDECREF(nullid);
898 898 goto bail;
899 899 }
900 900 goto done;
901 901 }
902 902
903 903 nothead = calloc(len, 1);
904 904 if (nothead == NULL) {
905 905 PyErr_NoMemory();
906 906 goto bail;
907 907 }
908 908
909 909 for (i = len - 1; i >= 0; i--) {
910 910 int isfiltered;
911 911 int parents[2];
912 912
913 913 /* If nothead[i] == 1, it means we've seen an unfiltered child
914 914 * of this node already, and therefore this node is not
915 915 * filtered. So we can skip the expensive check_filter step.
916 916 */
917 917 if (nothead[i] != 1) {
918 918 isfiltered = check_filter(filter, i);
919 919 if (isfiltered == -1) {
920 920 PyErr_SetString(PyExc_TypeError,
921 921 "unable to check filter");
922 922 goto bail;
923 923 }
924 924
925 925 if (isfiltered) {
926 926 nothead[i] = 1;
927 927 continue;
928 928 }
929 929 }
930 930
931 931 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
932 932 goto bail;
933 933 for (j = 0; j < 2; j++) {
934 934 if (parents[j] >= 0)
935 935 nothead[parents[j]] = 1;
936 936 }
937 937 }
938 938
939 939 for (i = 0; i < len; i++) {
940 940 PyObject *head;
941 941
942 942 if (nothead[i])
943 943 continue;
944 944 head = PyInt_FromSsize_t(i);
945 945 if (head == NULL || PyList_Append(heads, head) == -1) {
946 946 Py_XDECREF(head);
947 947 goto bail;
948 948 }
949 949 }
950 950
951 951 done:
952 952 self->headrevs = heads;
953 953 Py_XDECREF(filter);
954 954 free(nothead);
955 955 return list_copy(self->headrevs);
956 956 bail:
957 957 Py_XDECREF(filter);
958 958 Py_XDECREF(heads);
959 959 free(nothead);
960 960 return NULL;
961 961 }
962 962
963 963 /**
964 964 * Obtain the base revision index entry.
965 965 *
966 966 * Callers must ensure that rev >= 0 or illegal memory access may occur.
967 967 */
968 968 static inline int index_baserev(indexObject *self, int rev)
969 969 {
970 970 const char *data;
971 971 int result;
972 972
973 973 if (rev >= self->length) {
974 974 PyObject *tuple =
975 975 PyList_GET_ITEM(self->added, rev - self->length);
976 976 long ret;
977 977 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
978 978 return -2;
979 979 }
980 980 result = (int)ret;
981 981 } else {
982 982 data = index_deref(self, rev);
983 983 if (data == NULL) {
984 984 return -2;
985 985 }
986 986
987 987 result = getbe32(data + 16);
988 988 }
989 989 if (result > rev) {
990 990 PyErr_Format(
991 991 PyExc_ValueError,
992 992 "corrupted revlog, revision base above revision: %d, %d",
993 993 rev, result);
994 994 return -2;
995 995 }
996 996 if (result < -1) {
997 997 PyErr_Format(
998 998 PyExc_ValueError,
999 999 "corrupted revlog, revision base out of range: %d, %d", rev,
1000 1000 result);
1001 1001 return -2;
1002 1002 }
1003 1003 return result;
1004 1004 }
1005 1005
1006 1006 /**
1007 1007 * Find if a revision is a snapshot or not
1008 1008 *
1009 1009 * Only relevant for sparse-revlog case.
1010 1010 * Callers must ensure that rev is in a valid range.
1011 1011 */
1012 1012 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1013 1013 {
1014 1014 int ps[2];
1015 1015 Py_ssize_t base;
1016 1016 while (rev >= 0) {
1017 1017 base = (Py_ssize_t)index_baserev(self, rev);
1018 1018 if (base == rev) {
1019 1019 base = -1;
1020 1020 }
1021 1021 if (base == -2) {
1022 1022 assert(PyErr_Occurred());
1023 1023 return -1;
1024 1024 }
1025 1025 if (base == -1) {
1026 1026 return 1;
1027 1027 }
1028 1028 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1029 1029 assert(PyErr_Occurred());
1030 1030 return -1;
1031 1031 };
1032 1032 if (base == ps[0] || base == ps[1]) {
1033 1033 return 0;
1034 1034 }
1035 1035 rev = base;
1036 1036 }
1037 1037 return rev == -1;
1038 1038 }
1039 1039
1040 1040 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1041 1041 {
1042 1042 long rev;
1043 1043 int issnap;
1044 1044 Py_ssize_t length = index_length(self);
1045 1045
1046 1046 if (!pylong_to_long(value, &rev)) {
1047 1047 return NULL;
1048 1048 }
1049 1049 if (rev < -1 || rev >= length) {
1050 1050 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1051 1051 rev);
1052 1052 return NULL;
1053 1053 };
1054 1054 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1055 1055 if (issnap < 0) {
1056 1056 return NULL;
1057 1057 };
1058 1058 return PyBool_FromLong((long)issnap);
1059 1059 }
1060 1060
1061 1061 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1062 1062 {
1063 1063 Py_ssize_t start_rev;
1064 1064 PyObject *cache;
1065 1065 Py_ssize_t base;
1066 1066 Py_ssize_t rev;
1067 1067 PyObject *key = NULL;
1068 1068 PyObject *value = NULL;
1069 1069 const Py_ssize_t length = index_length(self);
1070 1070 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1071 1071 return NULL;
1072 1072 }
1073 1073 for (rev = start_rev; rev < length; rev++) {
1074 1074 int issnap;
1075 1075 PyObject *allvalues = NULL;
1076 1076 issnap = index_issnapshotrev(self, rev);
1077 1077 if (issnap < 0) {
1078 1078 goto bail;
1079 1079 }
1080 1080 if (issnap == 0) {
1081 1081 continue;
1082 1082 }
1083 1083 base = (Py_ssize_t)index_baserev(self, rev);
1084 1084 if (base == rev) {
1085 1085 base = -1;
1086 1086 }
1087 1087 if (base == -2) {
1088 1088 assert(PyErr_Occurred());
1089 1089 goto bail;
1090 1090 }
1091 1091 key = PyInt_FromSsize_t(base);
1092 1092 allvalues = PyDict_GetItem(cache, key);
1093 1093 if (allvalues == NULL && PyErr_Occurred()) {
1094 1094 goto bail;
1095 1095 }
1096 1096 if (allvalues == NULL) {
1097 1097 int r;
1098 1098 allvalues = PyList_New(0);
1099 1099 if (!allvalues) {
1100 1100 goto bail;
1101 1101 }
1102 1102 r = PyDict_SetItem(cache, key, allvalues);
1103 1103 Py_DECREF(allvalues);
1104 1104 if (r < 0) {
1105 1105 goto bail;
1106 1106 }
1107 1107 }
1108 1108 value = PyInt_FromSsize_t(rev);
1109 1109 if (PyList_Append(allvalues, value)) {
1110 1110 goto bail;
1111 1111 }
1112 1112 Py_CLEAR(key);
1113 1113 Py_CLEAR(value);
1114 1114 }
1115 1115 Py_RETURN_NONE;
1116 1116 bail:
1117 1117 Py_XDECREF(key);
1118 1118 Py_XDECREF(value);
1119 1119 return NULL;
1120 1120 }
1121 1121
1122 1122 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1123 1123 {
1124 1124 int rev, generaldelta;
1125 1125 PyObject *stoparg;
1126 1126 int stoprev, iterrev, baserev = -1;
1127 1127 int stopped;
1128 1128 PyObject *chain = NULL, *result = NULL;
1129 1129 const Py_ssize_t length = index_length(self);
1130 1130
1131 1131 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1132 1132 return NULL;
1133 1133 }
1134 1134
1135 1135 if (PyInt_Check(stoparg)) {
1136 1136 stoprev = (int)PyInt_AsLong(stoparg);
1137 1137 if (stoprev == -1 && PyErr_Occurred()) {
1138 1138 return NULL;
1139 1139 }
1140 1140 } else if (stoparg == Py_None) {
1141 1141 stoprev = -2;
1142 1142 } else {
1143 1143 PyErr_SetString(PyExc_ValueError,
1144 1144 "stoprev must be integer or None");
1145 1145 return NULL;
1146 1146 }
1147 1147
1148 1148 if (rev < 0 || rev >= length) {
1149 1149 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1150 1150 return NULL;
1151 1151 }
1152 1152
1153 1153 chain = PyList_New(0);
1154 1154 if (chain == NULL) {
1155 1155 return NULL;
1156 1156 }
1157 1157
1158 1158 baserev = index_baserev(self, rev);
1159 1159
1160 1160 /* This should never happen. */
1161 1161 if (baserev <= -2) {
1162 1162 /* Error should be set by index_deref() */
1163 1163 assert(PyErr_Occurred());
1164 1164 goto bail;
1165 1165 }
1166 1166
1167 1167 iterrev = rev;
1168 1168
1169 1169 while (iterrev != baserev && iterrev != stoprev) {
1170 1170 PyObject *value = PyInt_FromLong(iterrev);
1171 1171 if (value == NULL) {
1172 1172 goto bail;
1173 1173 }
1174 1174 if (PyList_Append(chain, value)) {
1175 1175 Py_DECREF(value);
1176 1176 goto bail;
1177 1177 }
1178 1178 Py_DECREF(value);
1179 1179
1180 1180 if (generaldelta) {
1181 1181 iterrev = baserev;
1182 1182 } else {
1183 1183 iterrev--;
1184 1184 }
1185 1185
1186 1186 if (iterrev < 0) {
1187 1187 break;
1188 1188 }
1189 1189
1190 1190 if (iterrev >= length) {
1191 1191 PyErr_SetString(PyExc_IndexError,
1192 1192 "revision outside index");
1193 1193 return NULL;
1194 1194 }
1195 1195
1196 1196 baserev = index_baserev(self, iterrev);
1197 1197
1198 1198 /* This should never happen. */
1199 1199 if (baserev <= -2) {
1200 1200 /* Error should be set by index_deref() */
1201 1201 assert(PyErr_Occurred());
1202 1202 goto bail;
1203 1203 }
1204 1204 }
1205 1205
1206 1206 if (iterrev == stoprev) {
1207 1207 stopped = 1;
1208 1208 } else {
1209 1209 PyObject *value = PyInt_FromLong(iterrev);
1210 1210 if (value == NULL) {
1211 1211 goto bail;
1212 1212 }
1213 1213 if (PyList_Append(chain, value)) {
1214 1214 Py_DECREF(value);
1215 1215 goto bail;
1216 1216 }
1217 1217 Py_DECREF(value);
1218 1218
1219 1219 stopped = 0;
1220 1220 }
1221 1221
1222 1222 if (PyList_Reverse(chain)) {
1223 1223 goto bail;
1224 1224 }
1225 1225
1226 1226 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1227 1227 Py_DECREF(chain);
1228 1228 return result;
1229 1229
1230 1230 bail:
1231 1231 Py_DECREF(chain);
1232 1232 return NULL;
1233 1233 }
1234 1234
1235 1235 static inline int64_t
1236 1236 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1237 1237 {
1238 1238 int64_t start_offset;
1239 1239 int64_t end_offset;
1240 1240 int end_size;
1241 1241 start_offset = index_get_start(self, start_rev);
1242 1242 if (start_offset < 0) {
1243 1243 return -1;
1244 1244 }
1245 1245 end_offset = index_get_start(self, end_rev);
1246 1246 if (end_offset < 0) {
1247 1247 return -1;
1248 1248 }
1249 1249 end_size = index_get_length(self, end_rev);
1250 1250 if (end_size < 0) {
1251 1251 return -1;
1252 1252 }
1253 1253 if (end_offset < start_offset) {
1254 1254 PyErr_Format(PyExc_ValueError,
1255 1255 "corrupted revlog index: inconsistent offset "
1256 1256 "between revisions (%zd) and (%zd)",
1257 1257 start_rev, end_rev);
1258 1258 return -1;
1259 1259 }
1260 1260 return (end_offset - start_offset) + (int64_t)end_size;
1261 1261 }
1262 1262
1263 1263 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1264 1264 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1265 1265 Py_ssize_t startidx, Py_ssize_t endidx)
1266 1266 {
1267 1267 int length;
1268 1268 while (endidx > 1 && endidx > startidx) {
1269 1269 length = index_get_length(self, revs[endidx - 1]);
1270 1270 if (length < 0) {
1271 1271 return -1;
1272 1272 }
1273 1273 if (length != 0) {
1274 1274 break;
1275 1275 }
1276 1276 endidx -= 1;
1277 1277 }
1278 1278 return endidx;
1279 1279 }
1280 1280
1281 1281 struct Gap {
1282 1282 int64_t size;
1283 1283 Py_ssize_t idx;
1284 1284 };
1285 1285
1286 1286 static int gap_compare(const void *left, const void *right)
1287 1287 {
1288 1288 const struct Gap *l_left = ((const struct Gap *)left);
1289 1289 const struct Gap *l_right = ((const struct Gap *)right);
1290 1290 if (l_left->size < l_right->size) {
1291 1291 return -1;
1292 1292 } else if (l_left->size > l_right->size) {
1293 1293 return 1;
1294 1294 }
1295 1295 return 0;
1296 1296 }
1297 1297 static int Py_ssize_t_compare(const void *left, const void *right)
1298 1298 {
1299 1299 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1300 1300 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1301 1301 if (l_left < l_right) {
1302 1302 return -1;
1303 1303 } else if (l_left > l_right) {
1304 1304 return 1;
1305 1305 }
1306 1306 return 0;
1307 1307 }
1308 1308
1309 1309 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1310 1310 {
1311 1311 /* method arguments */
1312 1312 PyObject *list_revs = NULL; /* revisions in the chain */
1313 1313 double targetdensity = 0; /* min density to achieve */
1314 1314 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1315 1315
1316 1316 /* other core variables */
1317 1317 Py_ssize_t idxlen = index_length(self);
1318 1318 Py_ssize_t i; /* used for various iteration */
1319 1319 PyObject *result = NULL; /* the final return of the function */
1320 1320
1321 1321 /* generic information about the delta chain being slice */
1322 1322 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1323 1323 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1324 1324 int64_t chainpayload = 0; /* sum of all delta in the chain */
1325 1325 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1326 1326
1327 1327 /* variable used for slicing the delta chain */
1328 1328 int64_t readdata = 0; /* amount of data currently planned to be read */
1329 1329 double density = 0; /* ration of payload data compared to read ones */
1330 1330 int64_t previous_end;
1331 1331 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1332 1332 Py_ssize_t num_gaps =
1333 1333 0; /* total number of notable gap recorded so far */
1334 1334 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1335 1335 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1336 1336 PyObject *chunk = NULL; /* individual slice */
1337 1337 PyObject *allchunks = NULL; /* all slices */
1338 1338 Py_ssize_t previdx;
1339 1339
1340 1340 /* parsing argument */
1341 1341 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1342 1342 &targetdensity, &mingapsize)) {
1343 1343 goto bail;
1344 1344 }
1345 1345
1346 1346 /* If the delta chain contains a single element, we do not need slicing
1347 1347 */
1348 1348 num_revs = PyList_GET_SIZE(list_revs);
1349 1349 if (num_revs <= 1) {
1350 1350 result = PyTuple_Pack(1, list_revs);
1351 1351 goto done;
1352 1352 }
1353 1353
1354 1354 /* Turn the python list into a native integer array (for efficiency) */
1355 1355 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1356 1356 if (revs == NULL) {
1357 1357 PyErr_NoMemory();
1358 1358 goto bail;
1359 1359 }
1360 1360 for (i = 0; i < num_revs; i++) {
1361 1361 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1362 1362 if (revnum == -1 && PyErr_Occurred()) {
1363 1363 goto bail;
1364 1364 }
1365 1365 if (revnum < nullrev || revnum >= idxlen) {
1366 1366 PyErr_Format(PyExc_IndexError,
1367 1367 "index out of range: %zd", revnum);
1368 1368 goto bail;
1369 1369 }
1370 1370 revs[i] = revnum;
1371 1371 }
1372 1372
1373 1373 /* Compute and check various property of the unsliced delta chain */
1374 1374 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1375 1375 if (deltachainspan < 0) {
1376 1376 goto bail;
1377 1377 }
1378 1378
1379 1379 if (deltachainspan <= mingapsize) {
1380 1380 result = PyTuple_Pack(1, list_revs);
1381 1381 goto done;
1382 1382 }
1383 1383 chainpayload = 0;
1384 1384 for (i = 0; i < num_revs; i++) {
1385 1385 int tmp = index_get_length(self, revs[i]);
1386 1386 if (tmp < 0) {
1387 1387 goto bail;
1388 1388 }
1389 1389 chainpayload += tmp;
1390 1390 }
1391 1391
1392 1392 readdata = deltachainspan;
1393 1393 density = 1.0;
1394 1394
1395 1395 if (0 < deltachainspan) {
1396 1396 density = (double)chainpayload / (double)deltachainspan;
1397 1397 }
1398 1398
1399 1399 if (density >= targetdensity) {
1400 1400 result = PyTuple_Pack(1, list_revs);
1401 1401 goto done;
1402 1402 }
1403 1403
1404 1404 /* if chain is too sparse, look for relevant gaps */
1405 1405 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1406 1406 if (gaps == NULL) {
1407 1407 PyErr_NoMemory();
1408 1408 goto bail;
1409 1409 }
1410 1410
1411 1411 previous_end = -1;
1412 1412 for (i = 0; i < num_revs; i++) {
1413 1413 int64_t revstart;
1414 1414 int revsize;
1415 1415 revstart = index_get_start(self, revs[i]);
1416 1416 if (revstart < 0) {
1417 1417 goto bail;
1418 1418 };
1419 1419 revsize = index_get_length(self, revs[i]);
1420 1420 if (revsize < 0) {
1421 1421 goto bail;
1422 1422 };
1423 1423 if (revsize == 0) {
1424 1424 continue;
1425 1425 }
1426 1426 if (previous_end >= 0) {
1427 1427 int64_t gapsize = revstart - previous_end;
1428 1428 if (gapsize > mingapsize) {
1429 1429 gaps[num_gaps].size = gapsize;
1430 1430 gaps[num_gaps].idx = i;
1431 1431 num_gaps += 1;
1432 1432 }
1433 1433 }
1434 1434 previous_end = revstart + revsize;
1435 1435 }
1436 1436 if (num_gaps == 0) {
1437 1437 result = PyTuple_Pack(1, list_revs);
1438 1438 goto done;
1439 1439 }
1440 1440 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1441 1441
1442 1442 /* Slice the largest gap first, they improve the density the most */
1443 1443 selected_indices =
1444 1444 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1445 1445 if (selected_indices == NULL) {
1446 1446 PyErr_NoMemory();
1447 1447 goto bail;
1448 1448 }
1449 1449
1450 1450 for (i = num_gaps - 1; i >= 0; i--) {
1451 1451 selected_indices[num_selected] = gaps[i].idx;
1452 1452 readdata -= gaps[i].size;
1453 1453 num_selected += 1;
1454 1454 if (readdata <= 0) {
1455 1455 density = 1.0;
1456 1456 } else {
1457 1457 density = (double)chainpayload / (double)readdata;
1458 1458 }
1459 1459 if (density >= targetdensity) {
1460 1460 break;
1461 1461 }
1462 1462 }
1463 1463 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1464 1464 &Py_ssize_t_compare);
1465 1465
1466 1466 /* create the resulting slice */
1467 1467 allchunks = PyList_New(0);
1468 1468 if (allchunks == NULL) {
1469 1469 goto bail;
1470 1470 }
1471 1471 previdx = 0;
1472 1472 selected_indices[num_selected] = num_revs;
1473 1473 for (i = 0; i <= num_selected; i++) {
1474 1474 Py_ssize_t idx = selected_indices[i];
1475 1475 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1476 1476 if (endidx < 0) {
1477 1477 goto bail;
1478 1478 }
1479 1479 if (previdx < endidx) {
1480 1480 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1481 1481 if (chunk == NULL) {
1482 1482 goto bail;
1483 1483 }
1484 1484 if (PyList_Append(allchunks, chunk) == -1) {
1485 1485 goto bail;
1486 1486 }
1487 1487 Py_DECREF(chunk);
1488 1488 chunk = NULL;
1489 1489 }
1490 1490 previdx = idx;
1491 1491 }
1492 1492 result = allchunks;
1493 1493 goto done;
1494 1494
1495 1495 bail:
1496 1496 Py_XDECREF(allchunks);
1497 1497 Py_XDECREF(chunk);
1498 1498 done:
1499 1499 free(revs);
1500 1500 free(gaps);
1501 1501 free(selected_indices);
1502 1502 return result;
1503 1503 }
1504 1504
1505 1505 static inline int nt_level(const char *node, Py_ssize_t level)
1506 1506 {
1507 1507 int v = node[level >> 1];
1508 1508 if (!(level & 1))
1509 1509 v >>= 4;
1510 1510 return v & 0xf;
1511 1511 }
1512 1512
1513 1513 /*
1514 1514 * Return values:
1515 1515 *
1516 1516 * -4: match is ambiguous (multiple candidates)
1517 1517 * -2: not found
1518 1518 * rest: valid rev
1519 1519 */
1520 1520 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1521 1521 int hex)
1522 1522 {
1523 1523 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1524 1524 int level, maxlevel, off;
1525 1525
1526 1526 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1527 1527 return -1;
1528 1528
1529 1529 if (hex)
1530 1530 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1531 1531 else
1532 1532 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1533 1533
1534 1534 for (level = off = 0; level < maxlevel; level++) {
1535 1535 int k = getnybble(node, level);
1536 1536 nodetreenode *n = &self->nodes[off];
1537 1537 int v = n->children[k];
1538 1538
1539 1539 if (v < 0) {
1540 1540 const char *n;
1541 1541 Py_ssize_t i;
1542 1542
1543 1543 v = -(v + 2);
1544 1544 n = index_node(self->index, v);
1545 1545 if (n == NULL)
1546 1546 return -2;
1547 1547 for (i = level; i < maxlevel; i++)
1548 1548 if (getnybble(node, i) != nt_level(n, i))
1549 1549 return -2;
1550 1550 return v;
1551 1551 }
1552 1552 if (v == 0)
1553 1553 return -2;
1554 1554 off = v;
1555 1555 }
1556 1556 /* multiple matches against an ambiguous prefix */
1557 1557 return -4;
1558 1558 }
1559 1559
1560 1560 static int nt_new(nodetree *self)
1561 1561 {
1562 1562 if (self->length == self->capacity) {
1563 1563 unsigned newcapacity;
1564 1564 nodetreenode *newnodes;
1565 1565 newcapacity = self->capacity * 2;
1566 1566 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1567 1567 PyErr_SetString(PyExc_MemoryError,
1568 1568 "overflow in nt_new");
1569 1569 return -1;
1570 1570 }
1571 1571 newnodes =
1572 1572 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1573 1573 if (newnodes == NULL) {
1574 1574 PyErr_SetString(PyExc_MemoryError, "out of memory");
1575 1575 return -1;
1576 1576 }
1577 1577 self->capacity = newcapacity;
1578 1578 self->nodes = newnodes;
1579 1579 memset(&self->nodes[self->length], 0,
1580 1580 sizeof(nodetreenode) * (self->capacity - self->length));
1581 1581 }
1582 1582 return self->length++;
1583 1583 }
1584 1584
1585 1585 static int nt_insert(nodetree *self, const char *node, int rev)
1586 1586 {
1587 1587 int level = 0;
1588 1588 int off = 0;
1589 1589
1590 1590 while (level < 40) {
1591 1591 int k = nt_level(node, level);
1592 1592 nodetreenode *n;
1593 1593 int v;
1594 1594
1595 1595 n = &self->nodes[off];
1596 1596 v = n->children[k];
1597 1597
1598 1598 if (v == 0) {
1599 1599 n->children[k] = -rev - 2;
1600 1600 return 0;
1601 1601 }
1602 1602 if (v < 0) {
1603 1603 const char *oldnode =
1604 1604 index_node_existing(self->index, -(v + 2));
1605 1605 int noff;
1606 1606
1607 1607 if (oldnode == NULL)
1608 1608 return -1;
1609 1609 if (!memcmp(oldnode, node, 20)) {
1610 1610 n->children[k] = -rev - 2;
1611 1611 return 0;
1612 1612 }
1613 1613 noff = nt_new(self);
1614 1614 if (noff == -1)
1615 1615 return -1;
1616 1616 /* self->nodes may have been changed by realloc */
1617 1617 self->nodes[off].children[k] = noff;
1618 1618 off = noff;
1619 1619 n = &self->nodes[off];
1620 1620 n->children[nt_level(oldnode, ++level)] = v;
1621 1621 if (level > self->depth)
1622 1622 self->depth = level;
1623 1623 self->splits += 1;
1624 1624 } else {
1625 1625 level += 1;
1626 1626 off = v;
1627 1627 }
1628 1628 }
1629 1629
1630 1630 return -1;
1631 1631 }
1632 1632
1633 1633 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1634 1634 {
1635 1635 Py_ssize_t rev;
1636 1636 const char *node;
1637 1637 Py_ssize_t length;
1638 1638 if (!PyArg_ParseTuple(args, "n", &rev))
1639 1639 return NULL;
1640 1640 length = index_length(self->nt.index);
1641 1641 if (rev < 0 || rev >= length) {
1642 1642 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1643 1643 return NULL;
1644 1644 }
1645 1645 node = index_node_existing(self->nt.index, rev);
1646 1646 if (nt_insert(&self->nt, node, (int)rev) == -1)
1647 1647 return NULL;
1648 1648 Py_RETURN_NONE;
1649 1649 }
1650 1650
1651 1651 static int nt_delete_node(nodetree *self, const char *node)
1652 1652 {
1653 1653 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1654 1654 */
1655 1655 return nt_insert(self, node, -2);
1656 1656 }
1657 1657
1658 1658 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1659 1659 {
1660 1660 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1661 1661 self->nodes = NULL;
1662 1662
1663 1663 self->index = index;
1664 1664 /* The input capacity is in terms of revisions, while the field is in
1665 1665 * terms of nodetree nodes. */
1666 1666 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1667 1667 self->depth = 0;
1668 1668 self->splits = 0;
1669 1669 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1670 1670 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1671 1671 return -1;
1672 1672 }
1673 1673 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1674 1674 if (self->nodes == NULL) {
1675 1675 PyErr_NoMemory();
1676 1676 return -1;
1677 1677 }
1678 1678 self->length = 1;
1679 1679 return 0;
1680 1680 }
1681 1681
1682 1682 static int ntobj_init(nodetreeObject *self, PyObject *args)
1683 1683 {
1684 1684 PyObject *index;
1685 1685 unsigned capacity;
1686 1686 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1687 1687 &capacity))
1688 1688 return -1;
1689 1689 Py_INCREF(index);
1690 1690 return nt_init(&self->nt, (indexObject *)index, capacity);
1691 1691 }
1692 1692
1693 1693 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1694 1694 {
1695 1695 return nt_find(self, node, nodelen, 1);
1696 1696 }
1697 1697
1698 1698 /*
1699 1699 * Find the length of the shortest unique prefix of node.
1700 1700 *
1701 1701 * Return values:
1702 1702 *
1703 1703 * -3: error (exception set)
1704 1704 * -2: not found (no exception set)
1705 1705 * rest: length of shortest prefix
1706 1706 */
1707 1707 static int nt_shortest(nodetree *self, const char *node)
1708 1708 {
1709 1709 int level, off;
1710 1710
1711 1711 for (level = off = 0; level < 40; level++) {
1712 1712 int k, v;
1713 1713 nodetreenode *n = &self->nodes[off];
1714 1714 k = nt_level(node, level);
1715 1715 v = n->children[k];
1716 1716 if (v < 0) {
1717 1717 const char *n;
1718 1718 v = -(v + 2);
1719 1719 n = index_node_existing(self->index, v);
1720 1720 if (n == NULL)
1721 1721 return -3;
1722 1722 if (memcmp(node, n, 20) != 0)
1723 1723 /*
1724 1724 * Found a unique prefix, but it wasn't for the
1725 1725 * requested node (i.e the requested node does
1726 1726 * not exist).
1727 1727 */
1728 1728 return -2;
1729 1729 return level + 1;
1730 1730 }
1731 1731 if (v == 0)
1732 1732 return -2;
1733 1733 off = v;
1734 1734 }
1735 1735 /*
1736 1736 * The node was still not unique after 40 hex digits, so this won't
1737 1737 * happen. Also, if we get here, then there's a programming error in
1738 1738 * this file that made us insert a node longer than 40 hex digits.
1739 1739 */
1740 1740 PyErr_SetString(PyExc_Exception, "broken node tree");
1741 1741 return -3;
1742 1742 }
1743 1743
1744 1744 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1745 1745 {
1746 1746 PyObject *val;
1747 1747 char *node;
1748 1748 int length;
1749 1749
1750 1750 if (!PyArg_ParseTuple(args, "O", &val))
1751 1751 return NULL;
1752 1752 if (node_check(val, &node) == -1)
1753 1753 return NULL;
1754 1754
1755 1755 length = nt_shortest(&self->nt, node);
1756 1756 if (length == -3)
1757 1757 return NULL;
1758 1758 if (length == -2) {
1759 1759 raise_revlog_error();
1760 1760 return NULL;
1761 1761 }
1762 1762 return PyInt_FromLong(length);
1763 1763 }
1764 1764
1765 1765 static void nt_dealloc(nodetree *self)
1766 1766 {
1767 1767 free(self->nodes);
1768 1768 self->nodes = NULL;
1769 1769 }
1770 1770
1771 1771 static void ntobj_dealloc(nodetreeObject *self)
1772 1772 {
1773 1773 Py_XDECREF(self->nt.index);
1774 1774 nt_dealloc(&self->nt);
1775 1775 PyObject_Del(self);
1776 1776 }
1777 1777
1778 1778 static PyMethodDef ntobj_methods[] = {
1779 1779 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1780 1780 "insert an index entry"},
1781 1781 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1782 1782 "find length of shortest hex nodeid of a binary ID"},
1783 1783 {NULL} /* Sentinel */
1784 1784 };
1785 1785
1786 1786 static PyTypeObject nodetreeType = {
1787 1787 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1788 1788 "parsers.nodetree", /* tp_name */
1789 1789 sizeof(nodetreeObject), /* tp_basicsize */
1790 1790 0, /* tp_itemsize */
1791 1791 (destructor)ntobj_dealloc, /* tp_dealloc */
1792 1792 0, /* tp_print */
1793 1793 0, /* tp_getattr */
1794 1794 0, /* tp_setattr */
1795 1795 0, /* tp_compare */
1796 1796 0, /* tp_repr */
1797 1797 0, /* tp_as_number */
1798 1798 0, /* tp_as_sequence */
1799 1799 0, /* tp_as_mapping */
1800 1800 0, /* tp_hash */
1801 1801 0, /* tp_call */
1802 1802 0, /* tp_str */
1803 1803 0, /* tp_getattro */
1804 1804 0, /* tp_setattro */
1805 1805 0, /* tp_as_buffer */
1806 1806 Py_TPFLAGS_DEFAULT, /* tp_flags */
1807 1807 "nodetree", /* tp_doc */
1808 1808 0, /* tp_traverse */
1809 1809 0, /* tp_clear */
1810 1810 0, /* tp_richcompare */
1811 1811 0, /* tp_weaklistoffset */
1812 1812 0, /* tp_iter */
1813 1813 0, /* tp_iternext */
1814 1814 ntobj_methods, /* tp_methods */
1815 1815 0, /* tp_members */
1816 1816 0, /* tp_getset */
1817 1817 0, /* tp_base */
1818 1818 0, /* tp_dict */
1819 1819 0, /* tp_descr_get */
1820 1820 0, /* tp_descr_set */
1821 1821 0, /* tp_dictoffset */
1822 1822 (initproc)ntobj_init, /* tp_init */
1823 1823 0, /* tp_alloc */
1824 1824 };
1825 1825
1826 1826 static int index_init_nt(indexObject *self)
1827 1827 {
1828 1828 if (!self->ntinitialized) {
1829 1829 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1830 1830 nt_dealloc(&self->nt);
1831 1831 return -1;
1832 1832 }
1833 1833 if (nt_insert(&self->nt, nullid, -1) == -1) {
1834 1834 nt_dealloc(&self->nt);
1835 1835 return -1;
1836 1836 }
1837 1837 self->ntinitialized = 1;
1838 1838 self->ntrev = (int)index_length(self);
1839 1839 self->ntlookups = 1;
1840 1840 self->ntmisses = 0;
1841 1841 }
1842 1842 return 0;
1843 1843 }
1844 1844
1845 1845 /*
1846 1846 * Return values:
1847 1847 *
1848 1848 * -3: error (exception set)
1849 1849 * -2: not found (no exception set)
1850 1850 * rest: valid rev
1851 1851 */
1852 1852 static int index_find_node(indexObject *self, const char *node,
1853 1853 Py_ssize_t nodelen)
1854 1854 {
1855 1855 int rev;
1856 1856
1857 1857 if (index_init_nt(self) == -1)
1858 1858 return -3;
1859 1859
1860 1860 self->ntlookups++;
1861 1861 rev = nt_find(&self->nt, node, nodelen, 0);
1862 1862 if (rev >= -1)
1863 1863 return rev;
1864 1864
1865 1865 /*
1866 1866 * For the first handful of lookups, we scan the entire index,
1867 1867 * and cache only the matching nodes. This optimizes for cases
1868 1868 * like "hg tip", where only a few nodes are accessed.
1869 1869 *
1870 1870 * After that, we cache every node we visit, using a single
1871 1871 * scan amortized over multiple lookups. This gives the best
1872 1872 * bulk performance, e.g. for "hg log".
1873 1873 */
1874 1874 if (self->ntmisses++ < 4) {
1875 1875 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1876 1876 const char *n = index_node_existing(self, rev);
1877 1877 if (n == NULL)
1878 1878 return -3;
1879 1879 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1880 1880 if (nt_insert(&self->nt, n, rev) == -1)
1881 1881 return -3;
1882 1882 break;
1883 1883 }
1884 1884 }
1885 1885 } else {
1886 1886 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1887 1887 const char *n = index_node_existing(self, rev);
1888 1888 if (n == NULL)
1889 1889 return -3;
1890 1890 if (nt_insert(&self->nt, n, rev) == -1) {
1891 1891 self->ntrev = rev + 1;
1892 1892 return -3;
1893 1893 }
1894 1894 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1895 1895 break;
1896 1896 }
1897 1897 }
1898 1898 self->ntrev = rev;
1899 1899 }
1900 1900
1901 1901 if (rev >= 0)
1902 1902 return rev;
1903 1903 return -2;
1904 1904 }
1905 1905
1906 1906 static PyObject *index_getitem(indexObject *self, PyObject *value)
1907 1907 {
1908 1908 char *node;
1909 1909 int rev;
1910 1910
1911 1911 if (PyInt_Check(value)) {
1912 1912 long idx;
1913 1913 if (!pylong_to_long(value, &idx)) {
1914 1914 return NULL;
1915 1915 }
1916 1916 return index_get(self, idx);
1917 1917 }
1918 1918
1919 1919 if (node_check(value, &node) == -1)
1920 1920 return NULL;
1921 1921 rev = index_find_node(self, node, 20);
1922 1922 if (rev >= -1)
1923 1923 return PyInt_FromLong(rev);
1924 1924 if (rev == -2)
1925 1925 raise_revlog_error();
1926 1926 return NULL;
1927 1927 }
1928 1928
1929 1929 /*
1930 1930 * Fully populate the radix tree.
1931 1931 */
1932 1932 static int index_populate_nt(indexObject *self)
1933 1933 {
1934 1934 int rev;
1935 1935 if (self->ntrev > 0) {
1936 1936 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1937 1937 const char *n = index_node_existing(self, rev);
1938 1938 if (n == NULL)
1939 1939 return -1;
1940 1940 if (nt_insert(&self->nt, n, rev) == -1)
1941 1941 return -1;
1942 1942 }
1943 1943 self->ntrev = -1;
1944 1944 }
1945 1945 return 0;
1946 1946 }
1947 1947
1948 1948 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1949 1949 {
1950 1950 const char *fullnode;
1951 1951 Py_ssize_t nodelen;
1952 1952 char *node;
1953 1953 int rev, i;
1954 1954
1955 1955 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1956 1956 return NULL;
1957 1957
1958 1958 if (nodelen < 1) {
1959 1959 PyErr_SetString(PyExc_ValueError, "key too short");
1960 1960 return NULL;
1961 1961 }
1962 1962
1963 1963 if (nodelen > 40) {
1964 1964 PyErr_SetString(PyExc_ValueError, "key too long");
1965 1965 return NULL;
1966 1966 }
1967 1967
1968 1968 for (i = 0; i < nodelen; i++)
1969 1969 hexdigit(node, i);
1970 1970 if (PyErr_Occurred()) {
1971 1971 /* input contains non-hex characters */
1972 1972 PyErr_Clear();
1973 1973 Py_RETURN_NONE;
1974 1974 }
1975 1975
1976 1976 if (index_init_nt(self) == -1)
1977 1977 return NULL;
1978 1978 if (index_populate_nt(self) == -1)
1979 1979 return NULL;
1980 1980 rev = nt_partialmatch(&self->nt, node, nodelen);
1981 1981
1982 1982 switch (rev) {
1983 1983 case -4:
1984 1984 raise_revlog_error();
1985 1985 return NULL;
1986 1986 case -2:
1987 1987 Py_RETURN_NONE;
1988 1988 case -1:
1989 1989 return PyBytes_FromStringAndSize(nullid, 20);
1990 1990 }
1991 1991
1992 1992 fullnode = index_node_existing(self, rev);
1993 1993 if (fullnode == NULL) {
1994 1994 return NULL;
1995 1995 }
1996 1996 return PyBytes_FromStringAndSize(fullnode, 20);
1997 1997 }
1998 1998
1999 1999 static PyObject *index_shortest(indexObject *self, PyObject *args)
2000 2000 {
2001 2001 PyObject *val;
2002 2002 char *node;
2003 2003 int length;
2004 2004
2005 2005 if (!PyArg_ParseTuple(args, "O", &val))
2006 2006 return NULL;
2007 2007 if (node_check(val, &node) == -1)
2008 2008 return NULL;
2009 2009
2010 2010 self->ntlookups++;
2011 2011 if (index_init_nt(self) == -1)
2012 2012 return NULL;
2013 2013 if (index_populate_nt(self) == -1)
2014 2014 return NULL;
2015 2015 length = nt_shortest(&self->nt, node);
2016 2016 if (length == -3)
2017 2017 return NULL;
2018 2018 if (length == -2) {
2019 2019 raise_revlog_error();
2020 2020 return NULL;
2021 2021 }
2022 2022 return PyInt_FromLong(length);
2023 2023 }
2024 2024
2025 2025 static PyObject *index_m_get(indexObject *self, PyObject *args)
2026 2026 {
2027 2027 PyObject *val;
2028 2028 char *node;
2029 2029 int rev;
2030 2030
2031 2031 if (!PyArg_ParseTuple(args, "O", &val))
2032 2032 return NULL;
2033 2033 if (node_check(val, &node) == -1)
2034 2034 return NULL;
2035 2035 rev = index_find_node(self, node, 20);
2036 2036 if (rev == -3)
2037 2037 return NULL;
2038 2038 if (rev == -2)
2039 2039 Py_RETURN_NONE;
2040 2040 return PyInt_FromLong(rev);
2041 2041 }
2042 2042
2043 2043 static int index_contains(indexObject *self, PyObject *value)
2044 2044 {
2045 2045 char *node;
2046 2046
2047 2047 if (PyInt_Check(value)) {
2048 2048 long rev;
2049 2049 if (!pylong_to_long(value, &rev)) {
2050 2050 return -1;
2051 2051 }
2052 2052 return rev >= -1 && rev < index_length(self);
2053 2053 }
2054 2054
2055 2055 if (node_check(value, &node) == -1)
2056 2056 return -1;
2057 2057
2058 2058 switch (index_find_node(self, node, 20)) {
2059 2059 case -3:
2060 2060 return -1;
2061 2061 case -2:
2062 2062 return 0;
2063 2063 default:
2064 2064 return 1;
2065 2065 }
2066 2066 }
2067 2067
2068 2068 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2069 2069 {
2070 2070 int ret = index_contains(self, args);
2071 2071 if (ret < 0)
2072 2072 return NULL;
2073 2073 return PyBool_FromLong((long)ret);
2074 2074 }
2075 2075
2076 2076 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2077 2077 {
2078 2078 char *node;
2079 2079 int rev;
2080 2080
2081 2081 if (node_check(val, &node) == -1)
2082 2082 return NULL;
2083 2083 rev = index_find_node(self, node, 20);
2084 2084 if (rev >= -1)
2085 2085 return PyInt_FromLong(rev);
2086 2086 if (rev == -2)
2087 2087 raise_revlog_error();
2088 2088 return NULL;
2089 2089 }
2090 2090
2091 2091 typedef uint64_t bitmask;
2092 2092
2093 2093 /*
2094 2094 * Given a disjoint set of revs, return all candidates for the
2095 2095 * greatest common ancestor. In revset notation, this is the set
2096 2096 * "heads(::a and ::b and ...)"
2097 2097 */
2098 2098 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2099 2099 int revcount)
2100 2100 {
2101 2101 const bitmask allseen = (1ull << revcount) - 1;
2102 2102 const bitmask poison = 1ull << revcount;
2103 2103 PyObject *gca = PyList_New(0);
2104 2104 int i, v, interesting;
2105 2105 int maxrev = -1;
2106 2106 bitmask sp;
2107 2107 bitmask *seen;
2108 2108
2109 2109 if (gca == NULL)
2110 2110 return PyErr_NoMemory();
2111 2111
2112 2112 for (i = 0; i < revcount; i++) {
2113 2113 if (revs[i] > maxrev)
2114 2114 maxrev = revs[i];
2115 2115 }
2116 2116
2117 2117 seen = calloc(sizeof(*seen), maxrev + 1);
2118 2118 if (seen == NULL) {
2119 2119 Py_DECREF(gca);
2120 2120 return PyErr_NoMemory();
2121 2121 }
2122 2122
2123 2123 for (i = 0; i < revcount; i++)
2124 2124 seen[revs[i]] = 1ull << i;
2125 2125
2126 2126 interesting = revcount;
2127 2127
2128 2128 for (v = maxrev; v >= 0 && interesting; v--) {
2129 2129 bitmask sv = seen[v];
2130 2130 int parents[2];
2131 2131
2132 2132 if (!sv)
2133 2133 continue;
2134 2134
2135 2135 if (sv < poison) {
2136 2136 interesting -= 1;
2137 2137 if (sv == allseen) {
2138 2138 PyObject *obj = PyInt_FromLong(v);
2139 2139 if (obj == NULL)
2140 2140 goto bail;
2141 2141 if (PyList_Append(gca, obj) == -1) {
2142 2142 Py_DECREF(obj);
2143 2143 goto bail;
2144 2144 }
2145 2145 sv |= poison;
2146 2146 for (i = 0; i < revcount; i++) {
2147 2147 if (revs[i] == v)
2148 2148 goto done;
2149 2149 }
2150 2150 }
2151 2151 }
2152 2152 if (index_get_parents(self, v, parents, maxrev) < 0)
2153 2153 goto bail;
2154 2154
2155 2155 for (i = 0; i < 2; i++) {
2156 2156 int p = parents[i];
2157 2157 if (p == -1)
2158 2158 continue;
2159 2159 sp = seen[p];
2160 2160 if (sv < poison) {
2161 2161 if (sp == 0) {
2162 2162 seen[p] = sv;
2163 2163 interesting++;
2164 2164 } else if (sp != sv)
2165 2165 seen[p] |= sv;
2166 2166 } else {
2167 2167 if (sp && sp < poison)
2168 2168 interesting--;
2169 2169 seen[p] = sv;
2170 2170 }
2171 2171 }
2172 2172 }
2173 2173
2174 2174 done:
2175 2175 free(seen);
2176 2176 return gca;
2177 2177 bail:
2178 2178 free(seen);
2179 2179 Py_XDECREF(gca);
2180 2180 return NULL;
2181 2181 }
2182 2182
2183 2183 /*
2184 2184 * Given a disjoint set of revs, return the subset with the longest
2185 2185 * path to the root.
2186 2186 */
2187 2187 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2188 2188 {
2189 2189 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2190 2190 static const Py_ssize_t capacity = 24;
2191 2191 int *depth, *interesting = NULL;
2192 2192 int i, j, v, ninteresting;
2193 2193 PyObject *dict = NULL, *keys = NULL;
2194 2194 long *seen = NULL;
2195 2195 int maxrev = -1;
2196 2196 long final;
2197 2197
2198 2198 if (revcount > capacity) {
2199 2199 PyErr_Format(PyExc_OverflowError,
2200 2200 "bitset size (%ld) > capacity (%ld)",
2201 2201 (long)revcount, (long)capacity);
2202 2202 return NULL;
2203 2203 }
2204 2204
2205 2205 for (i = 0; i < revcount; i++) {
2206 2206 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2207 2207 if (n > maxrev)
2208 2208 maxrev = n;
2209 2209 }
2210 2210
2211 2211 depth = calloc(sizeof(*depth), maxrev + 1);
2212 2212 if (depth == NULL)
2213 2213 return PyErr_NoMemory();
2214 2214
2215 2215 seen = calloc(sizeof(*seen), maxrev + 1);
2216 2216 if (seen == NULL) {
2217 2217 PyErr_NoMemory();
2218 2218 goto bail;
2219 2219 }
2220 2220
2221 2221 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2222 2222 if (interesting == NULL) {
2223 2223 PyErr_NoMemory();
2224 2224 goto bail;
2225 2225 }
2226 2226
2227 2227 if (PyList_Sort(revs) == -1)
2228 2228 goto bail;
2229 2229
2230 2230 for (i = 0; i < revcount; i++) {
2231 2231 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2232 2232 long b = 1l << i;
2233 2233 depth[n] = 1;
2234 2234 seen[n] = b;
2235 2235 interesting[b] = 1;
2236 2236 }
2237 2237
2238 2238 /* invariant: ninteresting is the number of non-zero entries in
2239 2239 * interesting. */
2240 2240 ninteresting = (int)revcount;
2241 2241
2242 2242 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2243 2243 int dv = depth[v];
2244 2244 int parents[2];
2245 2245 long sv;
2246 2246
2247 2247 if (dv == 0)
2248 2248 continue;
2249 2249
2250 2250 sv = seen[v];
2251 2251 if (index_get_parents(self, v, parents, maxrev) < 0)
2252 2252 goto bail;
2253 2253
2254 2254 for (i = 0; i < 2; i++) {
2255 2255 int p = parents[i];
2256 2256 long sp;
2257 2257 int dp;
2258 2258
2259 2259 if (p == -1)
2260 2260 continue;
2261 2261
2262 2262 dp = depth[p];
2263 2263 sp = seen[p];
2264 2264 if (dp <= dv) {
2265 2265 depth[p] = dv + 1;
2266 2266 if (sp != sv) {
2267 2267 interesting[sv] += 1;
2268 2268 seen[p] = sv;
2269 2269 if (sp) {
2270 2270 interesting[sp] -= 1;
2271 2271 if (interesting[sp] == 0)
2272 2272 ninteresting -= 1;
2273 2273 }
2274 2274 }
2275 2275 } else if (dv == dp - 1) {
2276 2276 long nsp = sp | sv;
2277 2277 if (nsp == sp)
2278 2278 continue;
2279 2279 seen[p] = nsp;
2280 2280 interesting[sp] -= 1;
2281 2281 if (interesting[sp] == 0)
2282 2282 ninteresting -= 1;
2283 2283 if (interesting[nsp] == 0)
2284 2284 ninteresting += 1;
2285 2285 interesting[nsp] += 1;
2286 2286 }
2287 2287 }
2288 2288 interesting[sv] -= 1;
2289 2289 if (interesting[sv] == 0)
2290 2290 ninteresting -= 1;
2291 2291 }
2292 2292
2293 2293 final = 0;
2294 2294 j = ninteresting;
2295 2295 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2296 2296 if (interesting[i] == 0)
2297 2297 continue;
2298 2298 final |= i;
2299 2299 j -= 1;
2300 2300 }
2301 2301 if (final == 0) {
2302 2302 keys = PyList_New(0);
2303 2303 goto bail;
2304 2304 }
2305 2305
2306 2306 dict = PyDict_New();
2307 2307 if (dict == NULL)
2308 2308 goto bail;
2309 2309
2310 2310 for (i = 0; i < revcount; i++) {
2311 2311 PyObject *key;
2312 2312
2313 2313 if ((final & (1 << i)) == 0)
2314 2314 continue;
2315 2315
2316 2316 key = PyList_GET_ITEM(revs, i);
2317 2317 Py_INCREF(key);
2318 2318 Py_INCREF(Py_None);
2319 2319 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2320 2320 Py_DECREF(key);
2321 2321 Py_DECREF(Py_None);
2322 2322 goto bail;
2323 2323 }
2324 2324 }
2325 2325
2326 2326 keys = PyDict_Keys(dict);
2327 2327
2328 2328 bail:
2329 2329 free(depth);
2330 2330 free(seen);
2331 2331 free(interesting);
2332 2332 Py_XDECREF(dict);
2333 2333
2334 2334 return keys;
2335 2335 }
2336 2336
2337 2337 /*
2338 2338 * Given a (possibly overlapping) set of revs, return all the
2339 2339 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2340 2340 */
2341 2341 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2342 2342 {
2343 2343 PyObject *ret = NULL;
2344 2344 Py_ssize_t argcount, i, len;
2345 2345 bitmask repeat = 0;
2346 2346 int revcount = 0;
2347 2347 int *revs;
2348 2348
2349 2349 argcount = PySequence_Length(args);
2350 2350 revs = PyMem_Malloc(argcount * sizeof(*revs));
2351 2351 if (argcount > 0 && revs == NULL)
2352 2352 return PyErr_NoMemory();
2353 2353 len = index_length(self);
2354 2354
2355 2355 for (i = 0; i < argcount; i++) {
2356 2356 static const int capacity = 24;
2357 2357 PyObject *obj = PySequence_GetItem(args, i);
2358 2358 bitmask x;
2359 2359 long val;
2360 2360
2361 2361 if (!PyInt_Check(obj)) {
2362 2362 PyErr_SetString(PyExc_TypeError,
2363 2363 "arguments must all be ints");
2364 2364 Py_DECREF(obj);
2365 2365 goto bail;
2366 2366 }
2367 2367 val = PyInt_AsLong(obj);
2368 2368 Py_DECREF(obj);
2369 2369 if (val == -1) {
2370 2370 ret = PyList_New(0);
2371 2371 goto done;
2372 2372 }
2373 2373 if (val < 0 || val >= len) {
2374 2374 PyErr_SetString(PyExc_IndexError, "index out of range");
2375 2375 goto bail;
2376 2376 }
2377 2377 /* this cheesy bloom filter lets us avoid some more
2378 2378 * expensive duplicate checks in the common set-is-disjoint
2379 2379 * case */
2380 2380 x = 1ull << (val & 0x3f);
2381 2381 if (repeat & x) {
2382 2382 int k;
2383 2383 for (k = 0; k < revcount; k++) {
2384 2384 if (val == revs[k])
2385 2385 goto duplicate;
2386 2386 }
2387 2387 } else
2388 2388 repeat |= x;
2389 2389 if (revcount >= capacity) {
2390 2390 PyErr_Format(PyExc_OverflowError,
2391 2391 "bitset size (%d) > capacity (%d)",
2392 2392 revcount, capacity);
2393 2393 goto bail;
2394 2394 }
2395 2395 revs[revcount++] = (int)val;
2396 2396 duplicate:;
2397 2397 }
2398 2398
2399 2399 if (revcount == 0) {
2400 2400 ret = PyList_New(0);
2401 2401 goto done;
2402 2402 }
2403 2403 if (revcount == 1) {
2404 2404 PyObject *obj;
2405 2405 ret = PyList_New(1);
2406 2406 if (ret == NULL)
2407 2407 goto bail;
2408 2408 obj = PyInt_FromLong(revs[0]);
2409 2409 if (obj == NULL)
2410 2410 goto bail;
2411 2411 PyList_SET_ITEM(ret, 0, obj);
2412 2412 goto done;
2413 2413 }
2414 2414
2415 2415 ret = find_gca_candidates(self, revs, revcount);
2416 2416 if (ret == NULL)
2417 2417 goto bail;
2418 2418
2419 2419 done:
2420 2420 PyMem_Free(revs);
2421 2421 return ret;
2422 2422
2423 2423 bail:
2424 2424 PyMem_Free(revs);
2425 2425 Py_XDECREF(ret);
2426 2426 return NULL;
2427 2427 }
2428 2428
2429 2429 /*
2430 2430 * Given a (possibly overlapping) set of revs, return the greatest
2431 2431 * common ancestors: those with the longest path to the root.
2432 2432 */
2433 2433 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2434 2434 {
2435 2435 PyObject *ret;
2436 2436 PyObject *gca = index_commonancestorsheads(self, args);
2437 2437 if (gca == NULL)
2438 2438 return NULL;
2439 2439
2440 2440 if (PyList_GET_SIZE(gca) <= 1) {
2441 2441 return gca;
2442 2442 }
2443 2443
2444 2444 ret = find_deepest(self, gca);
2445 2445 Py_DECREF(gca);
2446 2446 return ret;
2447 2447 }
2448 2448
2449 2449 /*
2450 2450 * Invalidate any trie entries introduced by added revs.
2451 2451 */
2452 2452 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2453 2453 {
2454 2454 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2455 2455
2456 2456 for (i = start; i < len; i++) {
2457 2457 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2458 2458 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2459 2459
2460 2460 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2461 2461 }
2462 2462
2463 2463 if (start == 0)
2464 2464 Py_CLEAR(self->added);
2465 2465 }
2466 2466
2467 2467 /*
2468 2468 * Delete a numeric range of revs, which must be at the end of the
2469 2469 * range, but exclude the sentinel nullid entry.
2470 2470 */
2471 2471 static int index_slice_del(indexObject *self, PyObject *item)
2472 2472 {
2473 2473 Py_ssize_t start, stop, step, slicelength;
2474 2474 Py_ssize_t length = index_length(self) + 1;
2475 2475 int ret = 0;
2476 2476
2477 2477 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2478 2478 #ifdef IS_PY3K
2479 2479 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2480 2480 &slicelength) < 0)
2481 2481 #else
2482 2482 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2483 2483 &step, &slicelength) < 0)
2484 2484 #endif
2485 2485 return -1;
2486 2486
2487 2487 if (slicelength <= 0)
2488 2488 return 0;
2489 2489
2490 2490 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2491 2491 stop = start;
2492 2492
2493 2493 if (step < 0) {
2494 2494 stop = start + 1;
2495 2495 start = stop + step * (slicelength - 1) - 1;
2496 2496 step = -step;
2497 2497 }
2498 2498
2499 2499 if (step != 1) {
2500 2500 PyErr_SetString(PyExc_ValueError,
2501 2501 "revlog index delete requires step size of 1");
2502 2502 return -1;
2503 2503 }
2504 2504
2505 2505 if (stop != length - 1) {
2506 2506 PyErr_SetString(PyExc_IndexError,
2507 2507 "revlog index deletion indices are invalid");
2508 2508 return -1;
2509 2509 }
2510 2510
2511 2511 if (start < self->length) {
2512 2512 if (self->ntinitialized) {
2513 2513 Py_ssize_t i;
2514 2514
2515 2515 for (i = start; i < self->length; i++) {
2516 2516 const char *node = index_node_existing(self, i);
2517 2517 if (node == NULL)
2518 2518 return -1;
2519 2519
2520 2520 nt_delete_node(&self->nt, node);
2521 2521 }
2522 2522 if (self->added)
2523 2523 index_invalidate_added(self, 0);
2524 2524 if (self->ntrev > start)
2525 2525 self->ntrev = (int)start;
2526 2526 }
2527 2527 self->length = start;
2528 2528 if (start < self->raw_length) {
2529 2529 if (self->cache) {
2530 2530 Py_ssize_t i;
2531 2531 for (i = start; i < self->raw_length; i++)
2532 2532 Py_CLEAR(self->cache[i]);
2533 2533 }
2534 2534 self->raw_length = start;
2535 2535 }
2536 2536 goto done;
2537 2537 }
2538 2538
2539 2539 if (self->ntinitialized) {
2540 2540 index_invalidate_added(self, start - self->length);
2541 2541 if (self->ntrev > start)
2542 2542 self->ntrev = (int)start;
2543 2543 }
2544 2544 if (self->added)
2545 2545 ret = PyList_SetSlice(self->added, start - self->length,
2546 2546 PyList_GET_SIZE(self->added), NULL);
2547 2547 done:
2548 2548 Py_CLEAR(self->headrevs);
2549 2549 return ret;
2550 2550 }
2551 2551
2552 2552 /*
2553 2553 * Supported ops:
2554 2554 *
2555 2555 * slice deletion
2556 2556 * string assignment (extend node->rev mapping)
2557 2557 * string deletion (shrink node->rev mapping)
2558 2558 */
2559 2559 static int index_assign_subscript(indexObject *self, PyObject *item,
2560 2560 PyObject *value)
2561 2561 {
2562 2562 char *node;
2563 2563 long rev;
2564 2564
2565 2565 if (PySlice_Check(item) && value == NULL)
2566 2566 return index_slice_del(self, item);
2567 2567
2568 2568 if (node_check(item, &node) == -1)
2569 2569 return -1;
2570 2570
2571 2571 if (value == NULL)
2572 2572 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2573 2573 : 0;
2574 2574 rev = PyInt_AsLong(value);
2575 2575 if (rev > INT_MAX || rev < 0) {
2576 2576 if (!PyErr_Occurred())
2577 2577 PyErr_SetString(PyExc_ValueError, "rev out of range");
2578 2578 return -1;
2579 2579 }
2580 2580
2581 2581 if (index_init_nt(self) == -1)
2582 2582 return -1;
2583 2583 return nt_insert(&self->nt, node, (int)rev);
2584 2584 }
2585 2585
2586 2586 /*
2587 2587 * Find all RevlogNG entries in an index that has inline data. Update
2588 2588 * the optional "offsets" table with those entries.
2589 2589 */
2590 2590 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2591 2591 {
2592 2592 const char *data = (const char *)self->buf.buf;
2593 2593 Py_ssize_t pos = 0;
2594 2594 Py_ssize_t end = self->buf.len;
2595 2595 long incr = v1_hdrsize;
2596 2596 Py_ssize_t len = 0;
2597 2597
2598 2598 while (pos + v1_hdrsize <= end && pos >= 0) {
2599 2599 uint32_t comp_len;
2600 2600 /* 3rd element of header is length of compressed inline data */
2601 2601 comp_len = getbe32(data + pos + 8);
2602 2602 incr = v1_hdrsize + comp_len;
2603 2603 if (offsets)
2604 2604 offsets[len] = data + pos;
2605 2605 len++;
2606 2606 pos += incr;
2607 2607 }
2608 2608
2609 2609 if (pos != end) {
2610 2610 if (!PyErr_Occurred())
2611 2611 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2612 2612 return -1;
2613 2613 }
2614 2614
2615 2615 return len;
2616 2616 }
2617 2617
2618 2618 static int index_init(indexObject *self, PyObject *args)
2619 2619 {
2620 2620 PyObject *data_obj, *inlined_obj;
2621 2621 Py_ssize_t size;
2622 2622
2623 2623 /* Initialize before argument-checking to avoid index_dealloc() crash.
2624 2624 */
2625 2625 self->raw_length = 0;
2626 2626 self->added = NULL;
2627 2627 self->cache = NULL;
2628 2628 self->data = NULL;
2629 2629 memset(&self->buf, 0, sizeof(self->buf));
2630 2630 self->headrevs = NULL;
2631 2631 self->filteredrevs = Py_None;
2632 2632 Py_INCREF(Py_None);
2633 2633 self->ntinitialized = 0;
2634 2634 self->offsets = NULL;
2635 2635
2636 2636 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2637 2637 return -1;
2638 2638 if (!PyObject_CheckBuffer(data_obj)) {
2639 2639 PyErr_SetString(PyExc_TypeError,
2640 2640 "data does not support buffer interface");
2641 2641 return -1;
2642 2642 }
2643 2643
2644 2644 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2645 2645 return -1;
2646 2646 size = self->buf.len;
2647 2647
2648 2648 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2649 2649 self->data = data_obj;
2650 2650
2651 2651 self->ntlookups = self->ntmisses = 0;
2652 2652 self->ntrev = -1;
2653 2653 Py_INCREF(self->data);
2654 2654
2655 2655 if (self->inlined) {
2656 2656 Py_ssize_t len = inline_scan(self, NULL);
2657 2657 if (len == -1)
2658 2658 goto bail;
2659 2659 self->raw_length = len;
2660 2660 self->length = len;
2661 2661 } else {
2662 2662 if (size % v1_hdrsize) {
2663 2663 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2664 2664 goto bail;
2665 2665 }
2666 2666 self->raw_length = size / v1_hdrsize;
2667 2667 self->length = self->raw_length;
2668 2668 }
2669 2669
2670 2670 return 0;
2671 2671 bail:
2672 2672 return -1;
2673 2673 }
2674 2674
2675 2675 static PyObject *index_nodemap(indexObject *self)
2676 2676 {
2677 2677 Py_INCREF(self);
2678 2678 return (PyObject *)self;
2679 2679 }
2680 2680
2681 2681 static void _index_clearcaches(indexObject *self)
2682 2682 {
2683 2683 if (self->cache) {
2684 2684 Py_ssize_t i;
2685 2685
2686 2686 for (i = 0; i < self->raw_length; i++)
2687 2687 Py_CLEAR(self->cache[i]);
2688 2688 free(self->cache);
2689 2689 self->cache = NULL;
2690 2690 }
2691 2691 if (self->offsets) {
2692 2692 PyMem_Free((void *)self->offsets);
2693 2693 self->offsets = NULL;
2694 2694 }
2695 2695 if (self->ntinitialized) {
2696 2696 nt_dealloc(&self->nt);
2697 2697 }
2698 2698 self->ntinitialized = 0;
2699 2699 Py_CLEAR(self->headrevs);
2700 2700 }
2701 2701
2702 2702 static PyObject *index_clearcaches(indexObject *self)
2703 2703 {
2704 2704 _index_clearcaches(self);
2705 2705 self->ntrev = -1;
2706 2706 self->ntlookups = self->ntmisses = 0;
2707 2707 Py_RETURN_NONE;
2708 2708 }
2709 2709
2710 2710 static void index_dealloc(indexObject *self)
2711 2711 {
2712 2712 _index_clearcaches(self);
2713 2713 Py_XDECREF(self->filteredrevs);
2714 2714 if (self->buf.buf) {
2715 2715 PyBuffer_Release(&self->buf);
2716 2716 memset(&self->buf, 0, sizeof(self->buf));
2717 2717 }
2718 2718 Py_XDECREF(self->data);
2719 2719 Py_XDECREF(self->added);
2720 2720 PyObject_Del(self);
2721 2721 }
2722 2722
2723 2723 static PySequenceMethods index_sequence_methods = {
2724 2724 (lenfunc)index_length, /* sq_length */
2725 2725 0, /* sq_concat */
2726 2726 0, /* sq_repeat */
2727 2727 (ssizeargfunc)index_get, /* sq_item */
2728 2728 0, /* sq_slice */
2729 2729 0, /* sq_ass_item */
2730 2730 0, /* sq_ass_slice */
2731 2731 (objobjproc)index_contains, /* sq_contains */
2732 2732 };
2733 2733
2734 2734 static PyMappingMethods index_mapping_methods = {
2735 2735 (lenfunc)index_length, /* mp_length */
2736 2736 (binaryfunc)index_getitem, /* mp_subscript */
2737 2737 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2738 2738 };
2739 2739
2740 2740 static PyMethodDef index_methods[] = {
2741 2741 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2742 2742 "return the gca set of the given revs"},
2743 2743 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2744 2744 METH_VARARGS,
2745 2745 "return the heads of the common ancestors of the given revs"},
2746 2746 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2747 2747 "clear the index caches"},
2748 2748 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2749 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2750 "return `rev` associated with a node or None"},
2749 2751 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2750 2752 "return True if the node exist in the index"},
2751 2753 {"rev", (PyCFunction)index_m_rev, METH_O,
2752 2754 "return `rev` associated with a node or raise RevlogError"},
2753 2755 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2754 2756 "compute phases"},
2755 2757 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2756 2758 "reachableroots"},
2757 2759 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2758 2760 "get head revisions"}, /* Can do filtering since 3.2 */
2759 2761 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2760 2762 "get filtered head revisions"}, /* Can always do filtering */
2761 2763 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2762 2764 "True if the object is a snapshot"},
2763 2765 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2764 2766 "Gather snapshot data in a cache dict"},
2765 2767 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2766 2768 "determine revisions with deltas to reconstruct fulltext"},
2767 2769 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2768 2770 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2769 2771 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2770 2772 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2771 2773 "match a potentially ambiguous node ID"},
2772 2774 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2773 2775 "find length of shortest hex nodeid of a binary ID"},
2774 2776 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2775 2777 {NULL} /* Sentinel */
2776 2778 };
2777 2779
2778 2780 static PyGetSetDef index_getset[] = {
2779 2781 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2780 2782 {NULL} /* Sentinel */
2781 2783 };
2782 2784
2783 2785 PyTypeObject HgRevlogIndex_Type = {
2784 2786 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2785 2787 "parsers.index", /* tp_name */
2786 2788 sizeof(indexObject), /* tp_basicsize */
2787 2789 0, /* tp_itemsize */
2788 2790 (destructor)index_dealloc, /* tp_dealloc */
2789 2791 0, /* tp_print */
2790 2792 0, /* tp_getattr */
2791 2793 0, /* tp_setattr */
2792 2794 0, /* tp_compare */
2793 2795 0, /* tp_repr */
2794 2796 0, /* tp_as_number */
2795 2797 &index_sequence_methods, /* tp_as_sequence */
2796 2798 &index_mapping_methods, /* tp_as_mapping */
2797 2799 0, /* tp_hash */
2798 2800 0, /* tp_call */
2799 2801 0, /* tp_str */
2800 2802 0, /* tp_getattro */
2801 2803 0, /* tp_setattro */
2802 2804 0, /* tp_as_buffer */
2803 2805 Py_TPFLAGS_DEFAULT, /* tp_flags */
2804 2806 "revlog index", /* tp_doc */
2805 2807 0, /* tp_traverse */
2806 2808 0, /* tp_clear */
2807 2809 0, /* tp_richcompare */
2808 2810 0, /* tp_weaklistoffset */
2809 2811 0, /* tp_iter */
2810 2812 0, /* tp_iternext */
2811 2813 index_methods, /* tp_methods */
2812 2814 0, /* tp_members */
2813 2815 index_getset, /* tp_getset */
2814 2816 0, /* tp_base */
2815 2817 0, /* tp_dict */
2816 2818 0, /* tp_descr_get */
2817 2819 0, /* tp_descr_set */
2818 2820 0, /* tp_dictoffset */
2819 2821 (initproc)index_init, /* tp_init */
2820 2822 0, /* tp_alloc */
2821 2823 };
2822 2824
2823 2825 /*
2824 2826 * returns a tuple of the form (index, index, cache) with elements as
2825 2827 * follows:
2826 2828 *
2827 2829 * index: an index object that lazily parses RevlogNG records
2828 2830 * cache: if data is inlined, a tuple (0, index_file_content), else None
2829 2831 * index_file_content could be a string, or a buffer
2830 2832 *
2831 2833 * added complications are for backwards compatibility
2832 2834 */
2833 2835 PyObject *parse_index2(PyObject *self, PyObject *args)
2834 2836 {
2835 2837 PyObject *tuple = NULL, *cache = NULL;
2836 2838 indexObject *idx;
2837 2839 int ret;
2838 2840
2839 2841 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2840 2842 if (idx == NULL)
2841 2843 goto bail;
2842 2844
2843 2845 ret = index_init(idx, args);
2844 2846 if (ret == -1)
2845 2847 goto bail;
2846 2848
2847 2849 if (idx->inlined) {
2848 2850 cache = Py_BuildValue("iO", 0, idx->data);
2849 2851 if (cache == NULL)
2850 2852 goto bail;
2851 2853 } else {
2852 2854 cache = Py_None;
2853 2855 Py_INCREF(cache);
2854 2856 }
2855 2857
2856 2858 tuple = Py_BuildValue("NN", idx, cache);
2857 2859 if (!tuple)
2858 2860 goto bail;
2859 2861 return tuple;
2860 2862
2861 2863 bail:
2862 2864 Py_XDECREF(idx);
2863 2865 Py_XDECREF(cache);
2864 2866 Py_XDECREF(tuple);
2865 2867 return NULL;
2866 2868 }
2867 2869
2868 2870 #ifdef WITH_RUST
2869 2871
2870 2872 /* rustlazyancestors: iteration over ancestors implemented in Rust
2871 2873 *
2872 2874 * This class holds a reference to an index and to the Rust iterator.
2873 2875 */
2874 2876 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2875 2877
2876 2878 struct rustlazyancestorsObjectStruct {
2877 2879 PyObject_HEAD
2878 2880 /* Type-specific fields go here. */
2879 2881 indexObject *index; /* Ref kept to avoid GC'ing the index */
2880 2882 void *iter; /* Rust iterator */
2881 2883 };
2882 2884
2883 2885 /* FFI exposed from Rust code */
2884 2886 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2885 2887 /* intrevs vector */
2886 2888 Py_ssize_t initrevslen,
2887 2889 long *initrevs, long stoprev,
2888 2890 int inclusive);
2889 2891 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2890 2892 int rustlazyancestors_next(rustlazyancestorsObject *self);
2891 2893 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2892 2894
2893 2895 /* CPython instance methods */
2894 2896 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2895 2897 {
2896 2898 PyObject *initrevsarg = NULL;
2897 2899 PyObject *inclusivearg = NULL;
2898 2900 long stoprev = 0;
2899 2901 long *initrevs = NULL;
2900 2902 int inclusive = 0;
2901 2903 Py_ssize_t i;
2902 2904
2903 2905 indexObject *index;
2904 2906 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2905 2907 &PyList_Type, &initrevsarg, &stoprev,
2906 2908 &PyBool_Type, &inclusivearg))
2907 2909 return -1;
2908 2910
2909 2911 Py_INCREF(index);
2910 2912 self->index = index;
2911 2913
2912 2914 if (inclusivearg == Py_True)
2913 2915 inclusive = 1;
2914 2916
2915 2917 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2916 2918
2917 2919 initrevs = (long *)calloc(linit, sizeof(long));
2918 2920
2919 2921 if (initrevs == NULL) {
2920 2922 PyErr_NoMemory();
2921 2923 goto bail;
2922 2924 }
2923 2925
2924 2926 for (i = 0; i < linit; i++) {
2925 2927 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2926 2928 }
2927 2929 if (PyErr_Occurred())
2928 2930 goto bail;
2929 2931
2930 2932 self->iter =
2931 2933 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2932 2934 if (self->iter == NULL) {
2933 2935 /* if this is because of GraphError::ParentOutOfRange
2934 2936 * HgRevlogIndex_GetParents() has already set the proper
2935 2937 * exception */
2936 2938 goto bail;
2937 2939 }
2938 2940
2939 2941 free(initrevs);
2940 2942 return 0;
2941 2943
2942 2944 bail:
2943 2945 free(initrevs);
2944 2946 return -1;
2945 2947 };
2946 2948
2947 2949 static void rustla_dealloc(rustlazyancestorsObject *self)
2948 2950 {
2949 2951 Py_XDECREF(self->index);
2950 2952 if (self->iter != NULL) { /* can happen if rustla_init failed */
2951 2953 rustlazyancestors_drop(self->iter);
2952 2954 }
2953 2955 PyObject_Del(self);
2954 2956 }
2955 2957
2956 2958 static PyObject *rustla_next(rustlazyancestorsObject *self)
2957 2959 {
2958 2960 int res = rustlazyancestors_next(self->iter);
2959 2961 if (res == -1) {
2960 2962 /* Setting an explicit exception seems unnecessary
2961 2963 * as examples from Python source code (Objects/rangeobjets.c
2962 2964 * and Modules/_io/stringio.c) seem to demonstrate.
2963 2965 */
2964 2966 return NULL;
2965 2967 }
2966 2968 return PyInt_FromLong(res);
2967 2969 }
2968 2970
2969 2971 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2970 2972 {
2971 2973 long lrev;
2972 2974 if (!pylong_to_long(rev, &lrev)) {
2973 2975 PyErr_Clear();
2974 2976 return 0;
2975 2977 }
2976 2978 return rustlazyancestors_contains(self->iter, lrev);
2977 2979 }
2978 2980
2979 2981 static PySequenceMethods rustla_sequence_methods = {
2980 2982 0, /* sq_length */
2981 2983 0, /* sq_concat */
2982 2984 0, /* sq_repeat */
2983 2985 0, /* sq_item */
2984 2986 0, /* sq_slice */
2985 2987 0, /* sq_ass_item */
2986 2988 0, /* sq_ass_slice */
2987 2989 (objobjproc)rustla_contains, /* sq_contains */
2988 2990 };
2989 2991
2990 2992 static PyTypeObject rustlazyancestorsType = {
2991 2993 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2992 2994 "parsers.rustlazyancestors", /* tp_name */
2993 2995 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2994 2996 0, /* tp_itemsize */
2995 2997 (destructor)rustla_dealloc, /* tp_dealloc */
2996 2998 0, /* tp_print */
2997 2999 0, /* tp_getattr */
2998 3000 0, /* tp_setattr */
2999 3001 0, /* tp_compare */
3000 3002 0, /* tp_repr */
3001 3003 0, /* tp_as_number */
3002 3004 &rustla_sequence_methods, /* tp_as_sequence */
3003 3005 0, /* tp_as_mapping */
3004 3006 0, /* tp_hash */
3005 3007 0, /* tp_call */
3006 3008 0, /* tp_str */
3007 3009 0, /* tp_getattro */
3008 3010 0, /* tp_setattro */
3009 3011 0, /* tp_as_buffer */
3010 3012 Py_TPFLAGS_DEFAULT, /* tp_flags */
3011 3013 "Iterator over ancestors, implemented in Rust", /* tp_doc */
3012 3014 0, /* tp_traverse */
3013 3015 0, /* tp_clear */
3014 3016 0, /* tp_richcompare */
3015 3017 0, /* tp_weaklistoffset */
3016 3018 0, /* tp_iter */
3017 3019 (iternextfunc)rustla_next, /* tp_iternext */
3018 3020 0, /* tp_methods */
3019 3021 0, /* tp_members */
3020 3022 0, /* tp_getset */
3021 3023 0, /* tp_base */
3022 3024 0, /* tp_dict */
3023 3025 0, /* tp_descr_get */
3024 3026 0, /* tp_descr_set */
3025 3027 0, /* tp_dictoffset */
3026 3028 (initproc)rustla_init, /* tp_init */
3027 3029 0, /* tp_alloc */
3028 3030 };
3029 3031 #endif /* WITH_RUST */
3030 3032
3031 3033 void revlog_module_init(PyObject *mod)
3032 3034 {
3033 3035 PyObject *caps = NULL;
3034 3036 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3035 3037 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3036 3038 return;
3037 3039 Py_INCREF(&HgRevlogIndex_Type);
3038 3040 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3039 3041
3040 3042 nodetreeType.tp_new = PyType_GenericNew;
3041 3043 if (PyType_Ready(&nodetreeType) < 0)
3042 3044 return;
3043 3045 Py_INCREF(&nodetreeType);
3044 3046 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3045 3047
3046 3048 if (!nullentry) {
3047 3049 nullentry =
3048 3050 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
3049 3051 -1, -1, -1, nullid, (Py_ssize_t)20);
3050 3052 }
3051 3053 if (nullentry)
3052 3054 PyObject_GC_UnTrack(nullentry);
3053 3055
3054 3056 caps = PyCapsule_New(HgRevlogIndex_GetParents,
3055 3057 "mercurial.cext.parsers.index_get_parents_CAPI",
3056 3058 NULL);
3057 3059 if (caps != NULL)
3058 3060 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
3059 3061
3060 3062 #ifdef WITH_RUST
3061 3063 rustlazyancestorsType.tp_new = PyType_GenericNew;
3062 3064 if (PyType_Ready(&rustlazyancestorsType) < 0)
3063 3065 return;
3064 3066 Py_INCREF(&rustlazyancestorsType);
3065 3067 PyModule_AddObject(mod, "rustlazyancestors",
3066 3068 (PyObject *)&rustlazyancestorsType);
3067 3069 #endif
3068 3070 }
@@ -1,157 +1,157 b''
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 from .pycompat import getattr
14 14
15 15 # Rules for how modules can be loaded. Values are:
16 16 #
17 17 # c - require C extensions
18 18 # rust+c - require Rust and C extensions
19 19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
20 20 # for each
21 21 # allow - allow pure Python implementation when C loading fails
22 22 # cffi - required cffi versions (implemented within pure module)
23 23 # cffi-allow - allow pure Python implementation if cffi version is missing
24 24 # py - only load pure Python modules
25 25 #
26 26 # By default, fall back to the pure modules so the in-place build can
27 27 # run without recompiling the C extensions. This will be overridden by
28 28 # __modulepolicy__ generated by setup.py.
29 29 policy = b'allow'
30 30 _packageprefs = {
31 31 # policy: (versioned package, pure package)
32 32 b'c': ('cext', None),
33 33 b'allow': ('cext', 'pure'),
34 34 b'cffi': ('cffi', None),
35 35 b'cffi-allow': ('cffi', 'pure'),
36 36 b'py': (None, 'pure'),
37 37 # For now, rust policies impact importrust only
38 38 b'rust+c': ('cext', None),
39 39 b'rust+c-allow': ('cext', 'pure'),
40 40 }
41 41
42 42 try:
43 43 from . import __modulepolicy__
44 44
45 45 policy = __modulepolicy__.modulepolicy
46 46 except ImportError:
47 47 pass
48 48
49 49 # PyPy doesn't load C extensions.
50 50 #
51 51 # The canonical way to do this is to test platform.python_implementation().
52 52 # But we don't import platform and don't bloat for it here.
53 53 if '__pypy__' in sys.builtin_module_names:
54 54 policy = b'cffi'
55 55
56 56 # Environment variable can always force settings.
57 57 if sys.version_info[0] >= 3:
58 58 if 'HGMODULEPOLICY' in os.environ:
59 59 policy = os.environ['HGMODULEPOLICY'].encode('utf-8')
60 60 else:
61 61 policy = os.environ.get('HGMODULEPOLICY', policy)
62 62
63 63
64 64 def _importfrom(pkgname, modname):
65 65 # from .<pkgname> import <modname> (where . is looked through this module)
66 66 fakelocals = {}
67 67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
68 68 try:
69 69 fakelocals[modname] = mod = getattr(pkg, modname)
70 70 except AttributeError:
71 71 raise ImportError('cannot import name %s' % modname)
72 72 # force import; fakelocals[modname] may be replaced with the real module
73 73 getattr(mod, '__doc__', None)
74 74 return fakelocals[modname]
75 75
76 76
77 77 # keep in sync with "version" in C modules
78 78 _cextversions = {
79 79 ('cext', 'base85'): 1,
80 80 ('cext', 'bdiff'): 3,
81 81 ('cext', 'mpatch'): 1,
82 82 ('cext', 'osutil'): 4,
83 ('cext', 'parsers'): 15,
83 ('cext', 'parsers'): 16,
84 84 }
85 85
86 86 # map import request to other package or module
87 87 _modredirects = {
88 88 ('cext', 'charencode'): ('cext', 'parsers'),
89 89 ('cffi', 'base85'): ('pure', 'base85'),
90 90 ('cffi', 'charencode'): ('pure', 'charencode'),
91 91 ('cffi', 'parsers'): ('pure', 'parsers'),
92 92 }
93 93
94 94
95 95 def _checkmod(pkgname, modname, mod):
96 96 expected = _cextversions.get((pkgname, modname))
97 97 actual = getattr(mod, 'version', None)
98 98 if actual != expected:
99 99 raise ImportError(
100 100 'cannot import module %s.%s '
101 101 '(expected version: %d, actual: %r)'
102 102 % (pkgname, modname, expected, actual)
103 103 )
104 104
105 105
106 106 def importmod(modname):
107 107 """Import module according to policy and check API version"""
108 108 try:
109 109 verpkg, purepkg = _packageprefs[policy]
110 110 except KeyError:
111 111 raise ImportError('invalid HGMODULEPOLICY %r' % policy)
112 112 assert verpkg or purepkg
113 113 if verpkg:
114 114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
115 115 try:
116 116 mod = _importfrom(pn, mn)
117 117 if pn == verpkg:
118 118 _checkmod(pn, mn, mod)
119 119 return mod
120 120 except ImportError:
121 121 if not purepkg:
122 122 raise
123 123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
124 124 return _importfrom(pn, mn)
125 125
126 126
127 127 def _isrustpermissive():
128 128 """Assuming the policy is a Rust one, tell if it's permissive."""
129 129 return policy.endswith(b'-allow')
130 130
131 131
132 132 def importrust(modname, member=None, default=None):
133 133 """Import Rust module according to policy and availability.
134 134
135 135 If policy isn't a Rust one, this returns `default`.
136 136
137 137 If either the module or its member is not available, this returns `default`
138 138 if policy is permissive and raises `ImportError` if not.
139 139 """
140 140 if not policy.startswith(b'rust'):
141 141 return default
142 142
143 143 try:
144 144 mod = _importfrom('rustext', modname)
145 145 except ImportError:
146 146 if _isrustpermissive():
147 147 return default
148 148 raise
149 149 if member is None:
150 150 return mod
151 151
152 152 try:
153 153 return getattr(mod, member)
154 154 except AttributeError:
155 155 if _isrustpermissive():
156 156 return default
157 157 raise ImportError("Cannot import name %s" % member)
@@ -1,223 +1,229 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 revlogutils,
17 17 util,
18 18 )
19 19
20 20 stringio = pycompat.bytesio
21 21
22 22
23 23 _pack = struct.pack
24 24 _unpack = struct.unpack
25 25 _compress = zlib.compress
26 26 _decompress = zlib.decompress
27 27
28 28 # Some code below makes tuples directly because it's more convenient. However,
29 29 # code outside this module should always use dirstatetuple.
30 30 def dirstatetuple(*x):
31 31 # x is a tuple
32 32 return x
33 33
34 34
35 35 indexformatng = b">Qiiiiii20s12x"
36 36 indexfirst = struct.calcsize(b'Q')
37 37 sizeint = struct.calcsize(b'i')
38 38 indexsize = struct.calcsize(indexformatng)
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
48 48
49 49 class BaseIndexObject(object):
50 50 @util.propertycache
51 51 def nodemap(self):
52 52 nodemap = revlogutils.NodeMap({nullid: nullrev})
53 53 for r in range(0, len(self)):
54 54 n = self[r][7]
55 55 nodemap[n] = r
56 56 return nodemap
57 57
58 58 def has_node(self, node):
59 59 """return True if the node exist in the index"""
60 60 return node in self.nodemap
61 61
62 62 def rev(self, node):
63 63 """return a revision for a node
64 64
65 65 If the node is unknown, raise a RevlogError"""
66 66 return self.nodemap[node]
67 67
68 def get_rev(self, node):
69 """return a revision for a node
70
71 If the node is unknown, return None"""
72 return self.nodemap.get(node)
73
68 74 def _stripnodes(self, start):
69 75 if 'nodemap' in vars(self):
70 76 for r in range(start, len(self)):
71 77 n = self[r][7]
72 78 del self.nodemap[n]
73 79
74 80 def clearcaches(self):
75 81 self.__dict__.pop('nodemap', None)
76 82
77 83 def __len__(self):
78 84 return self._lgt + len(self._extra)
79 85
80 86 def append(self, tup):
81 87 if 'nodemap' in vars(self):
82 88 self.nodemap[tup[7]] = len(self)
83 89 self._extra.append(tup)
84 90
85 91 def _check_index(self, i):
86 92 if not isinstance(i, int):
87 93 raise TypeError(b"expecting int indexes")
88 94 if i < 0 or i >= len(self):
89 95 raise IndexError
90 96
91 97 def __getitem__(self, i):
92 98 if i == -1:
93 99 return (0, 0, 0, -1, -1, -1, -1, nullid)
94 100 self._check_index(i)
95 101 if i >= self._lgt:
96 102 return self._extra[i - self._lgt]
97 103 index = self._calculate_index(i)
98 104 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
99 105 if i == 0:
100 106 e = list(r)
101 107 type = gettype(e[0])
102 108 e[0] = offset_type(0, type)
103 109 return tuple(e)
104 110 return r
105 111
106 112
107 113 class IndexObject(BaseIndexObject):
108 114 def __init__(self, data):
109 115 assert len(data) % indexsize == 0
110 116 self._data = data
111 117 self._lgt = len(data) // indexsize
112 118 self._extra = []
113 119
114 120 def _calculate_index(self, i):
115 121 return i * indexsize
116 122
117 123 def __delitem__(self, i):
118 124 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
119 125 raise ValueError(b"deleting slices only supports a:-1 with step 1")
120 126 i = i.start
121 127 self._check_index(i)
122 128 self._stripnodes(i)
123 129 if i < self._lgt:
124 130 self._data = self._data[: i * indexsize]
125 131 self._lgt = i
126 132 self._extra = []
127 133 else:
128 134 self._extra = self._extra[: i - self._lgt]
129 135
130 136
131 137 class InlinedIndexObject(BaseIndexObject):
132 138 def __init__(self, data, inline=0):
133 139 self._data = data
134 140 self._lgt = self._inline_scan(None)
135 141 self._inline_scan(self._lgt)
136 142 self._extra = []
137 143
138 144 def _inline_scan(self, lgt):
139 145 off = 0
140 146 if lgt is not None:
141 147 self._offsets = [0] * lgt
142 148 count = 0
143 149 while off <= len(self._data) - indexsize:
144 150 (s,) = struct.unpack(
145 151 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
146 152 )
147 153 if lgt is not None:
148 154 self._offsets[count] = off
149 155 count += 1
150 156 off += indexsize + s
151 157 if off != len(self._data):
152 158 raise ValueError(b"corrupted data")
153 159 return count
154 160
155 161 def __delitem__(self, i):
156 162 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
157 163 raise ValueError(b"deleting slices only supports a:-1 with step 1")
158 164 i = i.start
159 165 self._check_index(i)
160 166 self._stripnodes(i)
161 167 if i < self._lgt:
162 168 self._offsets = self._offsets[:i]
163 169 self._lgt = i
164 170 self._extra = []
165 171 else:
166 172 self._extra = self._extra[: i - self._lgt]
167 173
168 174 def _calculate_index(self, i):
169 175 return self._offsets[i]
170 176
171 177
172 178 def parse_index2(data, inline):
173 179 if not inline:
174 180 return IndexObject(data), None
175 181 return InlinedIndexObject(data, inline), (0, data)
176 182
177 183
178 184 def parse_dirstate(dmap, copymap, st):
179 185 parents = [st[:20], st[20:40]]
180 186 # dereference fields so they will be local in loop
181 187 format = b">cllll"
182 188 e_size = struct.calcsize(format)
183 189 pos1 = 40
184 190 l = len(st)
185 191
186 192 # the inner loop
187 193 while pos1 < l:
188 194 pos2 = pos1 + e_size
189 195 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
190 196 pos1 = pos2 + e[4]
191 197 f = st[pos2:pos1]
192 198 if b'\0' in f:
193 199 f, c = f.split(b'\0')
194 200 copymap[f] = c
195 201 dmap[f] = e[:4]
196 202 return parents
197 203
198 204
199 205 def pack_dirstate(dmap, copymap, pl, now):
200 206 now = int(now)
201 207 cs = stringio()
202 208 write = cs.write
203 209 write(b"".join(pl))
204 210 for f, e in pycompat.iteritems(dmap):
205 211 if e[0] == b'n' and e[3] == now:
206 212 # The file was last modified "simultaneously" with the current
207 213 # write to dirstate (i.e. within the same second for file-
208 214 # systems with a granularity of 1 sec). This commonly happens
209 215 # for at least a couple of files on 'update'.
210 216 # The user could change the file without changing its size
211 217 # within the same second. Invalidate the file's mtime in
212 218 # dirstate, forcing future 'status' calls to compare the
213 219 # contents of the file if the size is the same. This prevents
214 220 # mistakenly treating such files as clean.
215 221 e = dirstatetuple(e[0], e[1], e[2], -1)
216 222 dmap[f] = e
217 223
218 224 if f in copymap:
219 225 f = b"%s\0%s" % (f, copymap[f])
220 226 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
221 227 write(e)
222 228 write(f)
223 229 return cs.getvalue()
@@ -1,2970 +1,2976 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the name to prevent pyflakes constraints
87 87 # We need these name available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated to the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
206 206
207 207 class revlogoldindex(list):
208 208 @util.propertycache
209 209 def nodemap(self):
210 210 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 211 for r in range(0, len(self)):
212 212 n = self[r][7]
213 213 nodemap[n] = r
214 214 return nodemap
215 215
216 216 def has_node(self, node):
217 217 """return True if the node exist in the index"""
218 218 return node in self.nodemap
219 219
220 220 def rev(self, node):
221 221 """return a revision for a node
222 222
223 223 If the node is unknown, raise a RevlogError"""
224 224 return self.nodemap[node]
225 225
226 def get_rev(self, node):
227 """return a revision for a node
228
229 If the node is unknown, return None"""
230 return self.nodemap.get(node)
231
226 232 def append(self, tup):
227 233 self.nodemap[tup[7]] = len(self)
228 234 super(revlogoldindex, self).append(tup)
229 235
230 236 def __delitem__(self, i):
231 237 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
232 238 raise ValueError(b"deleting slices only supports a:-1 with step 1")
233 239 for r in pycompat.xrange(i.start, len(self)):
234 240 del self.nodemap[self[r][7]]
235 241 super(revlogoldindex, self).__delitem__(i)
236 242
237 243 def clearcaches(self):
238 244 self.__dict__.pop('nodemap', None)
239 245
240 246 def __getitem__(self, i):
241 247 if i == -1:
242 248 return (0, 0, 0, -1, -1, -1, -1, nullid)
243 249 return list.__getitem__(self, i)
244 250
245 251
246 252 class revlogoldio(object):
247 253 def __init__(self):
248 254 self.size = indexformatv0.size
249 255
250 256 def parseindex(self, data, inline):
251 257 s = self.size
252 258 index = []
253 259 nodemap = revlogutils.NodeMap({nullid: nullrev})
254 260 n = off = 0
255 261 l = len(data)
256 262 while off + s <= l:
257 263 cur = data[off : off + s]
258 264 off += s
259 265 e = indexformatv0_unpack(cur)
260 266 # transform to revlogv1 format
261 267 e2 = (
262 268 offset_type(e[0], 0),
263 269 e[1],
264 270 -1,
265 271 e[2],
266 272 e[3],
267 273 nodemap.get(e[4], nullrev),
268 274 nodemap.get(e[5], nullrev),
269 275 e[6],
270 276 )
271 277 index.append(e2)
272 278 nodemap[e[6]] = n
273 279 n += 1
274 280
275 281 index = revlogoldindex(index)
276 282 return index, None
277 283
278 284 def packentry(self, entry, node, version, rev):
279 285 if gettype(entry[0]):
280 286 raise error.RevlogError(
281 287 _(b'index entry flags need revlog version 1')
282 288 )
283 289 e2 = (
284 290 getoffset(entry[0]),
285 291 entry[1],
286 292 entry[3],
287 293 entry[4],
288 294 node(entry[5]),
289 295 node(entry[6]),
290 296 entry[7],
291 297 )
292 298 return indexformatv0_pack(*e2)
293 299
294 300
295 301 # index ng:
296 302 # 6 bytes: offset
297 303 # 2 bytes: flags
298 304 # 4 bytes: compressed length
299 305 # 4 bytes: uncompressed length
300 306 # 4 bytes: base rev
301 307 # 4 bytes: link rev
302 308 # 4 bytes: parent 1 rev
303 309 # 4 bytes: parent 2 rev
304 310 # 32 bytes: nodeid
305 311 indexformatng = struct.Struct(b">Qiiiiii20s12x")
306 312 indexformatng_pack = indexformatng.pack
307 313 versionformat = struct.Struct(b">I")
308 314 versionformat_pack = versionformat.pack
309 315 versionformat_unpack = versionformat.unpack
310 316
311 317 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
312 318 # signed integer)
313 319 _maxentrysize = 0x7FFFFFFF
314 320
315 321
316 322 class revlogio(object):
317 323 def __init__(self):
318 324 self.size = indexformatng.size
319 325
320 326 def parseindex(self, data, inline):
321 327 # call the C implementation to parse the index data
322 328 index, cache = parsers.parse_index2(data, inline)
323 329 return index, cache
324 330
325 331 def packentry(self, entry, node, version, rev):
326 332 p = indexformatng_pack(*entry)
327 333 if rev == 0:
328 334 p = versionformat_pack(version) + p[4:]
329 335 return p
330 336
331 337
332 338 class revlog(object):
333 339 """
334 340 the underlying revision storage object
335 341
336 342 A revlog consists of two parts, an index and the revision data.
337 343
338 344 The index is a file with a fixed record size containing
339 345 information on each revision, including its nodeid (hash), the
340 346 nodeids of its parents, the position and offset of its data within
341 347 the data file, and the revision it's based on. Finally, each entry
342 348 contains a linkrev entry that can serve as a pointer to external
343 349 data.
344 350
345 351 The revision data itself is a linear collection of data chunks.
346 352 Each chunk represents a revision and is usually represented as a
347 353 delta against the previous chunk. To bound lookup time, runs of
348 354 deltas are limited to about 2 times the length of the original
349 355 version data. This makes retrieval of a version proportional to
350 356 its size, or O(1) relative to the number of revisions.
351 357
352 358 Both pieces of the revlog are written to in an append-only
353 359 fashion, which means we never need to rewrite a file to insert or
354 360 remove data, and can use some simple techniques to avoid the need
355 361 for locking while reading.
356 362
357 363 If checkambig, indexfile is opened with checkambig=True at
358 364 writing, to avoid file stat ambiguity.
359 365
360 366 If mmaplargeindex is True, and an mmapindexthreshold is set, the
361 367 index will be mmapped rather than read if it is larger than the
362 368 configured threshold.
363 369
364 370 If censorable is True, the revlog can have censored revisions.
365 371
366 372 If `upperboundcomp` is not None, this is the expected maximal gain from
367 373 compression for the data content.
368 374 """
369 375
370 376 _flagserrorclass = error.RevlogError
371 377
372 378 def __init__(
373 379 self,
374 380 opener,
375 381 indexfile,
376 382 datafile=None,
377 383 checkambig=False,
378 384 mmaplargeindex=False,
379 385 censorable=False,
380 386 upperboundcomp=None,
381 387 ):
382 388 """
383 389 create a revlog object
384 390
385 391 opener is a function that abstracts the file opening operation
386 392 and can be used to implement COW semantics or the like.
387 393
388 394 """
389 395 self.upperboundcomp = upperboundcomp
390 396 self.indexfile = indexfile
391 397 self.datafile = datafile or (indexfile[:-2] + b".d")
392 398 self.opener = opener
393 399 # When True, indexfile is opened with checkambig=True at writing, to
394 400 # avoid file stat ambiguity.
395 401 self._checkambig = checkambig
396 402 self._mmaplargeindex = mmaplargeindex
397 403 self._censorable = censorable
398 404 # 3-tuple of (node, rev, text) for a raw revision.
399 405 self._revisioncache = None
400 406 # Maps rev to chain base rev.
401 407 self._chainbasecache = util.lrucachedict(100)
402 408 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
403 409 self._chunkcache = (0, b'')
404 410 # How much data to read and cache into the raw revlog data cache.
405 411 self._chunkcachesize = 65536
406 412 self._maxchainlen = None
407 413 self._deltabothparents = True
408 414 self.index = None
409 415 # Mapping of partial identifiers to full nodes.
410 416 self._pcache = {}
411 417 # Mapping of revision integer to full node.
412 418 self._nodepos = None
413 419 self._compengine = b'zlib'
414 420 self._compengineopts = {}
415 421 self._maxdeltachainspan = -1
416 422 self._withsparseread = False
417 423 self._sparserevlog = False
418 424 self._srdensitythreshold = 0.50
419 425 self._srmingapsize = 262144
420 426
421 427 # Make copy of flag processors so each revlog instance can support
422 428 # custom flags.
423 429 self._flagprocessors = dict(flagutil.flagprocessors)
424 430
425 431 # 2-tuple of file handles being used for active writing.
426 432 self._writinghandles = None
427 433
428 434 self._loadindex()
429 435
430 436 def _loadindex(self):
431 437 mmapindexthreshold = None
432 438 opts = self.opener.options
433 439
434 440 if b'revlogv2' in opts:
435 441 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
436 442 elif b'revlogv1' in opts:
437 443 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
438 444 if b'generaldelta' in opts:
439 445 newversionflags |= FLAG_GENERALDELTA
440 446 elif b'revlogv0' in self.opener.options:
441 447 newversionflags = REVLOGV0
442 448 else:
443 449 newversionflags = REVLOG_DEFAULT_VERSION
444 450
445 451 if b'chunkcachesize' in opts:
446 452 self._chunkcachesize = opts[b'chunkcachesize']
447 453 if b'maxchainlen' in opts:
448 454 self._maxchainlen = opts[b'maxchainlen']
449 455 if b'deltabothparents' in opts:
450 456 self._deltabothparents = opts[b'deltabothparents']
451 457 self._lazydelta = bool(opts.get(b'lazydelta', True))
452 458 self._lazydeltabase = False
453 459 if self._lazydelta:
454 460 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
455 461 if b'compengine' in opts:
456 462 self._compengine = opts[b'compengine']
457 463 if b'zlib.level' in opts:
458 464 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
459 465 if b'zstd.level' in opts:
460 466 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
461 467 if b'maxdeltachainspan' in opts:
462 468 self._maxdeltachainspan = opts[b'maxdeltachainspan']
463 469 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
464 470 mmapindexthreshold = opts[b'mmapindexthreshold']
465 471 self.hassidedata = bool(opts.get(b'side-data', False))
466 472 if self.hassidedata:
467 473 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
468 474 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
469 475 withsparseread = bool(opts.get(b'with-sparse-read', False))
470 476 # sparse-revlog forces sparse-read
471 477 self._withsparseread = self._sparserevlog or withsparseread
472 478 if b'sparse-read-density-threshold' in opts:
473 479 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
474 480 if b'sparse-read-min-gap-size' in opts:
475 481 self._srmingapsize = opts[b'sparse-read-min-gap-size']
476 482 if opts.get(b'enableellipsis'):
477 483 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
478 484
479 485 # revlog v0 doesn't have flag processors
480 486 for flag, processor in pycompat.iteritems(
481 487 opts.get(b'flagprocessors', {})
482 488 ):
483 489 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
484 490
485 491 if self._chunkcachesize <= 0:
486 492 raise error.RevlogError(
487 493 _(b'revlog chunk cache size %r is not greater than 0')
488 494 % self._chunkcachesize
489 495 )
490 496 elif self._chunkcachesize & (self._chunkcachesize - 1):
491 497 raise error.RevlogError(
492 498 _(b'revlog chunk cache size %r is not a power of 2')
493 499 % self._chunkcachesize
494 500 )
495 501
496 502 indexdata = b''
497 503 self._initempty = True
498 504 try:
499 505 with self._indexfp() as f:
500 506 if (
501 507 mmapindexthreshold is not None
502 508 and self.opener.fstat(f).st_size >= mmapindexthreshold
503 509 ):
504 510 # TODO: should .close() to release resources without
505 511 # relying on Python GC
506 512 indexdata = util.buffer(util.mmapread(f))
507 513 else:
508 514 indexdata = f.read()
509 515 if len(indexdata) > 0:
510 516 versionflags = versionformat_unpack(indexdata[:4])[0]
511 517 self._initempty = False
512 518 else:
513 519 versionflags = newversionflags
514 520 except IOError as inst:
515 521 if inst.errno != errno.ENOENT:
516 522 raise
517 523
518 524 versionflags = newversionflags
519 525
520 526 self.version = versionflags
521 527
522 528 flags = versionflags & ~0xFFFF
523 529 fmt = versionflags & 0xFFFF
524 530
525 531 if fmt == REVLOGV0:
526 532 if flags:
527 533 raise error.RevlogError(
528 534 _(b'unknown flags (%#04x) in version %d revlog %s')
529 535 % (flags >> 16, fmt, self.indexfile)
530 536 )
531 537
532 538 self._inline = False
533 539 self._generaldelta = False
534 540
535 541 elif fmt == REVLOGV1:
536 542 if flags & ~REVLOGV1_FLAGS:
537 543 raise error.RevlogError(
538 544 _(b'unknown flags (%#04x) in version %d revlog %s')
539 545 % (flags >> 16, fmt, self.indexfile)
540 546 )
541 547
542 548 self._inline = versionflags & FLAG_INLINE_DATA
543 549 self._generaldelta = versionflags & FLAG_GENERALDELTA
544 550
545 551 elif fmt == REVLOGV2:
546 552 if flags & ~REVLOGV2_FLAGS:
547 553 raise error.RevlogError(
548 554 _(b'unknown flags (%#04x) in version %d revlog %s')
549 555 % (flags >> 16, fmt, self.indexfile)
550 556 )
551 557
552 558 self._inline = versionflags & FLAG_INLINE_DATA
553 559 # generaldelta implied by version 2 revlogs.
554 560 self._generaldelta = True
555 561
556 562 else:
557 563 raise error.RevlogError(
558 564 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
559 565 )
560 566 # sparse-revlog can't be on without general-delta (issue6056)
561 567 if not self._generaldelta:
562 568 self._sparserevlog = False
563 569
564 570 self._storedeltachains = True
565 571
566 572 self._io = revlogio()
567 573 if self.version == REVLOGV0:
568 574 self._io = revlogoldio()
569 575 try:
570 576 d = self._io.parseindex(indexdata, self._inline)
571 577 except (ValueError, IndexError):
572 578 raise error.RevlogError(
573 579 _(b"index %s is corrupted") % self.indexfile
574 580 )
575 581 self.index, self._chunkcache = d
576 582 self.nodemap = self.index.nodemap
577 583 if not self._chunkcache:
578 584 self._chunkclear()
579 585 # revnum -> (chain-length, sum-delta-length)
580 586 self._chaininfocache = {}
581 587 # revlog header -> revlog compressor
582 588 self._decompressors = {}
583 589
584 590 @util.propertycache
585 591 def _compressor(self):
586 592 engine = util.compengines[self._compengine]
587 593 return engine.revlogcompressor(self._compengineopts)
588 594
589 595 def _indexfp(self, mode=b'r'):
590 596 """file object for the revlog's index file"""
591 597 args = {'mode': mode}
592 598 if mode != b'r':
593 599 args['checkambig'] = self._checkambig
594 600 if mode == b'w':
595 601 args['atomictemp'] = True
596 602 return self.opener(self.indexfile, **args)
597 603
598 604 def _datafp(self, mode=b'r'):
599 605 """file object for the revlog's data file"""
600 606 return self.opener(self.datafile, mode=mode)
601 607
602 608 @contextlib.contextmanager
603 609 def _datareadfp(self, existingfp=None):
604 610 """file object suitable to read data"""
605 611 # Use explicit file handle, if given.
606 612 if existingfp is not None:
607 613 yield existingfp
608 614
609 615 # Use a file handle being actively used for writes, if available.
610 616 # There is some danger to doing this because reads will seek the
611 617 # file. However, _writeentry() performs a SEEK_END before all writes,
612 618 # so we should be safe.
613 619 elif self._writinghandles:
614 620 if self._inline:
615 621 yield self._writinghandles[0]
616 622 else:
617 623 yield self._writinghandles[1]
618 624
619 625 # Otherwise open a new file handle.
620 626 else:
621 627 if self._inline:
622 628 func = self._indexfp
623 629 else:
624 630 func = self._datafp
625 631 with func() as fp:
626 632 yield fp
627 633
628 634 def tiprev(self):
629 635 return len(self.index) - 1
630 636
631 637 def tip(self):
632 638 return self.node(self.tiprev())
633 639
634 640 def __contains__(self, rev):
635 641 return 0 <= rev < len(self)
636 642
637 643 def __len__(self):
638 644 return len(self.index)
639 645
640 646 def __iter__(self):
641 647 return iter(pycompat.xrange(len(self)))
642 648
643 649 def revs(self, start=0, stop=None):
644 650 """iterate over all rev in this revlog (from start to stop)"""
645 651 return storageutil.iterrevs(len(self), start=start, stop=stop)
646 652
647 653 @util.propertycache
648 654 def nodemap(self):
649 655 if self.index:
650 656 # populate mapping down to the initial node
651 657 node0 = self.index[0][7] # get around changelog filtering
652 658 self.rev(node0)
653 659 return self.index.nodemap
654 660
655 661 @property
656 662 def _nodecache(self):
657 663 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
658 664 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
659 665 return self.index.nodemap
660 666
661 667 def hasnode(self, node):
662 668 try:
663 669 self.rev(node)
664 670 return True
665 671 except KeyError:
666 672 return False
667 673
668 674 def candelta(self, baserev, rev):
669 675 """whether two revisions (baserev, rev) can be delta-ed or not"""
670 676 # Disable delta if either rev requires a content-changing flag
671 677 # processor (ex. LFS). This is because such flag processor can alter
672 678 # the rawtext content that the delta will be based on, and two clients
673 679 # could have a same revlog node with different flags (i.e. different
674 680 # rawtext contents) and the delta could be incompatible.
675 681 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
676 682 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
677 683 ):
678 684 return False
679 685 return True
680 686
681 687 def clearcaches(self):
682 688 self._revisioncache = None
683 689 self._chainbasecache.clear()
684 690 self._chunkcache = (0, b'')
685 691 self._pcache = {}
686 692 self.index.clearcaches()
687 693
688 694 def rev(self, node):
689 695 try:
690 696 return self.index.rev(node)
691 697 except TypeError:
692 698 raise
693 699 except error.RevlogError:
694 700 # parsers.c radix tree lookup failed
695 701 if node == wdirid or node in wdirfilenodeids:
696 702 raise error.WdirUnsupported
697 703 raise error.LookupError(node, self.indexfile, _(b'no node'))
698 704
699 705 # Accessors for index entries.
700 706
701 707 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
702 708 # are flags.
703 709 def start(self, rev):
704 710 return int(self.index[rev][0] >> 16)
705 711
706 712 def flags(self, rev):
707 713 return self.index[rev][0] & 0xFFFF
708 714
709 715 def length(self, rev):
710 716 return self.index[rev][1]
711 717
712 718 def rawsize(self, rev):
713 719 """return the length of the uncompressed text for a given revision"""
714 720 l = self.index[rev][2]
715 721 if l >= 0:
716 722 return l
717 723
718 724 t = self.rawdata(rev)
719 725 return len(t)
720 726
721 727 def size(self, rev):
722 728 """length of non-raw text (processed by a "read" flag processor)"""
723 729 # fast path: if no "read" flag processor could change the content,
724 730 # size is rawsize. note: ELLIPSIS is known to not change the content.
725 731 flags = self.flags(rev)
726 732 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
727 733 return self.rawsize(rev)
728 734
729 735 return len(self.revision(rev, raw=False))
730 736
731 737 def chainbase(self, rev):
732 738 base = self._chainbasecache.get(rev)
733 739 if base is not None:
734 740 return base
735 741
736 742 index = self.index
737 743 iterrev = rev
738 744 base = index[iterrev][3]
739 745 while base != iterrev:
740 746 iterrev = base
741 747 base = index[iterrev][3]
742 748
743 749 self._chainbasecache[rev] = base
744 750 return base
745 751
746 752 def linkrev(self, rev):
747 753 return self.index[rev][4]
748 754
749 755 def parentrevs(self, rev):
750 756 try:
751 757 entry = self.index[rev]
752 758 except IndexError:
753 759 if rev == wdirrev:
754 760 raise error.WdirUnsupported
755 761 raise
756 762
757 763 return entry[5], entry[6]
758 764
759 765 # fast parentrevs(rev) where rev isn't filtered
760 766 _uncheckedparentrevs = parentrevs
761 767
762 768 def node(self, rev):
763 769 try:
764 770 return self.index[rev][7]
765 771 except IndexError:
766 772 if rev == wdirrev:
767 773 raise error.WdirUnsupported
768 774 raise
769 775
770 776 # Derived from index values.
771 777
772 778 def end(self, rev):
773 779 return self.start(rev) + self.length(rev)
774 780
775 781 def parents(self, node):
776 782 i = self.index
777 783 d = i[self.rev(node)]
778 784 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
779 785
780 786 def chainlen(self, rev):
781 787 return self._chaininfo(rev)[0]
782 788
783 789 def _chaininfo(self, rev):
784 790 chaininfocache = self._chaininfocache
785 791 if rev in chaininfocache:
786 792 return chaininfocache[rev]
787 793 index = self.index
788 794 generaldelta = self._generaldelta
789 795 iterrev = rev
790 796 e = index[iterrev]
791 797 clen = 0
792 798 compresseddeltalen = 0
793 799 while iterrev != e[3]:
794 800 clen += 1
795 801 compresseddeltalen += e[1]
796 802 if generaldelta:
797 803 iterrev = e[3]
798 804 else:
799 805 iterrev -= 1
800 806 if iterrev in chaininfocache:
801 807 t = chaininfocache[iterrev]
802 808 clen += t[0]
803 809 compresseddeltalen += t[1]
804 810 break
805 811 e = index[iterrev]
806 812 else:
807 813 # Add text length of base since decompressing that also takes
808 814 # work. For cache hits the length is already included.
809 815 compresseddeltalen += e[1]
810 816 r = (clen, compresseddeltalen)
811 817 chaininfocache[rev] = r
812 818 return r
813 819
814 820 def _deltachain(self, rev, stoprev=None):
815 821 """Obtain the delta chain for a revision.
816 822
817 823 ``stoprev`` specifies a revision to stop at. If not specified, we
818 824 stop at the base of the chain.
819 825
820 826 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
821 827 revs in ascending order and ``stopped`` is a bool indicating whether
822 828 ``stoprev`` was hit.
823 829 """
824 830 # Try C implementation.
825 831 try:
826 832 return self.index.deltachain(rev, stoprev, self._generaldelta)
827 833 except AttributeError:
828 834 pass
829 835
830 836 chain = []
831 837
832 838 # Alias to prevent attribute lookup in tight loop.
833 839 index = self.index
834 840 generaldelta = self._generaldelta
835 841
836 842 iterrev = rev
837 843 e = index[iterrev]
838 844 while iterrev != e[3] and iterrev != stoprev:
839 845 chain.append(iterrev)
840 846 if generaldelta:
841 847 iterrev = e[3]
842 848 else:
843 849 iterrev -= 1
844 850 e = index[iterrev]
845 851
846 852 if iterrev == stoprev:
847 853 stopped = True
848 854 else:
849 855 chain.append(iterrev)
850 856 stopped = False
851 857
852 858 chain.reverse()
853 859 return chain, stopped
854 860
855 861 def ancestors(self, revs, stoprev=0, inclusive=False):
856 862 """Generate the ancestors of 'revs' in reverse revision order.
857 863 Does not generate revs lower than stoprev.
858 864
859 865 See the documentation for ancestor.lazyancestors for more details."""
860 866
861 867 # first, make sure start revisions aren't filtered
862 868 revs = list(revs)
863 869 checkrev = self.node
864 870 for r in revs:
865 871 checkrev(r)
866 872 # and we're sure ancestors aren't filtered as well
867 873
868 874 if rustancestor is not None:
869 875 lazyancestors = rustancestor.LazyAncestors
870 876 arg = self.index
871 877 elif util.safehasattr(parsers, b'rustlazyancestors'):
872 878 lazyancestors = ancestor.rustlazyancestors
873 879 arg = self.index
874 880 else:
875 881 lazyancestors = ancestor.lazyancestors
876 882 arg = self._uncheckedparentrevs
877 883 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
878 884
879 885 def descendants(self, revs):
880 886 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
881 887
882 888 def findcommonmissing(self, common=None, heads=None):
883 889 """Return a tuple of the ancestors of common and the ancestors of heads
884 890 that are not ancestors of common. In revset terminology, we return the
885 891 tuple:
886 892
887 893 ::common, (::heads) - (::common)
888 894
889 895 The list is sorted by revision number, meaning it is
890 896 topologically sorted.
891 897
892 898 'heads' and 'common' are both lists of node IDs. If heads is
893 899 not supplied, uses all of the revlog's heads. If common is not
894 900 supplied, uses nullid."""
895 901 if common is None:
896 902 common = [nullid]
897 903 if heads is None:
898 904 heads = self.heads()
899 905
900 906 common = [self.rev(n) for n in common]
901 907 heads = [self.rev(n) for n in heads]
902 908
903 909 # we want the ancestors, but inclusive
904 910 class lazyset(object):
905 911 def __init__(self, lazyvalues):
906 912 self.addedvalues = set()
907 913 self.lazyvalues = lazyvalues
908 914
909 915 def __contains__(self, value):
910 916 return value in self.addedvalues or value in self.lazyvalues
911 917
912 918 def __iter__(self):
913 919 added = self.addedvalues
914 920 for r in added:
915 921 yield r
916 922 for r in self.lazyvalues:
917 923 if not r in added:
918 924 yield r
919 925
920 926 def add(self, value):
921 927 self.addedvalues.add(value)
922 928
923 929 def update(self, values):
924 930 self.addedvalues.update(values)
925 931
926 932 has = lazyset(self.ancestors(common))
927 933 has.add(nullrev)
928 934 has.update(common)
929 935
930 936 # take all ancestors from heads that aren't in has
931 937 missing = set()
932 938 visit = collections.deque(r for r in heads if r not in has)
933 939 while visit:
934 940 r = visit.popleft()
935 941 if r in missing:
936 942 continue
937 943 else:
938 944 missing.add(r)
939 945 for p in self.parentrevs(r):
940 946 if p not in has:
941 947 visit.append(p)
942 948 missing = list(missing)
943 949 missing.sort()
944 950 return has, [self.node(miss) for miss in missing]
945 951
946 952 def incrementalmissingrevs(self, common=None):
947 953 """Return an object that can be used to incrementally compute the
948 954 revision numbers of the ancestors of arbitrary sets that are not
949 955 ancestors of common. This is an ancestor.incrementalmissingancestors
950 956 object.
951 957
952 958 'common' is a list of revision numbers. If common is not supplied, uses
953 959 nullrev.
954 960 """
955 961 if common is None:
956 962 common = [nullrev]
957 963
958 964 if rustancestor is not None:
959 965 return rustancestor.MissingAncestors(self.index, common)
960 966 return ancestor.incrementalmissingancestors(self.parentrevs, common)
961 967
962 968 def findmissingrevs(self, common=None, heads=None):
963 969 """Return the revision numbers of the ancestors of heads that
964 970 are not ancestors of common.
965 971
966 972 More specifically, return a list of revision numbers corresponding to
967 973 nodes N such that every N satisfies the following constraints:
968 974
969 975 1. N is an ancestor of some node in 'heads'
970 976 2. N is not an ancestor of any node in 'common'
971 977
972 978 The list is sorted by revision number, meaning it is
973 979 topologically sorted.
974 980
975 981 'heads' and 'common' are both lists of revision numbers. If heads is
976 982 not supplied, uses all of the revlog's heads. If common is not
977 983 supplied, uses nullid."""
978 984 if common is None:
979 985 common = [nullrev]
980 986 if heads is None:
981 987 heads = self.headrevs()
982 988
983 989 inc = self.incrementalmissingrevs(common=common)
984 990 return inc.missingancestors(heads)
985 991
986 992 def findmissing(self, common=None, heads=None):
987 993 """Return the ancestors of heads that are not ancestors of common.
988 994
989 995 More specifically, return a list of nodes N such that every N
990 996 satisfies the following constraints:
991 997
992 998 1. N is an ancestor of some node in 'heads'
993 999 2. N is not an ancestor of any node in 'common'
994 1000
995 1001 The list is sorted by revision number, meaning it is
996 1002 topologically sorted.
997 1003
998 1004 'heads' and 'common' are both lists of node IDs. If heads is
999 1005 not supplied, uses all of the revlog's heads. If common is not
1000 1006 supplied, uses nullid."""
1001 1007 if common is None:
1002 1008 common = [nullid]
1003 1009 if heads is None:
1004 1010 heads = self.heads()
1005 1011
1006 1012 common = [self.rev(n) for n in common]
1007 1013 heads = [self.rev(n) for n in heads]
1008 1014
1009 1015 inc = self.incrementalmissingrevs(common=common)
1010 1016 return [self.node(r) for r in inc.missingancestors(heads)]
1011 1017
1012 1018 def nodesbetween(self, roots=None, heads=None):
1013 1019 """Return a topological path from 'roots' to 'heads'.
1014 1020
1015 1021 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1016 1022 topologically sorted list of all nodes N that satisfy both of
1017 1023 these constraints:
1018 1024
1019 1025 1. N is a descendant of some node in 'roots'
1020 1026 2. N is an ancestor of some node in 'heads'
1021 1027
1022 1028 Every node is considered to be both a descendant and an ancestor
1023 1029 of itself, so every reachable node in 'roots' and 'heads' will be
1024 1030 included in 'nodes'.
1025 1031
1026 1032 'outroots' is the list of reachable nodes in 'roots', i.e., the
1027 1033 subset of 'roots' that is returned in 'nodes'. Likewise,
1028 1034 'outheads' is the subset of 'heads' that is also in 'nodes'.
1029 1035
1030 1036 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1031 1037 unspecified, uses nullid as the only root. If 'heads' is
1032 1038 unspecified, uses list of all of the revlog's heads."""
1033 1039 nonodes = ([], [], [])
1034 1040 if roots is not None:
1035 1041 roots = list(roots)
1036 1042 if not roots:
1037 1043 return nonodes
1038 1044 lowestrev = min([self.rev(n) for n in roots])
1039 1045 else:
1040 1046 roots = [nullid] # Everybody's a descendant of nullid
1041 1047 lowestrev = nullrev
1042 1048 if (lowestrev == nullrev) and (heads is None):
1043 1049 # We want _all_ the nodes!
1044 1050 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1045 1051 if heads is None:
1046 1052 # All nodes are ancestors, so the latest ancestor is the last
1047 1053 # node.
1048 1054 highestrev = len(self) - 1
1049 1055 # Set ancestors to None to signal that every node is an ancestor.
1050 1056 ancestors = None
1051 1057 # Set heads to an empty dictionary for later discovery of heads
1052 1058 heads = {}
1053 1059 else:
1054 1060 heads = list(heads)
1055 1061 if not heads:
1056 1062 return nonodes
1057 1063 ancestors = set()
1058 1064 # Turn heads into a dictionary so we can remove 'fake' heads.
1059 1065 # Also, later we will be using it to filter out the heads we can't
1060 1066 # find from roots.
1061 1067 heads = dict.fromkeys(heads, False)
1062 1068 # Start at the top and keep marking parents until we're done.
1063 1069 nodestotag = set(heads)
1064 1070 # Remember where the top was so we can use it as a limit later.
1065 1071 highestrev = max([self.rev(n) for n in nodestotag])
1066 1072 while nodestotag:
1067 1073 # grab a node to tag
1068 1074 n = nodestotag.pop()
1069 1075 # Never tag nullid
1070 1076 if n == nullid:
1071 1077 continue
1072 1078 # A node's revision number represents its place in a
1073 1079 # topologically sorted list of nodes.
1074 1080 r = self.rev(n)
1075 1081 if r >= lowestrev:
1076 1082 if n not in ancestors:
1077 1083 # If we are possibly a descendant of one of the roots
1078 1084 # and we haven't already been marked as an ancestor
1079 1085 ancestors.add(n) # Mark as ancestor
1080 1086 # Add non-nullid parents to list of nodes to tag.
1081 1087 nodestotag.update(
1082 1088 [p for p in self.parents(n) if p != nullid]
1083 1089 )
1084 1090 elif n in heads: # We've seen it before, is it a fake head?
1085 1091 # So it is, real heads should not be the ancestors of
1086 1092 # any other heads.
1087 1093 heads.pop(n)
1088 1094 if not ancestors:
1089 1095 return nonodes
1090 1096 # Now that we have our set of ancestors, we want to remove any
1091 1097 # roots that are not ancestors.
1092 1098
1093 1099 # If one of the roots was nullid, everything is included anyway.
1094 1100 if lowestrev > nullrev:
1095 1101 # But, since we weren't, let's recompute the lowest rev to not
1096 1102 # include roots that aren't ancestors.
1097 1103
1098 1104 # Filter out roots that aren't ancestors of heads
1099 1105 roots = [root for root in roots if root in ancestors]
1100 1106 # Recompute the lowest revision
1101 1107 if roots:
1102 1108 lowestrev = min([self.rev(root) for root in roots])
1103 1109 else:
1104 1110 # No more roots? Return empty list
1105 1111 return nonodes
1106 1112 else:
1107 1113 # We are descending from nullid, and don't need to care about
1108 1114 # any other roots.
1109 1115 lowestrev = nullrev
1110 1116 roots = [nullid]
1111 1117 # Transform our roots list into a set.
1112 1118 descendants = set(roots)
1113 1119 # Also, keep the original roots so we can filter out roots that aren't
1114 1120 # 'real' roots (i.e. are descended from other roots).
1115 1121 roots = descendants.copy()
1116 1122 # Our topologically sorted list of output nodes.
1117 1123 orderedout = []
1118 1124 # Don't start at nullid since we don't want nullid in our output list,
1119 1125 # and if nullid shows up in descendants, empty parents will look like
1120 1126 # they're descendants.
1121 1127 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1122 1128 n = self.node(r)
1123 1129 isdescendant = False
1124 1130 if lowestrev == nullrev: # Everybody is a descendant of nullid
1125 1131 isdescendant = True
1126 1132 elif n in descendants:
1127 1133 # n is already a descendant
1128 1134 isdescendant = True
1129 1135 # This check only needs to be done here because all the roots
1130 1136 # will start being marked is descendants before the loop.
1131 1137 if n in roots:
1132 1138 # If n was a root, check if it's a 'real' root.
1133 1139 p = tuple(self.parents(n))
1134 1140 # If any of its parents are descendants, it's not a root.
1135 1141 if (p[0] in descendants) or (p[1] in descendants):
1136 1142 roots.remove(n)
1137 1143 else:
1138 1144 p = tuple(self.parents(n))
1139 1145 # A node is a descendant if either of its parents are
1140 1146 # descendants. (We seeded the dependents list with the roots
1141 1147 # up there, remember?)
1142 1148 if (p[0] in descendants) or (p[1] in descendants):
1143 1149 descendants.add(n)
1144 1150 isdescendant = True
1145 1151 if isdescendant and ((ancestors is None) or (n in ancestors)):
1146 1152 # Only include nodes that are both descendants and ancestors.
1147 1153 orderedout.append(n)
1148 1154 if (ancestors is not None) and (n in heads):
1149 1155 # We're trying to figure out which heads are reachable
1150 1156 # from roots.
1151 1157 # Mark this head as having been reached
1152 1158 heads[n] = True
1153 1159 elif ancestors is None:
1154 1160 # Otherwise, we're trying to discover the heads.
1155 1161 # Assume this is a head because if it isn't, the next step
1156 1162 # will eventually remove it.
1157 1163 heads[n] = True
1158 1164 # But, obviously its parents aren't.
1159 1165 for p in self.parents(n):
1160 1166 heads.pop(p, None)
1161 1167 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1162 1168 roots = list(roots)
1163 1169 assert orderedout
1164 1170 assert roots
1165 1171 assert heads
1166 1172 return (orderedout, roots, heads)
1167 1173
1168 1174 def headrevs(self, revs=None):
1169 1175 if revs is None:
1170 1176 try:
1171 1177 return self.index.headrevs()
1172 1178 except AttributeError:
1173 1179 return self._headrevs()
1174 1180 if rustdagop is not None:
1175 1181 return rustdagop.headrevs(self.index, revs)
1176 1182 return dagop.headrevs(revs, self._uncheckedparentrevs)
1177 1183
1178 1184 def computephases(self, roots):
1179 1185 return self.index.computephasesmapsets(roots)
1180 1186
1181 1187 def _headrevs(self):
1182 1188 count = len(self)
1183 1189 if not count:
1184 1190 return [nullrev]
1185 1191 # we won't iter over filtered rev so nobody is a head at start
1186 1192 ishead = [0] * (count + 1)
1187 1193 index = self.index
1188 1194 for r in self:
1189 1195 ishead[r] = 1 # I may be an head
1190 1196 e = index[r]
1191 1197 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1192 1198 return [r for r, val in enumerate(ishead) if val]
1193 1199
1194 1200 def heads(self, start=None, stop=None):
1195 1201 """return the list of all nodes that have no children
1196 1202
1197 1203 if start is specified, only heads that are descendants of
1198 1204 start will be returned
1199 1205 if stop is specified, it will consider all the revs from stop
1200 1206 as if they had no children
1201 1207 """
1202 1208 if start is None and stop is None:
1203 1209 if not len(self):
1204 1210 return [nullid]
1205 1211 return [self.node(r) for r in self.headrevs()]
1206 1212
1207 1213 if start is None:
1208 1214 start = nullrev
1209 1215 else:
1210 1216 start = self.rev(start)
1211 1217
1212 1218 stoprevs = set(self.rev(n) for n in stop or [])
1213 1219
1214 1220 revs = dagop.headrevssubset(
1215 1221 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1216 1222 )
1217 1223
1218 1224 return [self.node(rev) for rev in revs]
1219 1225
1220 1226 def children(self, node):
1221 1227 """find the children of a given node"""
1222 1228 c = []
1223 1229 p = self.rev(node)
1224 1230 for r in self.revs(start=p + 1):
1225 1231 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1226 1232 if prevs:
1227 1233 for pr in prevs:
1228 1234 if pr == p:
1229 1235 c.append(self.node(r))
1230 1236 elif p == nullrev:
1231 1237 c.append(self.node(r))
1232 1238 return c
1233 1239
1234 1240 def commonancestorsheads(self, a, b):
1235 1241 """calculate all the heads of the common ancestors of nodes a and b"""
1236 1242 a, b = self.rev(a), self.rev(b)
1237 1243 ancs = self._commonancestorsheads(a, b)
1238 1244 return pycompat.maplist(self.node, ancs)
1239 1245
1240 1246 def _commonancestorsheads(self, *revs):
1241 1247 """calculate all the heads of the common ancestors of revs"""
1242 1248 try:
1243 1249 ancs = self.index.commonancestorsheads(*revs)
1244 1250 except (AttributeError, OverflowError): # C implementation failed
1245 1251 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1246 1252 return ancs
1247 1253
1248 1254 def isancestor(self, a, b):
1249 1255 """return True if node a is an ancestor of node b
1250 1256
1251 1257 A revision is considered an ancestor of itself."""
1252 1258 a, b = self.rev(a), self.rev(b)
1253 1259 return self.isancestorrev(a, b)
1254 1260
1255 1261 def isancestorrev(self, a, b):
1256 1262 """return True if revision a is an ancestor of revision b
1257 1263
1258 1264 A revision is considered an ancestor of itself.
1259 1265
1260 1266 The implementation of this is trivial but the use of
1261 1267 reachableroots is not."""
1262 1268 if a == nullrev:
1263 1269 return True
1264 1270 elif a == b:
1265 1271 return True
1266 1272 elif a > b:
1267 1273 return False
1268 1274 return bool(self.reachableroots(a, [b], [a], includepath=False))
1269 1275
1270 1276 def reachableroots(self, minroot, heads, roots, includepath=False):
1271 1277 """return (heads(::<roots> and <roots>::<heads>))
1272 1278
1273 1279 If includepath is True, return (<roots>::<heads>)."""
1274 1280 try:
1275 1281 return self.index.reachableroots2(
1276 1282 minroot, heads, roots, includepath
1277 1283 )
1278 1284 except AttributeError:
1279 1285 return dagop._reachablerootspure(
1280 1286 self.parentrevs, minroot, roots, heads, includepath
1281 1287 )
1282 1288
1283 1289 def ancestor(self, a, b):
1284 1290 """calculate the "best" common ancestor of nodes a and b"""
1285 1291
1286 1292 a, b = self.rev(a), self.rev(b)
1287 1293 try:
1288 1294 ancs = self.index.ancestors(a, b)
1289 1295 except (AttributeError, OverflowError):
1290 1296 ancs = ancestor.ancestors(self.parentrevs, a, b)
1291 1297 if ancs:
1292 1298 # choose a consistent winner when there's a tie
1293 1299 return min(map(self.node, ancs))
1294 1300 return nullid
1295 1301
1296 1302 def _match(self, id):
1297 1303 if isinstance(id, int):
1298 1304 # rev
1299 1305 return self.node(id)
1300 1306 if len(id) == 20:
1301 1307 # possibly a binary node
1302 1308 # odds of a binary node being all hex in ASCII are 1 in 10**25
1303 1309 try:
1304 1310 node = id
1305 1311 self.rev(node) # quick search the index
1306 1312 return node
1307 1313 except error.LookupError:
1308 1314 pass # may be partial hex id
1309 1315 try:
1310 1316 # str(rev)
1311 1317 rev = int(id)
1312 1318 if b"%d" % rev != id:
1313 1319 raise ValueError
1314 1320 if rev < 0:
1315 1321 rev = len(self) + rev
1316 1322 if rev < 0 or rev >= len(self):
1317 1323 raise ValueError
1318 1324 return self.node(rev)
1319 1325 except (ValueError, OverflowError):
1320 1326 pass
1321 1327 if len(id) == 40:
1322 1328 try:
1323 1329 # a full hex nodeid?
1324 1330 node = bin(id)
1325 1331 self.rev(node)
1326 1332 return node
1327 1333 except (TypeError, error.LookupError):
1328 1334 pass
1329 1335
1330 1336 def _partialmatch(self, id):
1331 1337 # we don't care wdirfilenodeids as they should be always full hash
1332 1338 maybewdir = wdirhex.startswith(id)
1333 1339 try:
1334 1340 partial = self.index.partialmatch(id)
1335 1341 if partial and self.hasnode(partial):
1336 1342 if maybewdir:
1337 1343 # single 'ff...' match in radix tree, ambiguous with wdir
1338 1344 raise error.RevlogError
1339 1345 return partial
1340 1346 if maybewdir:
1341 1347 # no 'ff...' match in radix tree, wdir identified
1342 1348 raise error.WdirUnsupported
1343 1349 return None
1344 1350 except error.RevlogError:
1345 1351 # parsers.c radix tree lookup gave multiple matches
1346 1352 # fast path: for unfiltered changelog, radix tree is accurate
1347 1353 if not getattr(self, 'filteredrevs', None):
1348 1354 raise error.AmbiguousPrefixLookupError(
1349 1355 id, self.indexfile, _(b'ambiguous identifier')
1350 1356 )
1351 1357 # fall through to slow path that filters hidden revisions
1352 1358 except (AttributeError, ValueError):
1353 1359 # we are pure python, or key was too short to search radix tree
1354 1360 pass
1355 1361
1356 1362 if id in self._pcache:
1357 1363 return self._pcache[id]
1358 1364
1359 1365 if len(id) <= 40:
1360 1366 try:
1361 1367 # hex(node)[:...]
1362 1368 l = len(id) // 2 # grab an even number of digits
1363 1369 prefix = bin(id[: l * 2])
1364 1370 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1365 1371 nl = [
1366 1372 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1367 1373 ]
1368 1374 if nullhex.startswith(id):
1369 1375 nl.append(nullid)
1370 1376 if len(nl) > 0:
1371 1377 if len(nl) == 1 and not maybewdir:
1372 1378 self._pcache[id] = nl[0]
1373 1379 return nl[0]
1374 1380 raise error.AmbiguousPrefixLookupError(
1375 1381 id, self.indexfile, _(b'ambiguous identifier')
1376 1382 )
1377 1383 if maybewdir:
1378 1384 raise error.WdirUnsupported
1379 1385 return None
1380 1386 except TypeError:
1381 1387 pass
1382 1388
1383 1389 def lookup(self, id):
1384 1390 """locate a node based on:
1385 1391 - revision number or str(revision number)
1386 1392 - nodeid or subset of hex nodeid
1387 1393 """
1388 1394 n = self._match(id)
1389 1395 if n is not None:
1390 1396 return n
1391 1397 n = self._partialmatch(id)
1392 1398 if n:
1393 1399 return n
1394 1400
1395 1401 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1396 1402
1397 1403 def shortest(self, node, minlength=1):
1398 1404 """Find the shortest unambiguous prefix that matches node."""
1399 1405
1400 1406 def isvalid(prefix):
1401 1407 try:
1402 1408 matchednode = self._partialmatch(prefix)
1403 1409 except error.AmbiguousPrefixLookupError:
1404 1410 return False
1405 1411 except error.WdirUnsupported:
1406 1412 # single 'ff...' match
1407 1413 return True
1408 1414 if matchednode is None:
1409 1415 raise error.LookupError(node, self.indexfile, _(b'no node'))
1410 1416 return True
1411 1417
1412 1418 def maybewdir(prefix):
1413 1419 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1414 1420
1415 1421 hexnode = hex(node)
1416 1422
1417 1423 def disambiguate(hexnode, minlength):
1418 1424 """Disambiguate against wdirid."""
1419 1425 for length in range(minlength, 41):
1420 1426 prefix = hexnode[:length]
1421 1427 if not maybewdir(prefix):
1422 1428 return prefix
1423 1429
1424 1430 if not getattr(self, 'filteredrevs', None):
1425 1431 try:
1426 1432 length = max(self.index.shortest(node), minlength)
1427 1433 return disambiguate(hexnode, length)
1428 1434 except error.RevlogError:
1429 1435 if node != wdirid:
1430 1436 raise error.LookupError(node, self.indexfile, _(b'no node'))
1431 1437 except AttributeError:
1432 1438 # Fall through to pure code
1433 1439 pass
1434 1440
1435 1441 if node == wdirid:
1436 1442 for length in range(minlength, 41):
1437 1443 prefix = hexnode[:length]
1438 1444 if isvalid(prefix):
1439 1445 return prefix
1440 1446
1441 1447 for length in range(minlength, 41):
1442 1448 prefix = hexnode[:length]
1443 1449 if isvalid(prefix):
1444 1450 return disambiguate(hexnode, length)
1445 1451
1446 1452 def cmp(self, node, text):
1447 1453 """compare text with a given file revision
1448 1454
1449 1455 returns True if text is different than what is stored.
1450 1456 """
1451 1457 p1, p2 = self.parents(node)
1452 1458 return storageutil.hashrevisionsha1(text, p1, p2) != node
1453 1459
1454 1460 def _cachesegment(self, offset, data):
1455 1461 """Add a segment to the revlog cache.
1456 1462
1457 1463 Accepts an absolute offset and the data that is at that location.
1458 1464 """
1459 1465 o, d = self._chunkcache
1460 1466 # try to add to existing cache
1461 1467 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1462 1468 self._chunkcache = o, d + data
1463 1469 else:
1464 1470 self._chunkcache = offset, data
1465 1471
1466 1472 def _readsegment(self, offset, length, df=None):
1467 1473 """Load a segment of raw data from the revlog.
1468 1474
1469 1475 Accepts an absolute offset, length to read, and an optional existing
1470 1476 file handle to read from.
1471 1477
1472 1478 If an existing file handle is passed, it will be seeked and the
1473 1479 original seek position will NOT be restored.
1474 1480
1475 1481 Returns a str or buffer of raw byte data.
1476 1482
1477 1483 Raises if the requested number of bytes could not be read.
1478 1484 """
1479 1485 # Cache data both forward and backward around the requested
1480 1486 # data, in a fixed size window. This helps speed up operations
1481 1487 # involving reading the revlog backwards.
1482 1488 cachesize = self._chunkcachesize
1483 1489 realoffset = offset & ~(cachesize - 1)
1484 1490 reallength = (
1485 1491 (offset + length + cachesize) & ~(cachesize - 1)
1486 1492 ) - realoffset
1487 1493 with self._datareadfp(df) as df:
1488 1494 df.seek(realoffset)
1489 1495 d = df.read(reallength)
1490 1496
1491 1497 self._cachesegment(realoffset, d)
1492 1498 if offset != realoffset or reallength != length:
1493 1499 startoffset = offset - realoffset
1494 1500 if len(d) - startoffset < length:
1495 1501 raise error.RevlogError(
1496 1502 _(
1497 1503 b'partial read of revlog %s; expected %d bytes from '
1498 1504 b'offset %d, got %d'
1499 1505 )
1500 1506 % (
1501 1507 self.indexfile if self._inline else self.datafile,
1502 1508 length,
1503 1509 realoffset,
1504 1510 len(d) - startoffset,
1505 1511 )
1506 1512 )
1507 1513
1508 1514 return util.buffer(d, startoffset, length)
1509 1515
1510 1516 if len(d) < length:
1511 1517 raise error.RevlogError(
1512 1518 _(
1513 1519 b'partial read of revlog %s; expected %d bytes from offset '
1514 1520 b'%d, got %d'
1515 1521 )
1516 1522 % (
1517 1523 self.indexfile if self._inline else self.datafile,
1518 1524 length,
1519 1525 offset,
1520 1526 len(d),
1521 1527 )
1522 1528 )
1523 1529
1524 1530 return d
1525 1531
1526 1532 def _getsegment(self, offset, length, df=None):
1527 1533 """Obtain a segment of raw data from the revlog.
1528 1534
1529 1535 Accepts an absolute offset, length of bytes to obtain, and an
1530 1536 optional file handle to the already-opened revlog. If the file
1531 1537 handle is used, it's original seek position will not be preserved.
1532 1538
1533 1539 Requests for data may be returned from a cache.
1534 1540
1535 1541 Returns a str or a buffer instance of raw byte data.
1536 1542 """
1537 1543 o, d = self._chunkcache
1538 1544 l = len(d)
1539 1545
1540 1546 # is it in the cache?
1541 1547 cachestart = offset - o
1542 1548 cacheend = cachestart + length
1543 1549 if cachestart >= 0 and cacheend <= l:
1544 1550 if cachestart == 0 and cacheend == l:
1545 1551 return d # avoid a copy
1546 1552 return util.buffer(d, cachestart, cacheend - cachestart)
1547 1553
1548 1554 return self._readsegment(offset, length, df=df)
1549 1555
1550 1556 def _getsegmentforrevs(self, startrev, endrev, df=None):
1551 1557 """Obtain a segment of raw data corresponding to a range of revisions.
1552 1558
1553 1559 Accepts the start and end revisions and an optional already-open
1554 1560 file handle to be used for reading. If the file handle is read, its
1555 1561 seek position will not be preserved.
1556 1562
1557 1563 Requests for data may be satisfied by a cache.
1558 1564
1559 1565 Returns a 2-tuple of (offset, data) for the requested range of
1560 1566 revisions. Offset is the integer offset from the beginning of the
1561 1567 revlog and data is a str or buffer of the raw byte data.
1562 1568
1563 1569 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1564 1570 to determine where each revision's data begins and ends.
1565 1571 """
1566 1572 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1567 1573 # (functions are expensive).
1568 1574 index = self.index
1569 1575 istart = index[startrev]
1570 1576 start = int(istart[0] >> 16)
1571 1577 if startrev == endrev:
1572 1578 end = start + istart[1]
1573 1579 else:
1574 1580 iend = index[endrev]
1575 1581 end = int(iend[0] >> 16) + iend[1]
1576 1582
1577 1583 if self._inline:
1578 1584 start += (startrev + 1) * self._io.size
1579 1585 end += (endrev + 1) * self._io.size
1580 1586 length = end - start
1581 1587
1582 1588 return start, self._getsegment(start, length, df=df)
1583 1589
1584 1590 def _chunk(self, rev, df=None):
1585 1591 """Obtain a single decompressed chunk for a revision.
1586 1592
1587 1593 Accepts an integer revision and an optional already-open file handle
1588 1594 to be used for reading. If used, the seek position of the file will not
1589 1595 be preserved.
1590 1596
1591 1597 Returns a str holding uncompressed data for the requested revision.
1592 1598 """
1593 1599 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1594 1600
1595 1601 def _chunks(self, revs, df=None, targetsize=None):
1596 1602 """Obtain decompressed chunks for the specified revisions.
1597 1603
1598 1604 Accepts an iterable of numeric revisions that are assumed to be in
1599 1605 ascending order. Also accepts an optional already-open file handle
1600 1606 to be used for reading. If used, the seek position of the file will
1601 1607 not be preserved.
1602 1608
1603 1609 This function is similar to calling ``self._chunk()`` multiple times,
1604 1610 but is faster.
1605 1611
1606 1612 Returns a list with decompressed data for each requested revision.
1607 1613 """
1608 1614 if not revs:
1609 1615 return []
1610 1616 start = self.start
1611 1617 length = self.length
1612 1618 inline = self._inline
1613 1619 iosize = self._io.size
1614 1620 buffer = util.buffer
1615 1621
1616 1622 l = []
1617 1623 ladd = l.append
1618 1624
1619 1625 if not self._withsparseread:
1620 1626 slicedchunks = (revs,)
1621 1627 else:
1622 1628 slicedchunks = deltautil.slicechunk(
1623 1629 self, revs, targetsize=targetsize
1624 1630 )
1625 1631
1626 1632 for revschunk in slicedchunks:
1627 1633 firstrev = revschunk[0]
1628 1634 # Skip trailing revisions with empty diff
1629 1635 for lastrev in revschunk[::-1]:
1630 1636 if length(lastrev) != 0:
1631 1637 break
1632 1638
1633 1639 try:
1634 1640 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1635 1641 except OverflowError:
1636 1642 # issue4215 - we can't cache a run of chunks greater than
1637 1643 # 2G on Windows
1638 1644 return [self._chunk(rev, df=df) for rev in revschunk]
1639 1645
1640 1646 decomp = self.decompress
1641 1647 for rev in revschunk:
1642 1648 chunkstart = start(rev)
1643 1649 if inline:
1644 1650 chunkstart += (rev + 1) * iosize
1645 1651 chunklength = length(rev)
1646 1652 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1647 1653
1648 1654 return l
1649 1655
1650 1656 def _chunkclear(self):
1651 1657 """Clear the raw chunk cache."""
1652 1658 self._chunkcache = (0, b'')
1653 1659
1654 1660 def deltaparent(self, rev):
1655 1661 """return deltaparent of the given revision"""
1656 1662 base = self.index[rev][3]
1657 1663 if base == rev:
1658 1664 return nullrev
1659 1665 elif self._generaldelta:
1660 1666 return base
1661 1667 else:
1662 1668 return rev - 1
1663 1669
1664 1670 def issnapshot(self, rev):
1665 1671 """tells whether rev is a snapshot
1666 1672 """
1667 1673 if not self._sparserevlog:
1668 1674 return self.deltaparent(rev) == nullrev
1669 1675 elif util.safehasattr(self.index, b'issnapshot'):
1670 1676 # directly assign the method to cache the testing and access
1671 1677 self.issnapshot = self.index.issnapshot
1672 1678 return self.issnapshot(rev)
1673 1679 if rev == nullrev:
1674 1680 return True
1675 1681 entry = self.index[rev]
1676 1682 base = entry[3]
1677 1683 if base == rev:
1678 1684 return True
1679 1685 if base == nullrev:
1680 1686 return True
1681 1687 p1 = entry[5]
1682 1688 p2 = entry[6]
1683 1689 if base == p1 or base == p2:
1684 1690 return False
1685 1691 return self.issnapshot(base)
1686 1692
1687 1693 def snapshotdepth(self, rev):
1688 1694 """number of snapshot in the chain before this one"""
1689 1695 if not self.issnapshot(rev):
1690 1696 raise error.ProgrammingError(b'revision %d not a snapshot')
1691 1697 return len(self._deltachain(rev)[0]) - 1
1692 1698
1693 1699 def revdiff(self, rev1, rev2):
1694 1700 """return or calculate a delta between two revisions
1695 1701
1696 1702 The delta calculated is in binary form and is intended to be written to
1697 1703 revlog data directly. So this function needs raw revision data.
1698 1704 """
1699 1705 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1700 1706 return bytes(self._chunk(rev2))
1701 1707
1702 1708 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1703 1709
1704 1710 def _processflags(self, text, flags, operation, raw=False):
1705 1711 """deprecated entry point to access flag processors"""
1706 1712 msg = b'_processflag(...) use the specialized variant'
1707 1713 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1708 1714 if raw:
1709 1715 return text, flagutil.processflagsraw(self, text, flags)
1710 1716 elif operation == b'read':
1711 1717 return flagutil.processflagsread(self, text, flags)
1712 1718 else: # write operation
1713 1719 return flagutil.processflagswrite(self, text, flags)
1714 1720
1715 1721 def revision(self, nodeorrev, _df=None, raw=False):
1716 1722 """return an uncompressed revision of a given node or revision
1717 1723 number.
1718 1724
1719 1725 _df - an existing file handle to read from. (internal-only)
1720 1726 raw - an optional argument specifying if the revision data is to be
1721 1727 treated as raw data when applying flag transforms. 'raw' should be set
1722 1728 to True when generating changegroups or in debug commands.
1723 1729 """
1724 1730 if raw:
1725 1731 msg = (
1726 1732 b'revlog.revision(..., raw=True) is deprecated, '
1727 1733 b'use revlog.rawdata(...)'
1728 1734 )
1729 1735 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1730 1736 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1731 1737
1732 1738 def sidedata(self, nodeorrev, _df=None):
1733 1739 """a map of extra data related to the changeset but not part of the hash
1734 1740
1735 1741 This function currently return a dictionary. However, more advanced
1736 1742 mapping object will likely be used in the future for a more
1737 1743 efficient/lazy code.
1738 1744 """
1739 1745 return self._revisiondata(nodeorrev, _df)[1]
1740 1746
1741 1747 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1742 1748 # deal with <nodeorrev> argument type
1743 1749 if isinstance(nodeorrev, int):
1744 1750 rev = nodeorrev
1745 1751 node = self.node(rev)
1746 1752 else:
1747 1753 node = nodeorrev
1748 1754 rev = None
1749 1755
1750 1756 # fast path the special `nullid` rev
1751 1757 if node == nullid:
1752 1758 return b"", {}
1753 1759
1754 1760 # The text as stored inside the revlog. Might be the revision or might
1755 1761 # need to be processed to retrieve the revision.
1756 1762 rawtext = None
1757 1763
1758 1764 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1759 1765
1760 1766 if raw and validated:
1761 1767 # if we don't want to process the raw text and that raw
1762 1768 # text is cached, we can exit early.
1763 1769 return rawtext, {}
1764 1770 if rev is None:
1765 1771 rev = self.rev(node)
1766 1772 # the revlog's flag for this revision
1767 1773 # (usually alter its state or content)
1768 1774 flags = self.flags(rev)
1769 1775
1770 1776 if validated and flags == REVIDX_DEFAULT_FLAGS:
1771 1777 # no extra flags set, no flag processor runs, text = rawtext
1772 1778 return rawtext, {}
1773 1779
1774 1780 sidedata = {}
1775 1781 if raw:
1776 1782 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1777 1783 text = rawtext
1778 1784 else:
1779 1785 try:
1780 1786 r = flagutil.processflagsread(self, rawtext, flags)
1781 1787 except error.SidedataHashError as exc:
1782 1788 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1783 1789 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1784 1790 raise error.RevlogError(msg)
1785 1791 text, validatehash, sidedata = r
1786 1792 if validatehash:
1787 1793 self.checkhash(text, node, rev=rev)
1788 1794 if not validated:
1789 1795 self._revisioncache = (node, rev, rawtext)
1790 1796
1791 1797 return text, sidedata
1792 1798
1793 1799 def _rawtext(self, node, rev, _df=None):
1794 1800 """return the possibly unvalidated rawtext for a revision
1795 1801
1796 1802 returns (rev, rawtext, validated)
1797 1803 """
1798 1804
1799 1805 # revision in the cache (could be useful to apply delta)
1800 1806 cachedrev = None
1801 1807 # An intermediate text to apply deltas to
1802 1808 basetext = None
1803 1809
1804 1810 # Check if we have the entry in cache
1805 1811 # The cache entry looks like (node, rev, rawtext)
1806 1812 if self._revisioncache:
1807 1813 if self._revisioncache[0] == node:
1808 1814 return (rev, self._revisioncache[2], True)
1809 1815 cachedrev = self._revisioncache[1]
1810 1816
1811 1817 if rev is None:
1812 1818 rev = self.rev(node)
1813 1819
1814 1820 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1815 1821 if stopped:
1816 1822 basetext = self._revisioncache[2]
1817 1823
1818 1824 # drop cache to save memory, the caller is expected to
1819 1825 # update self._revisioncache after validating the text
1820 1826 self._revisioncache = None
1821 1827
1822 1828 targetsize = None
1823 1829 rawsize = self.index[rev][2]
1824 1830 if 0 <= rawsize:
1825 1831 targetsize = 4 * rawsize
1826 1832
1827 1833 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1828 1834 if basetext is None:
1829 1835 basetext = bytes(bins[0])
1830 1836 bins = bins[1:]
1831 1837
1832 1838 rawtext = mdiff.patches(basetext, bins)
1833 1839 del basetext # let us have a chance to free memory early
1834 1840 return (rev, rawtext, False)
1835 1841
1836 1842 def rawdata(self, nodeorrev, _df=None):
1837 1843 """return an uncompressed raw data of a given node or revision number.
1838 1844
1839 1845 _df - an existing file handle to read from. (internal-only)
1840 1846 """
1841 1847 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1842 1848
1843 1849 def hash(self, text, p1, p2):
1844 1850 """Compute a node hash.
1845 1851
1846 1852 Available as a function so that subclasses can replace the hash
1847 1853 as needed.
1848 1854 """
1849 1855 return storageutil.hashrevisionsha1(text, p1, p2)
1850 1856
1851 1857 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1852 1858 """Check node hash integrity.
1853 1859
1854 1860 Available as a function so that subclasses can extend hash mismatch
1855 1861 behaviors as needed.
1856 1862 """
1857 1863 try:
1858 1864 if p1 is None and p2 is None:
1859 1865 p1, p2 = self.parents(node)
1860 1866 if node != self.hash(text, p1, p2):
1861 1867 # Clear the revision cache on hash failure. The revision cache
1862 1868 # only stores the raw revision and clearing the cache does have
1863 1869 # the side-effect that we won't have a cache hit when the raw
1864 1870 # revision data is accessed. But this case should be rare and
1865 1871 # it is extra work to teach the cache about the hash
1866 1872 # verification state.
1867 1873 if self._revisioncache and self._revisioncache[0] == node:
1868 1874 self._revisioncache = None
1869 1875
1870 1876 revornode = rev
1871 1877 if revornode is None:
1872 1878 revornode = templatefilters.short(hex(node))
1873 1879 raise error.RevlogError(
1874 1880 _(b"integrity check failed on %s:%s")
1875 1881 % (self.indexfile, pycompat.bytestr(revornode))
1876 1882 )
1877 1883 except error.RevlogError:
1878 1884 if self._censorable and storageutil.iscensoredtext(text):
1879 1885 raise error.CensoredNodeError(self.indexfile, node, text)
1880 1886 raise
1881 1887
1882 1888 def _enforceinlinesize(self, tr, fp=None):
1883 1889 """Check if the revlog is too big for inline and convert if so.
1884 1890
1885 1891 This should be called after revisions are added to the revlog. If the
1886 1892 revlog has grown too large to be an inline revlog, it will convert it
1887 1893 to use multiple index and data files.
1888 1894 """
1889 1895 tiprev = len(self) - 1
1890 1896 if (
1891 1897 not self._inline
1892 1898 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1893 1899 ):
1894 1900 return
1895 1901
1896 1902 trinfo = tr.find(self.indexfile)
1897 1903 if trinfo is None:
1898 1904 raise error.RevlogError(
1899 1905 _(b"%s not found in the transaction") % self.indexfile
1900 1906 )
1901 1907
1902 1908 trindex = trinfo[2]
1903 1909 if trindex is not None:
1904 1910 dataoff = self.start(trindex)
1905 1911 else:
1906 1912 # revlog was stripped at start of transaction, use all leftover data
1907 1913 trindex = len(self) - 1
1908 1914 dataoff = self.end(tiprev)
1909 1915
1910 1916 tr.add(self.datafile, dataoff)
1911 1917
1912 1918 if fp:
1913 1919 fp.flush()
1914 1920 fp.close()
1915 1921 # We can't use the cached file handle after close(). So prevent
1916 1922 # its usage.
1917 1923 self._writinghandles = None
1918 1924
1919 1925 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1920 1926 for r in self:
1921 1927 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1922 1928
1923 1929 with self._indexfp(b'w') as fp:
1924 1930 self.version &= ~FLAG_INLINE_DATA
1925 1931 self._inline = False
1926 1932 io = self._io
1927 1933 for i in self:
1928 1934 e = io.packentry(self.index[i], self.node, self.version, i)
1929 1935 fp.write(e)
1930 1936
1931 1937 # the temp file replace the real index when we exit the context
1932 1938 # manager
1933 1939
1934 1940 tr.replace(self.indexfile, trindex * self._io.size)
1935 1941 self._chunkclear()
1936 1942
1937 1943 def _nodeduplicatecallback(self, transaction, node):
1938 1944 """called when trying to add a node already stored.
1939 1945 """
1940 1946
1941 1947 def addrevision(
1942 1948 self,
1943 1949 text,
1944 1950 transaction,
1945 1951 link,
1946 1952 p1,
1947 1953 p2,
1948 1954 cachedelta=None,
1949 1955 node=None,
1950 1956 flags=REVIDX_DEFAULT_FLAGS,
1951 1957 deltacomputer=None,
1952 1958 sidedata=None,
1953 1959 ):
1954 1960 """add a revision to the log
1955 1961
1956 1962 text - the revision data to add
1957 1963 transaction - the transaction object used for rollback
1958 1964 link - the linkrev data to add
1959 1965 p1, p2 - the parent nodeids of the revision
1960 1966 cachedelta - an optional precomputed delta
1961 1967 node - nodeid of revision; typically node is not specified, and it is
1962 1968 computed by default as hash(text, p1, p2), however subclasses might
1963 1969 use different hashing method (and override checkhash() in such case)
1964 1970 flags - the known flags to set on the revision
1965 1971 deltacomputer - an optional deltacomputer instance shared between
1966 1972 multiple calls
1967 1973 """
1968 1974 if link == nullrev:
1969 1975 raise error.RevlogError(
1970 1976 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1971 1977 )
1972 1978
1973 1979 if sidedata is None:
1974 1980 sidedata = {}
1975 1981 flags = flags & ~REVIDX_SIDEDATA
1976 1982 elif not self.hassidedata:
1977 1983 raise error.ProgrammingError(
1978 1984 _(b"trying to add sidedata to a revlog who don't support them")
1979 1985 )
1980 1986 else:
1981 1987 flags |= REVIDX_SIDEDATA
1982 1988
1983 1989 if flags:
1984 1990 node = node or self.hash(text, p1, p2)
1985 1991
1986 1992 rawtext, validatehash = flagutil.processflagswrite(
1987 1993 self, text, flags, sidedata=sidedata
1988 1994 )
1989 1995
1990 1996 # If the flag processor modifies the revision data, ignore any provided
1991 1997 # cachedelta.
1992 1998 if rawtext != text:
1993 1999 cachedelta = None
1994 2000
1995 2001 if len(rawtext) > _maxentrysize:
1996 2002 raise error.RevlogError(
1997 2003 _(
1998 2004 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
1999 2005 )
2000 2006 % (self.indexfile, len(rawtext))
2001 2007 )
2002 2008
2003 2009 node = node or self.hash(rawtext, p1, p2)
2004 2010 if self.index.has_node(node):
2005 2011 return node
2006 2012
2007 2013 if validatehash:
2008 2014 self.checkhash(rawtext, node, p1=p1, p2=p2)
2009 2015
2010 2016 return self.addrawrevision(
2011 2017 rawtext,
2012 2018 transaction,
2013 2019 link,
2014 2020 p1,
2015 2021 p2,
2016 2022 node,
2017 2023 flags,
2018 2024 cachedelta=cachedelta,
2019 2025 deltacomputer=deltacomputer,
2020 2026 )
2021 2027
2022 2028 def addrawrevision(
2023 2029 self,
2024 2030 rawtext,
2025 2031 transaction,
2026 2032 link,
2027 2033 p1,
2028 2034 p2,
2029 2035 node,
2030 2036 flags,
2031 2037 cachedelta=None,
2032 2038 deltacomputer=None,
2033 2039 ):
2034 2040 """add a raw revision with known flags, node and parents
2035 2041 useful when reusing a revision not stored in this revlog (ex: received
2036 2042 over wire, or read from an external bundle).
2037 2043 """
2038 2044 dfh = None
2039 2045 if not self._inline:
2040 2046 dfh = self._datafp(b"a+")
2041 2047 ifh = self._indexfp(b"a+")
2042 2048 try:
2043 2049 return self._addrevision(
2044 2050 node,
2045 2051 rawtext,
2046 2052 transaction,
2047 2053 link,
2048 2054 p1,
2049 2055 p2,
2050 2056 flags,
2051 2057 cachedelta,
2052 2058 ifh,
2053 2059 dfh,
2054 2060 deltacomputer=deltacomputer,
2055 2061 )
2056 2062 finally:
2057 2063 if dfh:
2058 2064 dfh.close()
2059 2065 ifh.close()
2060 2066
2061 2067 def compress(self, data):
2062 2068 """Generate a possibly-compressed representation of data."""
2063 2069 if not data:
2064 2070 return b'', data
2065 2071
2066 2072 compressed = self._compressor.compress(data)
2067 2073
2068 2074 if compressed:
2069 2075 # The revlog compressor added the header in the returned data.
2070 2076 return b'', compressed
2071 2077
2072 2078 if data[0:1] == b'\0':
2073 2079 return b'', data
2074 2080 return b'u', data
2075 2081
2076 2082 def decompress(self, data):
2077 2083 """Decompress a revlog chunk.
2078 2084
2079 2085 The chunk is expected to begin with a header identifying the
2080 2086 format type so it can be routed to an appropriate decompressor.
2081 2087 """
2082 2088 if not data:
2083 2089 return data
2084 2090
2085 2091 # Revlogs are read much more frequently than they are written and many
2086 2092 # chunks only take microseconds to decompress, so performance is
2087 2093 # important here.
2088 2094 #
2089 2095 # We can make a few assumptions about revlogs:
2090 2096 #
2091 2097 # 1) the majority of chunks will be compressed (as opposed to inline
2092 2098 # raw data).
2093 2099 # 2) decompressing *any* data will likely by at least 10x slower than
2094 2100 # returning raw inline data.
2095 2101 # 3) we want to prioritize common and officially supported compression
2096 2102 # engines
2097 2103 #
2098 2104 # It follows that we want to optimize for "decompress compressed data
2099 2105 # when encoded with common and officially supported compression engines"
2100 2106 # case over "raw data" and "data encoded by less common or non-official
2101 2107 # compression engines." That is why we have the inline lookup first
2102 2108 # followed by the compengines lookup.
2103 2109 #
2104 2110 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2105 2111 # compressed chunks. And this matters for changelog and manifest reads.
2106 2112 t = data[0:1]
2107 2113
2108 2114 if t == b'x':
2109 2115 try:
2110 2116 return _zlibdecompress(data)
2111 2117 except zlib.error as e:
2112 2118 raise error.RevlogError(
2113 2119 _(b'revlog decompress error: %s')
2114 2120 % stringutil.forcebytestr(e)
2115 2121 )
2116 2122 # '\0' is more common than 'u' so it goes first.
2117 2123 elif t == b'\0':
2118 2124 return data
2119 2125 elif t == b'u':
2120 2126 return util.buffer(data, 1)
2121 2127
2122 2128 try:
2123 2129 compressor = self._decompressors[t]
2124 2130 except KeyError:
2125 2131 try:
2126 2132 engine = util.compengines.forrevlogheader(t)
2127 2133 compressor = engine.revlogcompressor(self._compengineopts)
2128 2134 self._decompressors[t] = compressor
2129 2135 except KeyError:
2130 2136 raise error.RevlogError(_(b'unknown compression type %r') % t)
2131 2137
2132 2138 return compressor.decompress(data)
2133 2139
2134 2140 def _addrevision(
2135 2141 self,
2136 2142 node,
2137 2143 rawtext,
2138 2144 transaction,
2139 2145 link,
2140 2146 p1,
2141 2147 p2,
2142 2148 flags,
2143 2149 cachedelta,
2144 2150 ifh,
2145 2151 dfh,
2146 2152 alwayscache=False,
2147 2153 deltacomputer=None,
2148 2154 ):
2149 2155 """internal function to add revisions to the log
2150 2156
2151 2157 see addrevision for argument descriptions.
2152 2158
2153 2159 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2154 2160
2155 2161 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2156 2162 be used.
2157 2163
2158 2164 invariants:
2159 2165 - rawtext is optional (can be None); if not set, cachedelta must be set.
2160 2166 if both are set, they must correspond to each other.
2161 2167 """
2162 2168 if node == nullid:
2163 2169 raise error.RevlogError(
2164 2170 _(b"%s: attempt to add null revision") % self.indexfile
2165 2171 )
2166 2172 if node == wdirid or node in wdirfilenodeids:
2167 2173 raise error.RevlogError(
2168 2174 _(b"%s: attempt to add wdir revision") % self.indexfile
2169 2175 )
2170 2176
2171 2177 if self._inline:
2172 2178 fh = ifh
2173 2179 else:
2174 2180 fh = dfh
2175 2181
2176 2182 btext = [rawtext]
2177 2183
2178 2184 curr = len(self)
2179 2185 prev = curr - 1
2180 2186 offset = self.end(prev)
2181 2187 p1r, p2r = self.rev(p1), self.rev(p2)
2182 2188
2183 2189 # full versions are inserted when the needed deltas
2184 2190 # become comparable to the uncompressed text
2185 2191 if rawtext is None:
2186 2192 # need rawtext size, before changed by flag processors, which is
2187 2193 # the non-raw size. use revlog explicitly to avoid filelog's extra
2188 2194 # logic that might remove metadata size.
2189 2195 textlen = mdiff.patchedsize(
2190 2196 revlog.size(self, cachedelta[0]), cachedelta[1]
2191 2197 )
2192 2198 else:
2193 2199 textlen = len(rawtext)
2194 2200
2195 2201 if deltacomputer is None:
2196 2202 deltacomputer = deltautil.deltacomputer(self)
2197 2203
2198 2204 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2199 2205
2200 2206 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2201 2207
2202 2208 e = (
2203 2209 offset_type(offset, flags),
2204 2210 deltainfo.deltalen,
2205 2211 textlen,
2206 2212 deltainfo.base,
2207 2213 link,
2208 2214 p1r,
2209 2215 p2r,
2210 2216 node,
2211 2217 )
2212 2218 self.index.append(e)
2213 2219
2214 2220 # Reset the pure node cache start lookup offset to account for new
2215 2221 # revision.
2216 2222 if self._nodepos is not None:
2217 2223 self._nodepos = curr
2218 2224
2219 2225 entry = self._io.packentry(e, self.node, self.version, curr)
2220 2226 self._writeentry(
2221 2227 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2222 2228 )
2223 2229
2224 2230 rawtext = btext[0]
2225 2231
2226 2232 if alwayscache and rawtext is None:
2227 2233 rawtext = deltacomputer.buildtext(revinfo, fh)
2228 2234
2229 2235 if type(rawtext) == bytes: # only accept immutable objects
2230 2236 self._revisioncache = (node, curr, rawtext)
2231 2237 self._chainbasecache[curr] = deltainfo.chainbase
2232 2238 return node
2233 2239
2234 2240 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2235 2241 # Files opened in a+ mode have inconsistent behavior on various
2236 2242 # platforms. Windows requires that a file positioning call be made
2237 2243 # when the file handle transitions between reads and writes. See
2238 2244 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2239 2245 # platforms, Python or the platform itself can be buggy. Some versions
2240 2246 # of Solaris have been observed to not append at the end of the file
2241 2247 # if the file was seeked to before the end. See issue4943 for more.
2242 2248 #
2243 2249 # We work around this issue by inserting a seek() before writing.
2244 2250 # Note: This is likely not necessary on Python 3. However, because
2245 2251 # the file handle is reused for reads and may be seeked there, we need
2246 2252 # to be careful before changing this.
2247 2253 ifh.seek(0, os.SEEK_END)
2248 2254 if dfh:
2249 2255 dfh.seek(0, os.SEEK_END)
2250 2256
2251 2257 curr = len(self) - 1
2252 2258 if not self._inline:
2253 2259 transaction.add(self.datafile, offset)
2254 2260 transaction.add(self.indexfile, curr * len(entry))
2255 2261 if data[0]:
2256 2262 dfh.write(data[0])
2257 2263 dfh.write(data[1])
2258 2264 ifh.write(entry)
2259 2265 else:
2260 2266 offset += curr * self._io.size
2261 2267 transaction.add(self.indexfile, offset, curr)
2262 2268 ifh.write(entry)
2263 2269 ifh.write(data[0])
2264 2270 ifh.write(data[1])
2265 2271 self._enforceinlinesize(transaction, ifh)
2266 2272
2267 2273 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2268 2274 """
2269 2275 add a delta group
2270 2276
2271 2277 given a set of deltas, add them to the revision log. the
2272 2278 first delta is against its parent, which should be in our
2273 2279 log, the rest are against the previous delta.
2274 2280
2275 2281 If ``addrevisioncb`` is defined, it will be called with arguments of
2276 2282 this revlog and the node that was added.
2277 2283 """
2278 2284
2279 2285 if self._writinghandles:
2280 2286 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2281 2287
2282 2288 nodes = []
2283 2289
2284 2290 r = len(self)
2285 2291 end = 0
2286 2292 if r:
2287 2293 end = self.end(r - 1)
2288 2294 ifh = self._indexfp(b"a+")
2289 2295 isize = r * self._io.size
2290 2296 if self._inline:
2291 2297 transaction.add(self.indexfile, end + isize, r)
2292 2298 dfh = None
2293 2299 else:
2294 2300 transaction.add(self.indexfile, isize, r)
2295 2301 transaction.add(self.datafile, end)
2296 2302 dfh = self._datafp(b"a+")
2297 2303
2298 2304 def flush():
2299 2305 if dfh:
2300 2306 dfh.flush()
2301 2307 ifh.flush()
2302 2308
2303 2309 self._writinghandles = (ifh, dfh)
2304 2310
2305 2311 try:
2306 2312 deltacomputer = deltautil.deltacomputer(self)
2307 2313 # loop through our set of deltas
2308 2314 for data in deltas:
2309 2315 node, p1, p2, linknode, deltabase, delta, flags = data
2310 2316 link = linkmapper(linknode)
2311 2317 flags = flags or REVIDX_DEFAULT_FLAGS
2312 2318
2313 2319 nodes.append(node)
2314 2320
2315 2321 if self.index.has_node(node):
2316 2322 self._nodeduplicatecallback(transaction, node)
2317 2323 # this can happen if two branches make the same change
2318 2324 continue
2319 2325
2320 2326 for p in (p1, p2):
2321 2327 if not self.index.has_node(p):
2322 2328 raise error.LookupError(
2323 2329 p, self.indexfile, _(b'unknown parent')
2324 2330 )
2325 2331
2326 2332 if not self.index.has_node(deltabase):
2327 2333 raise error.LookupError(
2328 2334 deltabase, self.indexfile, _(b'unknown delta base')
2329 2335 )
2330 2336
2331 2337 baserev = self.rev(deltabase)
2332 2338
2333 2339 if baserev != nullrev and self.iscensored(baserev):
2334 2340 # if base is censored, delta must be full replacement in a
2335 2341 # single patch operation
2336 2342 hlen = struct.calcsize(b">lll")
2337 2343 oldlen = self.rawsize(baserev)
2338 2344 newlen = len(delta) - hlen
2339 2345 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2340 2346 raise error.CensoredBaseError(
2341 2347 self.indexfile, self.node(baserev)
2342 2348 )
2343 2349
2344 2350 if not flags and self._peek_iscensored(baserev, delta, flush):
2345 2351 flags |= REVIDX_ISCENSORED
2346 2352
2347 2353 # We assume consumers of addrevisioncb will want to retrieve
2348 2354 # the added revision, which will require a call to
2349 2355 # revision(). revision() will fast path if there is a cache
2350 2356 # hit. So, we tell _addrevision() to always cache in this case.
2351 2357 # We're only using addgroup() in the context of changegroup
2352 2358 # generation so the revision data can always be handled as raw
2353 2359 # by the flagprocessor.
2354 2360 self._addrevision(
2355 2361 node,
2356 2362 None,
2357 2363 transaction,
2358 2364 link,
2359 2365 p1,
2360 2366 p2,
2361 2367 flags,
2362 2368 (baserev, delta),
2363 2369 ifh,
2364 2370 dfh,
2365 2371 alwayscache=bool(addrevisioncb),
2366 2372 deltacomputer=deltacomputer,
2367 2373 )
2368 2374
2369 2375 if addrevisioncb:
2370 2376 addrevisioncb(self, node)
2371 2377
2372 2378 if not dfh and not self._inline:
2373 2379 # addrevision switched from inline to conventional
2374 2380 # reopen the index
2375 2381 ifh.close()
2376 2382 dfh = self._datafp(b"a+")
2377 2383 ifh = self._indexfp(b"a+")
2378 2384 self._writinghandles = (ifh, dfh)
2379 2385 finally:
2380 2386 self._writinghandles = None
2381 2387
2382 2388 if dfh:
2383 2389 dfh.close()
2384 2390 ifh.close()
2385 2391
2386 2392 return nodes
2387 2393
2388 2394 def iscensored(self, rev):
2389 2395 """Check if a file revision is censored."""
2390 2396 if not self._censorable:
2391 2397 return False
2392 2398
2393 2399 return self.flags(rev) & REVIDX_ISCENSORED
2394 2400
2395 2401 def _peek_iscensored(self, baserev, delta, flush):
2396 2402 """Quickly check if a delta produces a censored revision."""
2397 2403 if not self._censorable:
2398 2404 return False
2399 2405
2400 2406 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2401 2407
2402 2408 def getstrippoint(self, minlink):
2403 2409 """find the minimum rev that must be stripped to strip the linkrev
2404 2410
2405 2411 Returns a tuple containing the minimum rev and a set of all revs that
2406 2412 have linkrevs that will be broken by this strip.
2407 2413 """
2408 2414 return storageutil.resolvestripinfo(
2409 2415 minlink,
2410 2416 len(self) - 1,
2411 2417 self.headrevs(),
2412 2418 self.linkrev,
2413 2419 self.parentrevs,
2414 2420 )
2415 2421
2416 2422 def strip(self, minlink, transaction):
2417 2423 """truncate the revlog on the first revision with a linkrev >= minlink
2418 2424
2419 2425 This function is called when we're stripping revision minlink and
2420 2426 its descendants from the repository.
2421 2427
2422 2428 We have to remove all revisions with linkrev >= minlink, because
2423 2429 the equivalent changelog revisions will be renumbered after the
2424 2430 strip.
2425 2431
2426 2432 So we truncate the revlog on the first of these revisions, and
2427 2433 trust that the caller has saved the revisions that shouldn't be
2428 2434 removed and that it'll re-add them after this truncation.
2429 2435 """
2430 2436 if len(self) == 0:
2431 2437 return
2432 2438
2433 2439 rev, _ = self.getstrippoint(minlink)
2434 2440 if rev == len(self):
2435 2441 return
2436 2442
2437 2443 # first truncate the files on disk
2438 2444 end = self.start(rev)
2439 2445 if not self._inline:
2440 2446 transaction.add(self.datafile, end)
2441 2447 end = rev * self._io.size
2442 2448 else:
2443 2449 end += rev * self._io.size
2444 2450
2445 2451 transaction.add(self.indexfile, end)
2446 2452
2447 2453 # then reset internal state in memory to forget those revisions
2448 2454 self._revisioncache = None
2449 2455 self._chaininfocache = {}
2450 2456 self._chunkclear()
2451 2457
2452 2458 del self.index[rev:-1]
2453 2459 self._nodepos = None
2454 2460
2455 2461 def checksize(self):
2456 2462 """Check size of index and data files
2457 2463
2458 2464 return a (dd, di) tuple.
2459 2465 - dd: extra bytes for the "data" file
2460 2466 - di: extra bytes for the "index" file
2461 2467
2462 2468 A healthy revlog will return (0, 0).
2463 2469 """
2464 2470 expected = 0
2465 2471 if len(self):
2466 2472 expected = max(0, self.end(len(self) - 1))
2467 2473
2468 2474 try:
2469 2475 with self._datafp() as f:
2470 2476 f.seek(0, io.SEEK_END)
2471 2477 actual = f.tell()
2472 2478 dd = actual - expected
2473 2479 except IOError as inst:
2474 2480 if inst.errno != errno.ENOENT:
2475 2481 raise
2476 2482 dd = 0
2477 2483
2478 2484 try:
2479 2485 f = self.opener(self.indexfile)
2480 2486 f.seek(0, io.SEEK_END)
2481 2487 actual = f.tell()
2482 2488 f.close()
2483 2489 s = self._io.size
2484 2490 i = max(0, actual // s)
2485 2491 di = actual - (i * s)
2486 2492 if self._inline:
2487 2493 databytes = 0
2488 2494 for r in self:
2489 2495 databytes += max(0, self.length(r))
2490 2496 dd = 0
2491 2497 di = actual - len(self) * s - databytes
2492 2498 except IOError as inst:
2493 2499 if inst.errno != errno.ENOENT:
2494 2500 raise
2495 2501 di = 0
2496 2502
2497 2503 return (dd, di)
2498 2504
2499 2505 def files(self):
2500 2506 res = [self.indexfile]
2501 2507 if not self._inline:
2502 2508 res.append(self.datafile)
2503 2509 return res
2504 2510
2505 2511 def emitrevisions(
2506 2512 self,
2507 2513 nodes,
2508 2514 nodesorder=None,
2509 2515 revisiondata=False,
2510 2516 assumehaveparentrevisions=False,
2511 2517 deltamode=repository.CG_DELTAMODE_STD,
2512 2518 ):
2513 2519 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2514 2520 raise error.ProgrammingError(
2515 2521 b'unhandled value for nodesorder: %s' % nodesorder
2516 2522 )
2517 2523
2518 2524 if nodesorder is None and not self._generaldelta:
2519 2525 nodesorder = b'storage'
2520 2526
2521 2527 if (
2522 2528 not self._storedeltachains
2523 2529 and deltamode != repository.CG_DELTAMODE_PREV
2524 2530 ):
2525 2531 deltamode = repository.CG_DELTAMODE_FULL
2526 2532
2527 2533 return storageutil.emitrevisions(
2528 2534 self,
2529 2535 nodes,
2530 2536 nodesorder,
2531 2537 revlogrevisiondelta,
2532 2538 deltaparentfn=self.deltaparent,
2533 2539 candeltafn=self.candelta,
2534 2540 rawsizefn=self.rawsize,
2535 2541 revdifffn=self.revdiff,
2536 2542 flagsfn=self.flags,
2537 2543 deltamode=deltamode,
2538 2544 revisiondata=revisiondata,
2539 2545 assumehaveparentrevisions=assumehaveparentrevisions,
2540 2546 )
2541 2547
2542 2548 DELTAREUSEALWAYS = b'always'
2543 2549 DELTAREUSESAMEREVS = b'samerevs'
2544 2550 DELTAREUSENEVER = b'never'
2545 2551
2546 2552 DELTAREUSEFULLADD = b'fulladd'
2547 2553
2548 2554 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2549 2555
2550 2556 def clone(
2551 2557 self,
2552 2558 tr,
2553 2559 destrevlog,
2554 2560 addrevisioncb=None,
2555 2561 deltareuse=DELTAREUSESAMEREVS,
2556 2562 forcedeltabothparents=None,
2557 2563 sidedatacompanion=None,
2558 2564 ):
2559 2565 """Copy this revlog to another, possibly with format changes.
2560 2566
2561 2567 The destination revlog will contain the same revisions and nodes.
2562 2568 However, it may not be bit-for-bit identical due to e.g. delta encoding
2563 2569 differences.
2564 2570
2565 2571 The ``deltareuse`` argument control how deltas from the existing revlog
2566 2572 are preserved in the destination revlog. The argument can have the
2567 2573 following values:
2568 2574
2569 2575 DELTAREUSEALWAYS
2570 2576 Deltas will always be reused (if possible), even if the destination
2571 2577 revlog would not select the same revisions for the delta. This is the
2572 2578 fastest mode of operation.
2573 2579 DELTAREUSESAMEREVS
2574 2580 Deltas will be reused if the destination revlog would pick the same
2575 2581 revisions for the delta. This mode strikes a balance between speed
2576 2582 and optimization.
2577 2583 DELTAREUSENEVER
2578 2584 Deltas will never be reused. This is the slowest mode of execution.
2579 2585 This mode can be used to recompute deltas (e.g. if the diff/delta
2580 2586 algorithm changes).
2581 2587 DELTAREUSEFULLADD
2582 2588 Revision will be re-added as if their were new content. This is
2583 2589 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2584 2590 eg: large file detection and handling.
2585 2591
2586 2592 Delta computation can be slow, so the choice of delta reuse policy can
2587 2593 significantly affect run time.
2588 2594
2589 2595 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2590 2596 two extremes. Deltas will be reused if they are appropriate. But if the
2591 2597 delta could choose a better revision, it will do so. This means if you
2592 2598 are converting a non-generaldelta revlog to a generaldelta revlog,
2593 2599 deltas will be recomputed if the delta's parent isn't a parent of the
2594 2600 revision.
2595 2601
2596 2602 In addition to the delta policy, the ``forcedeltabothparents``
2597 2603 argument controls whether to force compute deltas against both parents
2598 2604 for merges. By default, the current default is used.
2599 2605
2600 2606 If not None, the `sidedatacompanion` is callable that accept two
2601 2607 arguments:
2602 2608
2603 2609 (srcrevlog, rev)
2604 2610
2605 2611 and return a triplet that control changes to sidedata content from the
2606 2612 old revision to the new clone result:
2607 2613
2608 2614 (dropall, filterout, update)
2609 2615
2610 2616 * if `dropall` is True, all sidedata should be dropped
2611 2617 * `filterout` is a set of sidedata keys that should be dropped
2612 2618 * `update` is a mapping of additionnal/new key -> value
2613 2619 """
2614 2620 if deltareuse not in self.DELTAREUSEALL:
2615 2621 raise ValueError(
2616 2622 _(b'value for deltareuse invalid: %s') % deltareuse
2617 2623 )
2618 2624
2619 2625 if len(destrevlog):
2620 2626 raise ValueError(_(b'destination revlog is not empty'))
2621 2627
2622 2628 if getattr(self, 'filteredrevs', None):
2623 2629 raise ValueError(_(b'source revlog has filtered revisions'))
2624 2630 if getattr(destrevlog, 'filteredrevs', None):
2625 2631 raise ValueError(_(b'destination revlog has filtered revisions'))
2626 2632
2627 2633 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2628 2634 # if possible.
2629 2635 oldlazydelta = destrevlog._lazydelta
2630 2636 oldlazydeltabase = destrevlog._lazydeltabase
2631 2637 oldamd = destrevlog._deltabothparents
2632 2638
2633 2639 try:
2634 2640 if deltareuse == self.DELTAREUSEALWAYS:
2635 2641 destrevlog._lazydeltabase = True
2636 2642 destrevlog._lazydelta = True
2637 2643 elif deltareuse == self.DELTAREUSESAMEREVS:
2638 2644 destrevlog._lazydeltabase = False
2639 2645 destrevlog._lazydelta = True
2640 2646 elif deltareuse == self.DELTAREUSENEVER:
2641 2647 destrevlog._lazydeltabase = False
2642 2648 destrevlog._lazydelta = False
2643 2649
2644 2650 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2645 2651
2646 2652 self._clone(
2647 2653 tr,
2648 2654 destrevlog,
2649 2655 addrevisioncb,
2650 2656 deltareuse,
2651 2657 forcedeltabothparents,
2652 2658 sidedatacompanion,
2653 2659 )
2654 2660
2655 2661 finally:
2656 2662 destrevlog._lazydelta = oldlazydelta
2657 2663 destrevlog._lazydeltabase = oldlazydeltabase
2658 2664 destrevlog._deltabothparents = oldamd
2659 2665
2660 2666 def _clone(
2661 2667 self,
2662 2668 tr,
2663 2669 destrevlog,
2664 2670 addrevisioncb,
2665 2671 deltareuse,
2666 2672 forcedeltabothparents,
2667 2673 sidedatacompanion,
2668 2674 ):
2669 2675 """perform the core duty of `revlog.clone` after parameter processing"""
2670 2676 deltacomputer = deltautil.deltacomputer(destrevlog)
2671 2677 index = self.index
2672 2678 for rev in self:
2673 2679 entry = index[rev]
2674 2680
2675 2681 # Some classes override linkrev to take filtered revs into
2676 2682 # account. Use raw entry from index.
2677 2683 flags = entry[0] & 0xFFFF
2678 2684 linkrev = entry[4]
2679 2685 p1 = index[entry[5]][7]
2680 2686 p2 = index[entry[6]][7]
2681 2687 node = entry[7]
2682 2688
2683 2689 sidedataactions = (False, [], {})
2684 2690 if sidedatacompanion is not None:
2685 2691 sidedataactions = sidedatacompanion(self, rev)
2686 2692
2687 2693 # (Possibly) reuse the delta from the revlog if allowed and
2688 2694 # the revlog chunk is a delta.
2689 2695 cachedelta = None
2690 2696 rawtext = None
2691 2697 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2692 2698 dropall, filterout, update = sidedataactions
2693 2699 text, sidedata = self._revisiondata(rev)
2694 2700 if dropall:
2695 2701 sidedata = {}
2696 2702 for key in filterout:
2697 2703 sidedata.pop(key, None)
2698 2704 sidedata.update(update)
2699 2705 if not sidedata:
2700 2706 sidedata = None
2701 2707 destrevlog.addrevision(
2702 2708 text,
2703 2709 tr,
2704 2710 linkrev,
2705 2711 p1,
2706 2712 p2,
2707 2713 cachedelta=cachedelta,
2708 2714 node=node,
2709 2715 flags=flags,
2710 2716 deltacomputer=deltacomputer,
2711 2717 sidedata=sidedata,
2712 2718 )
2713 2719 else:
2714 2720 if destrevlog._lazydelta:
2715 2721 dp = self.deltaparent(rev)
2716 2722 if dp != nullrev:
2717 2723 cachedelta = (dp, bytes(self._chunk(rev)))
2718 2724
2719 2725 if not cachedelta:
2720 2726 rawtext = self.rawdata(rev)
2721 2727
2722 2728 ifh = destrevlog.opener(
2723 2729 destrevlog.indexfile, b'a+', checkambig=False
2724 2730 )
2725 2731 dfh = None
2726 2732 if not destrevlog._inline:
2727 2733 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2728 2734 try:
2729 2735 destrevlog._addrevision(
2730 2736 node,
2731 2737 rawtext,
2732 2738 tr,
2733 2739 linkrev,
2734 2740 p1,
2735 2741 p2,
2736 2742 flags,
2737 2743 cachedelta,
2738 2744 ifh,
2739 2745 dfh,
2740 2746 deltacomputer=deltacomputer,
2741 2747 )
2742 2748 finally:
2743 2749 if dfh:
2744 2750 dfh.close()
2745 2751 ifh.close()
2746 2752
2747 2753 if addrevisioncb:
2748 2754 addrevisioncb(self, rev, node)
2749 2755
2750 2756 def censorrevision(self, tr, censornode, tombstone=b''):
2751 2757 if (self.version & 0xFFFF) == REVLOGV0:
2752 2758 raise error.RevlogError(
2753 2759 _(b'cannot censor with version %d revlogs') % self.version
2754 2760 )
2755 2761
2756 2762 censorrev = self.rev(censornode)
2757 2763 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2758 2764
2759 2765 if len(tombstone) > self.rawsize(censorrev):
2760 2766 raise error.Abort(
2761 2767 _(b'censor tombstone must be no longer than censored data')
2762 2768 )
2763 2769
2764 2770 # Rewriting the revlog in place is hard. Our strategy for censoring is
2765 2771 # to create a new revlog, copy all revisions to it, then replace the
2766 2772 # revlogs on transaction close.
2767 2773
2768 2774 newindexfile = self.indexfile + b'.tmpcensored'
2769 2775 newdatafile = self.datafile + b'.tmpcensored'
2770 2776
2771 2777 # This is a bit dangerous. We could easily have a mismatch of state.
2772 2778 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2773 2779 newrl.version = self.version
2774 2780 newrl._generaldelta = self._generaldelta
2775 2781 newrl._io = self._io
2776 2782
2777 2783 for rev in self.revs():
2778 2784 node = self.node(rev)
2779 2785 p1, p2 = self.parents(node)
2780 2786
2781 2787 if rev == censorrev:
2782 2788 newrl.addrawrevision(
2783 2789 tombstone,
2784 2790 tr,
2785 2791 self.linkrev(censorrev),
2786 2792 p1,
2787 2793 p2,
2788 2794 censornode,
2789 2795 REVIDX_ISCENSORED,
2790 2796 )
2791 2797
2792 2798 if newrl.deltaparent(rev) != nullrev:
2793 2799 raise error.Abort(
2794 2800 _(
2795 2801 b'censored revision stored as delta; '
2796 2802 b'cannot censor'
2797 2803 ),
2798 2804 hint=_(
2799 2805 b'censoring of revlogs is not '
2800 2806 b'fully implemented; please report '
2801 2807 b'this bug'
2802 2808 ),
2803 2809 )
2804 2810 continue
2805 2811
2806 2812 if self.iscensored(rev):
2807 2813 if self.deltaparent(rev) != nullrev:
2808 2814 raise error.Abort(
2809 2815 _(
2810 2816 b'cannot censor due to censored '
2811 2817 b'revision having delta stored'
2812 2818 )
2813 2819 )
2814 2820 rawtext = self._chunk(rev)
2815 2821 else:
2816 2822 rawtext = self.rawdata(rev)
2817 2823
2818 2824 newrl.addrawrevision(
2819 2825 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2820 2826 )
2821 2827
2822 2828 tr.addbackup(self.indexfile, location=b'store')
2823 2829 if not self._inline:
2824 2830 tr.addbackup(self.datafile, location=b'store')
2825 2831
2826 2832 self.opener.rename(newrl.indexfile, self.indexfile)
2827 2833 if not self._inline:
2828 2834 self.opener.rename(newrl.datafile, self.datafile)
2829 2835
2830 2836 self.clearcaches()
2831 2837 self._loadindex()
2832 2838
2833 2839 def verifyintegrity(self, state):
2834 2840 """Verifies the integrity of the revlog.
2835 2841
2836 2842 Yields ``revlogproblem`` instances describing problems that are
2837 2843 found.
2838 2844 """
2839 2845 dd, di = self.checksize()
2840 2846 if dd:
2841 2847 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2842 2848 if di:
2843 2849 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2844 2850
2845 2851 version = self.version & 0xFFFF
2846 2852
2847 2853 # The verifier tells us what version revlog we should be.
2848 2854 if version != state[b'expectedversion']:
2849 2855 yield revlogproblem(
2850 2856 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2851 2857 % (self.indexfile, version, state[b'expectedversion'])
2852 2858 )
2853 2859
2854 2860 state[b'skipread'] = set()
2855 2861
2856 2862 for rev in self:
2857 2863 node = self.node(rev)
2858 2864
2859 2865 # Verify contents. 4 cases to care about:
2860 2866 #
2861 2867 # common: the most common case
2862 2868 # rename: with a rename
2863 2869 # meta: file content starts with b'\1\n', the metadata
2864 2870 # header defined in filelog.py, but without a rename
2865 2871 # ext: content stored externally
2866 2872 #
2867 2873 # More formally, their differences are shown below:
2868 2874 #
2869 2875 # | common | rename | meta | ext
2870 2876 # -------------------------------------------------------
2871 2877 # flags() | 0 | 0 | 0 | not 0
2872 2878 # renamed() | False | True | False | ?
2873 2879 # rawtext[0:2]=='\1\n'| False | True | True | ?
2874 2880 #
2875 2881 # "rawtext" means the raw text stored in revlog data, which
2876 2882 # could be retrieved by "rawdata(rev)". "text"
2877 2883 # mentioned below is "revision(rev)".
2878 2884 #
2879 2885 # There are 3 different lengths stored physically:
2880 2886 # 1. L1: rawsize, stored in revlog index
2881 2887 # 2. L2: len(rawtext), stored in revlog data
2882 2888 # 3. L3: len(text), stored in revlog data if flags==0, or
2883 2889 # possibly somewhere else if flags!=0
2884 2890 #
2885 2891 # L1 should be equal to L2. L3 could be different from them.
2886 2892 # "text" may or may not affect commit hash depending on flag
2887 2893 # processors (see flagutil.addflagprocessor).
2888 2894 #
2889 2895 # | common | rename | meta | ext
2890 2896 # -------------------------------------------------
2891 2897 # rawsize() | L1 | L1 | L1 | L1
2892 2898 # size() | L1 | L2-LM | L1(*) | L1 (?)
2893 2899 # len(rawtext) | L2 | L2 | L2 | L2
2894 2900 # len(text) | L2 | L2 | L2 | L3
2895 2901 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2896 2902 #
2897 2903 # LM: length of metadata, depending on rawtext
2898 2904 # (*): not ideal, see comment in filelog.size
2899 2905 # (?): could be "- len(meta)" if the resolved content has
2900 2906 # rename metadata
2901 2907 #
2902 2908 # Checks needed to be done:
2903 2909 # 1. length check: L1 == L2, in all cases.
2904 2910 # 2. hash check: depending on flag processor, we may need to
2905 2911 # use either "text" (external), or "rawtext" (in revlog).
2906 2912
2907 2913 try:
2908 2914 skipflags = state.get(b'skipflags', 0)
2909 2915 if skipflags:
2910 2916 skipflags &= self.flags(rev)
2911 2917
2912 2918 if skipflags:
2913 2919 state[b'skipread'].add(node)
2914 2920 else:
2915 2921 # Side-effect: read content and verify hash.
2916 2922 self.revision(node)
2917 2923
2918 2924 l1 = self.rawsize(rev)
2919 2925 l2 = len(self.rawdata(node))
2920 2926
2921 2927 if l1 != l2:
2922 2928 yield revlogproblem(
2923 2929 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2924 2930 node=node,
2925 2931 )
2926 2932
2927 2933 except error.CensoredNodeError:
2928 2934 if state[b'erroroncensored']:
2929 2935 yield revlogproblem(
2930 2936 error=_(b'censored file data'), node=node
2931 2937 )
2932 2938 state[b'skipread'].add(node)
2933 2939 except Exception as e:
2934 2940 yield revlogproblem(
2935 2941 error=_(b'unpacking %s: %s')
2936 2942 % (short(node), stringutil.forcebytestr(e)),
2937 2943 node=node,
2938 2944 )
2939 2945 state[b'skipread'].add(node)
2940 2946
2941 2947 def storageinfo(
2942 2948 self,
2943 2949 exclusivefiles=False,
2944 2950 sharedfiles=False,
2945 2951 revisionscount=False,
2946 2952 trackedsize=False,
2947 2953 storedsize=False,
2948 2954 ):
2949 2955 d = {}
2950 2956
2951 2957 if exclusivefiles:
2952 2958 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2953 2959 if not self._inline:
2954 2960 d[b'exclusivefiles'].append((self.opener, self.datafile))
2955 2961
2956 2962 if sharedfiles:
2957 2963 d[b'sharedfiles'] = []
2958 2964
2959 2965 if revisionscount:
2960 2966 d[b'revisionscount'] = len(self)
2961 2967
2962 2968 if trackedsize:
2963 2969 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2964 2970
2965 2971 if storedsize:
2966 2972 d[b'storedsize'] = sum(
2967 2973 self.opener.stat(path).st_size for path in self.files()
2968 2974 )
2969 2975
2970 2976 return d
General Comments 0
You need to be logged in to leave comments. Login now