revlog: use node tree (native code) for shortest() calculation...
Martin von Zweigbergk
r37987:0304f224 default
@@ -1,797 +1,797
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "bitmanipulation.h"
16 16 #include "charencode.h"
17 17 #include "util.h"
18 18
19 19 #ifdef IS_PY3K
20 20 /* The mapping of Python types is meant to be temporary to get Python
21 21 * 3 to compile. We should remove this once Python 3 is fully
22 22 * supported and proper types are used in the extensions themselves. */
23 23 #define PyInt_Check PyLong_Check
24 24 #define PyInt_FromLong PyLong_FromLong
25 25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 26 #define PyInt_AsLong PyLong_AsLong
27 27 #endif
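
The PyInt_* aliases above, together with the PY23() macro used throughout this file, let one source tree compile under both Python 2 and 3. A minimal sketch of the PY23 idiom, assuming the usual definition (the real macro lives in a shared header, so this copy is illustrative only):

/* Sketch of the PY23 compatibility idiom; illustrative copy only. */
#ifdef IS_PY3K
#define PY23(py2, py3) py3 /* pick the Python 3 spelling */
#else
#define PY23(py2, py3) py2 /* pick the Python 2 spelling */
#endif

/*
 * Byte strings parse as "s#" on Python 2 but "y#" on Python 3, so
 * format strings are chosen at compile time, as in parse_manifest:
 *
 *     PyArg_ParseTuple(args, PY23("s#", "y#"), &str, &len);
 */
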
28 28
29 29 static const char *const versionerrortext = "Python minor version mismatch";
30 30
31 31 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
32 32 {
33 33 Py_ssize_t expected_size;
34 34
35 35 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
36 36 return NULL;
37 37
38 38 return _dict_new_presized(expected_size);
39 39 }
40 40
41 41 /*
42 42 * This code assumes that a manifest is stitched together with newline
43 43 * ('\n') characters.
44 44 */
45 45 static PyObject *parse_manifest(PyObject *self, PyObject *args)
46 46 {
47 47 PyObject *mfdict, *fdict;
48 48 char *str, *start, *end;
49 49 int len;
50 50
51 51 if (!PyArg_ParseTuple(
52 52 args, PY23("O!O!s#:parse_manifest", "O!O!y#:parse_manifest"),
53 53 &PyDict_Type, &mfdict, &PyDict_Type, &fdict, &str, &len))
54 54 goto quit;
55 55
56 56 start = str;
57 57 end = str + len;
58 58 while (start < end) {
59 59 PyObject *file = NULL, *node = NULL;
60 60 PyObject *flags = NULL;
61 61 char *zero = NULL, *newline = NULL;
62 62 ptrdiff_t nlen;
63 63
64 64 zero = memchr(start, '\0', end - start);
65 65 if (!zero) {
66 66 PyErr_SetString(PyExc_ValueError,
67 67 "manifest entry has no separator");
68 68 goto quit;
69 69 }
70 70
71 71 newline = memchr(zero + 1, '\n', end - (zero + 1));
72 72 if (!newline) {
73 73 PyErr_SetString(PyExc_ValueError,
74 74 "manifest contains trailing garbage");
75 75 goto quit;
76 76 }
77 77
78 78 file = PyBytes_FromStringAndSize(start, zero - start);
79 79
80 80 if (!file)
81 81 goto bail;
82 82
83 83 nlen = newline - zero - 1;
84 84
85 85 node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
86 86 if (!node)
87 87 goto bail;
88 88
89 89 if (nlen > 40) {
90 90 flags = PyBytes_FromStringAndSize(zero + 41, nlen - 40);
91 91 if (!flags)
92 92 goto bail;
93 93
94 94 if (PyDict_SetItem(fdict, file, flags) == -1)
95 95 goto bail;
96 96 }
97 97
98 98 if (PyDict_SetItem(mfdict, file, node) == -1)
99 99 goto bail;
100 100
101 101 start = newline + 1;
102 102
103 103 Py_XDECREF(flags);
104 104 Py_XDECREF(node);
105 105 Py_XDECREF(file);
106 106 continue;
107 107 bail:
108 108 Py_XDECREF(flags);
109 109 Py_XDECREF(node);
110 110 Py_XDECREF(file);
111 111 goto quit;
112 112 }
113 113
114 114 Py_INCREF(Py_None);
115 115 return Py_None;
116 116 quit:
117 117 return NULL;
118 118 }
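
For readers tracing parse_manifest: each entry is laid out as filename, a NUL byte, 40 hex digits of node, optional flag characters, and a newline. A minimal standalone sketch (plain C, no Python API; the sample entry is made up) that carves up one entry with memchr the same way the loop above does:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* made-up entry: filename NUL 40-hex-node flags newline */
    static const char entry[] =
        "foo.c\0" "0123456789abcdef0123456789abcdef01234567" "x\n";
    const char *start = entry, *end = entry + sizeof(entry) - 1;
    const char *zero = memchr(start, '\0', end - start);
    const char *newline = memchr(zero + 1, '\n', end - (zero + 1));
    ptrdiff_t nlen = newline - zero - 1; /* node plus optional flags */

    printf("file:  %.*s\n", (int)(zero - start), start);
    printf("node:  %.*s\n", nlen > 40 ? 40 : (int)nlen, zero + 1);
    if (nlen > 40) /* anything past 40 hex digits is flags */
        printf("flags: %.*s\n", (int)(nlen - 40), zero + 41);
    return 0;
}
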
119 119
120 120 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
121 121 int size, int mtime)
122 122 {
123 123 dirstateTupleObject *t =
124 124 PyObject_New(dirstateTupleObject, &dirstateTupleType);
125 125 if (!t)
126 126 return NULL;
127 127 t->state = state;
128 128 t->mode = mode;
129 129 t->size = size;
130 130 t->mtime = mtime;
131 131 return t;
132 132 }
133 133
134 134 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
135 135 PyObject *kwds)
136 136 {
137 137 /* We do all the initialization here and not in a tp_init function because
138 138 * dirstate_tuple is immutable. */
139 139 dirstateTupleObject *t;
140 140 char state;
141 141 int size, mode, mtime;
142 142 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
143 143 return NULL;
144 144
145 145 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
146 146 if (!t)
147 147 return NULL;
148 148 t->state = state;
149 149 t->mode = mode;
150 150 t->size = size;
151 151 t->mtime = mtime;
152 152
153 153 return (PyObject *)t;
154 154 }
155 155
156 156 static void dirstate_tuple_dealloc(PyObject *o)
157 157 {
158 158 PyObject_Del(o);
159 159 }
160 160
161 161 static Py_ssize_t dirstate_tuple_length(PyObject *o)
162 162 {
163 163 return 4;
164 164 }
165 165
166 166 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
167 167 {
168 168 dirstateTupleObject *t = (dirstateTupleObject *)o;
169 169 switch (i) {
170 170 case 0:
171 171 return PyBytes_FromStringAndSize(&t->state, 1);
172 172 case 1:
173 173 return PyInt_FromLong(t->mode);
174 174 case 2:
175 175 return PyInt_FromLong(t->size);
176 176 case 3:
177 177 return PyInt_FromLong(t->mtime);
178 178 default:
179 179 PyErr_SetString(PyExc_IndexError, "index out of range");
180 180 return NULL;
181 181 }
182 182 }
183 183
184 184 static PySequenceMethods dirstate_tuple_sq = {
185 185 dirstate_tuple_length, /* sq_length */
186 186 0, /* sq_concat */
187 187 0, /* sq_repeat */
188 188 dirstate_tuple_item, /* sq_item */
189 189 0, /* sq_ass_item */
190 190 0, /* sq_contains */
191 191 0, /* sq_inplace_concat */
192 192 0 /* sq_inplace_repeat */
193 193 };
194 194
195 195 PyTypeObject dirstateTupleType = {
196 196 PyVarObject_HEAD_INIT(NULL, 0) /* header */
197 197 "dirstate_tuple", /* tp_name */
198 198 sizeof(dirstateTupleObject), /* tp_basicsize */
199 199 0, /* tp_itemsize */
200 200 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
201 201 0, /* tp_print */
202 202 0, /* tp_getattr */
203 203 0, /* tp_setattr */
204 204 0, /* tp_compare */
205 205 0, /* tp_repr */
206 206 0, /* tp_as_number */
207 207 &dirstate_tuple_sq, /* tp_as_sequence */
208 208 0, /* tp_as_mapping */
209 209 0, /* tp_hash */
210 210 0, /* tp_call */
211 211 0, /* tp_str */
212 212 0, /* tp_getattro */
213 213 0, /* tp_setattro */
214 214 0, /* tp_as_buffer */
215 215 Py_TPFLAGS_DEFAULT, /* tp_flags */
216 216 "dirstate tuple", /* tp_doc */
217 217 0, /* tp_traverse */
218 218 0, /* tp_clear */
219 219 0, /* tp_richcompare */
220 220 0, /* tp_weaklistoffset */
221 221 0, /* tp_iter */
222 222 0, /* tp_iternext */
223 223 0, /* tp_methods */
224 224 0, /* tp_members */
225 225 0, /* tp_getset */
226 226 0, /* tp_base */
227 227 0, /* tp_dict */
228 228 0, /* tp_descr_get */
229 229 0, /* tp_descr_set */
230 230 0, /* tp_dictoffset */
231 231 0, /* tp_init */
232 232 0, /* tp_alloc */
233 233 dirstate_tuple_new, /* tp_new */
234 234 };
235 235
236 236 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
237 237 {
238 238 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
239 239 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
240 240 char state, *cur, *str, *cpos;
241 241 int mode, size, mtime;
242 242 unsigned int flen, len, pos = 40;
243 243 int readlen;
244 244
245 245 if (!PyArg_ParseTuple(
246 246 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
247 247 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen))
248 248 goto quit;
249 249
250 250 len = readlen;
251 251
252 252 /* read parents */
253 253 if (len < 40) {
254 254 PyErr_SetString(PyExc_ValueError,
255 255 "too little data for parents");
256 256 goto quit;
257 257 }
258 258
259 259 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, 20, str + 20, 20);
260 260 if (!parents)
261 261 goto quit;
262 262
263 263 /* read filenames */
264 264 while (pos >= 40 && pos < len) {
265 265 if (pos + 17 > len) {
266 266 PyErr_SetString(PyExc_ValueError,
267 267 "overflow in dirstate");
268 268 goto quit;
269 269 }
270 270 cur = str + pos;
271 271 /* unpack header */
272 272 state = *cur;
273 273 mode = getbe32(cur + 1);
274 274 size = getbe32(cur + 5);
275 275 mtime = getbe32(cur + 9);
276 276 flen = getbe32(cur + 13);
277 277 pos += 17;
278 278 cur += 17;
279 279 if (flen > len - pos) {
280 280 PyErr_SetString(PyExc_ValueError,
281 281 "overflow in dirstate");
282 282 goto quit;
283 283 }
284 284
285 285 entry =
286 286 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
287 287 cpos = memchr(cur, 0, flen);
288 288 if (cpos) {
289 289 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
290 290 cname = PyBytes_FromStringAndSize(
291 291 cpos + 1, flen - (cpos - cur) - 1);
292 292 if (!fname || !cname ||
293 293 PyDict_SetItem(cmap, fname, cname) == -1 ||
294 294 PyDict_SetItem(dmap, fname, entry) == -1)
295 295 goto quit;
296 296 Py_DECREF(cname);
297 297 } else {
298 298 fname = PyBytes_FromStringAndSize(cur, flen);
299 299 if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
300 300 goto quit;
301 301 }
302 302 Py_DECREF(fname);
303 303 Py_DECREF(entry);
304 304 fname = cname = entry = NULL;
305 305 pos += flen;
306 306 }
307 307
308 308 ret = parents;
309 309 Py_INCREF(ret);
310 310 quit:
311 311 Py_XDECREF(fname);
312 312 Py_XDECREF(cname);
313 313 Py_XDECREF(entry);
314 314 Py_XDECREF(parents);
315 315 return ret;
316 316 }
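
Each record consumed above is a fixed 17-byte header (one state byte plus four big-endian 32-bit fields: mode, size, mtime, filename length) followed by the filename, which may embed a NUL separating it from a copy source. A standalone sketch of the header decode, with getbe32 re-implemented locally since the real one comes from bitmanipulation.h:

#include <stdint.h>
#include <stdio.h>

/* local stand-in for getbe32 from bitmanipulation.h */
static uint32_t demo_getbe32(const char *p)
{
    const unsigned char *u = (const unsigned char *)p;
    return ((uint32_t)u[0] << 24) | ((uint32_t)u[1] << 16) |
           ((uint32_t)u[2] << 8) | (uint32_t)u[3];
}

int main(void)
{
    /* made-up record: state 'n', mode 0100644, size 12,
     * mtime 1500000000, flen 5, then the filename "foo.c" */
    static const char rec[] = "n"
        "\x00\x00\x81\xa4" "\x00\x00\x00\x0c"
        "\x59\x68\x2f\x00" "\x00\x00\x00\x05" "foo.c";
    const char *cur = rec;
    unsigned flen = demo_getbe32(cur + 13);

    printf("state %c mode %o size %u mtime %u file %.*s\n", *cur,
           (unsigned)demo_getbe32(cur + 1),
           (unsigned)demo_getbe32(cur + 5),
           (unsigned)demo_getbe32(cur + 9), (int)flen, cur + 17);
    return 0;
}
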
317 317
318 318 /*
319 319 * Build a set of non-normal and other parent entries from the dirstate dmap
320 320 */
321 321 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
322 322 {
323 323 PyObject *dmap, *fname, *v;
324 324 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
325 325 Py_ssize_t pos;
326 326
327 327 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
328 328 goto bail;
329 329
330 330 nonnset = PySet_New(NULL);
331 331 if (nonnset == NULL)
332 332 goto bail;
333 333
334 334 otherpset = PySet_New(NULL);
335 335 if (otherpset == NULL)
336 336 goto bail;
337 337
338 338 pos = 0;
339 339 while (PyDict_Next(dmap, &pos, &fname, &v)) {
340 340 dirstateTupleObject *t;
341 341 if (!dirstate_tuple_check(v)) {
342 342 PyErr_SetString(PyExc_TypeError,
343 343 "expected a dirstate tuple");
344 344 goto bail;
345 345 }
346 346 t = (dirstateTupleObject *)v;
347 347
348 348 if (t->state == 'n' && t->size == -2) {
349 349 if (PySet_Add(otherpset, fname) == -1) {
350 350 goto bail;
351 351 }
352 352 }
353 353
354 354 if (t->state == 'n' && t->mtime != -1)
355 355 continue;
356 356 if (PySet_Add(nonnset, fname) == -1)
357 357 goto bail;
358 358 }
359 359
360 360 result = Py_BuildValue("(OO)", nonnset, otherpset);
361 361 if (result == NULL)
362 362 goto bail;
363 363 Py_DECREF(nonnset);
364 364 Py_DECREF(otherpset);
365 365 return result;
366 366 bail:
367 367 Py_XDECREF(nonnset);
368 368 Py_XDECREF(otherpset);
369 369 Py_XDECREF(result);
370 370 return NULL;
371 371 }
372 372
373 373 /*
374 374 * Efficiently pack a dirstate object into its on-disk format.
375 375 */
376 376 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
377 377 {
378 378 PyObject *packobj = NULL;
379 379 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
380 380 Py_ssize_t nbytes, pos, l;
381 381 PyObject *k, *v = NULL, *pn;
382 382 char *p, *s;
383 383 int now;
384 384
385 385 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
386 386 &PyDict_Type, &copymap, &pl, &now))
387 387 return NULL;
388 388
389 389 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
390 390 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
391 391 return NULL;
392 392 }
393 393
394 394 /* Figure out how much we need to allocate. */
395 395 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
396 396 PyObject *c;
397 397 if (!PyBytes_Check(k)) {
398 398 PyErr_SetString(PyExc_TypeError, "expected string key");
399 399 goto bail;
400 400 }
401 401 nbytes += PyBytes_GET_SIZE(k) + 17;
402 402 c = PyDict_GetItem(copymap, k);
403 403 if (c) {
404 404 if (!PyBytes_Check(c)) {
405 405 PyErr_SetString(PyExc_TypeError,
406 406 "expected string key");
407 407 goto bail;
408 408 }
409 409 nbytes += PyBytes_GET_SIZE(c) + 1;
410 410 }
411 411 }
412 412
413 413 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
414 414 if (packobj == NULL)
415 415 goto bail;
416 416
417 417 p = PyBytes_AS_STRING(packobj);
418 418
419 419 pn = PySequence_ITEM(pl, 0);
420 420 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
421 421 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
422 422 goto bail;
423 423 }
424 424 memcpy(p, s, l);
425 425 p += 20;
426 426 pn = PySequence_ITEM(pl, 1);
427 427 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
428 428 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
429 429 goto bail;
430 430 }
431 431 memcpy(p, s, l);
432 432 p += 20;
433 433
434 434 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
435 435 dirstateTupleObject *tuple;
436 436 char state;
437 437 int mode, size, mtime;
438 438 Py_ssize_t len, l;
439 439 PyObject *o;
440 440 char *t;
441 441
442 442 if (!dirstate_tuple_check(v)) {
443 443 PyErr_SetString(PyExc_TypeError,
444 444 "expected a dirstate tuple");
445 445 goto bail;
446 446 }
447 447 tuple = (dirstateTupleObject *)v;
448 448
449 449 state = tuple->state;
450 450 mode = tuple->mode;
451 451 size = tuple->size;
452 452 mtime = tuple->mtime;
453 453 if (state == 'n' && mtime == now) {
454 454 /* See pure/parsers.py:pack_dirstate for why we do
455 455 * this. */
456 456 mtime = -1;
457 457 mtime_unset = (PyObject *)make_dirstate_tuple(
458 458 state, mode, size, mtime);
459 459 if (!mtime_unset)
460 460 goto bail;
461 461 if (PyDict_SetItem(map, k, mtime_unset) == -1)
462 462 goto bail;
463 463 Py_DECREF(mtime_unset);
464 464 mtime_unset = NULL;
465 465 }
466 466 *p++ = state;
467 467 putbe32((uint32_t)mode, p);
468 468 putbe32((uint32_t)size, p + 4);
469 469 putbe32((uint32_t)mtime, p + 8);
470 470 t = p + 12;
471 471 p += 16;
472 472 len = PyBytes_GET_SIZE(k);
473 473 memcpy(p, PyBytes_AS_STRING(k), len);
474 474 p += len;
475 475 o = PyDict_GetItem(copymap, k);
476 476 if (o) {
477 477 *p++ = '\0';
478 478 l = PyBytes_GET_SIZE(o);
479 479 memcpy(p, PyBytes_AS_STRING(o), l);
480 480 p += l;
481 481 len += l + 1;
482 482 }
483 483 putbe32((uint32_t)len, t);
484 484 }
485 485
486 486 pos = p - PyBytes_AS_STRING(packobj);
487 487 if (pos != nbytes) {
488 488 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
489 489 (long)pos, (long)nbytes);
490 490 goto bail;
491 491 }
492 492
493 493 return packobj;
494 494 bail:
495 495 Py_XDECREF(mtime_unset);
496 496 Py_XDECREF(packobj);
497 497 Py_XDECREF(v);
498 498 return NULL;
499 499 }
500 500
501 501 #define BUMPED_FIX 1
502 502 #define USING_SHA_256 2
503 503 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
504 504
505 505 static PyObject *readshas(const char *source, unsigned char num,
506 506 Py_ssize_t hashwidth)
507 507 {
508 508 int i;
509 509 PyObject *list = PyTuple_New(num);
510 510 if (list == NULL) {
511 511 return NULL;
512 512 }
513 513 for (i = 0; i < num; i++) {
514 514 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
515 515 if (hash == NULL) {
516 516 Py_DECREF(list);
517 517 return NULL;
518 518 }
519 519 PyTuple_SET_ITEM(list, i, hash);
520 520 source += hashwidth;
521 521 }
522 522 return list;
523 523 }
524 524
525 525 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
526 526 uint32_t *msize)
527 527 {
528 528 const char *data = databegin;
529 529 const char *meta;
530 530
531 531 double mtime;
532 532 int16_t tz;
533 533 uint16_t flags;
534 534 unsigned char nsuccs, nparents, nmetadata;
535 535 Py_ssize_t hashwidth = 20;
536 536
537 537 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
538 538 PyObject *metadata = NULL, *ret = NULL;
539 539 int i;
540 540
541 541 if (data + FM1_HEADER_SIZE > dataend) {
542 542 goto overflow;
543 543 }
544 544
545 545 *msize = getbe32(data);
546 546 data += 4;
547 547 mtime = getbefloat64(data);
548 548 data += 8;
549 549 tz = getbeint16(data);
550 550 data += 2;
551 551 flags = getbeuint16(data);
552 552 data += 2;
553 553
554 554 if (flags & USING_SHA_256) {
555 555 hashwidth = 32;
556 556 }
557 557
558 558 nsuccs = (unsigned char)(*data++);
559 559 nparents = (unsigned char)(*data++);
560 560 nmetadata = (unsigned char)(*data++);
561 561
562 562 if (databegin + *msize > dataend) {
563 563 goto overflow;
564 564 }
565 565 dataend = databegin + *msize; /* narrow down to marker size */
566 566
567 567 if (data + hashwidth > dataend) {
568 568 goto overflow;
569 569 }
570 570 prec = PyBytes_FromStringAndSize(data, hashwidth);
571 571 data += hashwidth;
572 572 if (prec == NULL) {
573 573 goto bail;
574 574 }
575 575
576 576 if (data + nsuccs * hashwidth > dataend) {
577 577 goto overflow;
578 578 }
579 579 succs = readshas(data, nsuccs, hashwidth);
580 580 if (succs == NULL) {
581 581 goto bail;
582 582 }
583 583 data += nsuccs * hashwidth;
584 584
585 585 if (nparents == 1 || nparents == 2) {
586 586 if (data + nparents * hashwidth > dataend) {
587 587 goto overflow;
588 588 }
589 589 parents = readshas(data, nparents, hashwidth);
590 590 if (parents == NULL) {
591 591 goto bail;
592 592 }
593 593 data += nparents * hashwidth;
594 594 } else {
595 595 parents = Py_None;
596 596 Py_INCREF(parents);
597 597 }
598 598
599 599 if (data + 2 * nmetadata > dataend) {
600 600 goto overflow;
601 601 }
602 602 meta = data + (2 * nmetadata);
603 603 metadata = PyTuple_New(nmetadata);
604 604 if (metadata == NULL) {
605 605 goto bail;
606 606 }
607 607 for (i = 0; i < nmetadata; i++) {
608 608 PyObject *tmp, *left = NULL, *right = NULL;
609 609 Py_ssize_t leftsize = (unsigned char)(*data++);
610 610 Py_ssize_t rightsize = (unsigned char)(*data++);
611 611 if (meta + leftsize + rightsize > dataend) {
612 612 goto overflow;
613 613 }
614 614 left = PyBytes_FromStringAndSize(meta, leftsize);
615 615 meta += leftsize;
616 616 right = PyBytes_FromStringAndSize(meta, rightsize);
617 617 meta += rightsize;
618 618 tmp = PyTuple_New(2);
619 619 if (!left || !right || !tmp) {
620 620 Py_XDECREF(left);
621 621 Py_XDECREF(right);
622 622 Py_XDECREF(tmp);
623 623 goto bail;
624 624 }
625 625 PyTuple_SET_ITEM(tmp, 0, left);
626 626 PyTuple_SET_ITEM(tmp, 1, right);
627 627 PyTuple_SET_ITEM(metadata, i, tmp);
628 628 }
629 629 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
630 630 (int)tz * 60, parents);
631 631 goto bail; /* return successfully */
632 632
633 633 overflow:
634 634 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
635 635 bail:
636 636 Py_XDECREF(prec);
637 637 Py_XDECREF(succs);
638 638 Py_XDECREF(metadata);
639 639 Py_XDECREF(parents);
640 640 return ret;
641 641 }
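
FM1_HEADER_SIZE is the fixed prefix of a version 1 obsolescence marker: a 4-byte total size, an 8-byte big-endian double mtime, a 2-byte timezone, a 2-byte flags field, and three one-byte counts, i.e. 4 + 8 + 2 + 2 + 1 + 1 + 1 = 19 bytes. A hypothetical struct mirroring that layout for reference; the parser above deliberately reads each big-endian field by hand, because a real C struct would be padded and byte-order dependent and so cannot overlay the data:

#include <stdint.h>

struct fm1_header {
    uint32_t msize;    /* total marker size, including this header */
    double mtime;      /* stored on disk as a big-endian double */
    int16_t tz;        /* timezone offset in minutes (x60 in callers) */
    uint16_t flags;    /* BUMPED_FIX, USING_SHA_256, ... */
    uint8_t nsuccs;    /* number of successor hashes */
    uint8_t nparents;  /* 1 or 2 means parent hashes follow */
    uint8_t nmetadata; /* number of (key, value) metadata pairs */
};
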
642 642
643 643 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
644 644 {
645 645 const char *data, *dataend;
646 646 int datalen;
647 647 Py_ssize_t offset, stop;
648 648 PyObject *markers = NULL;
649 649
650 650 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
651 651 &offset, &stop)) {
652 652 return NULL;
653 653 }
654 654 dataend = data + datalen;
655 655 data += offset;
656 656 markers = PyList_New(0);
657 657 if (!markers) {
658 658 return NULL;
659 659 }
660 660 while (offset < stop) {
661 661 uint32_t msize;
662 662 int error;
663 663 PyObject *record = fm1readmarker(data, dataend, &msize);
664 664 if (!record) {
665 665 goto bail;
666 666 }
667 667 error = PyList_Append(markers, record);
668 668 Py_DECREF(record);
669 669 if (error) {
670 670 goto bail;
671 671 }
672 672 data += msize;
673 673 offset += msize;
674 674 }
675 675 return markers;
676 676 bail:
677 677 Py_DECREF(markers);
678 678 return NULL;
679 679 }
680 680
681 681 static char parsers_doc[] = "Efficient content parsing.";
682 682
683 683 PyObject *encodedir(PyObject *self, PyObject *args);
684 684 PyObject *pathencode(PyObject *self, PyObject *args);
685 685 PyObject *lowerencode(PyObject *self, PyObject *args);
686 686 PyObject *parse_index2(PyObject *self, PyObject *args);
687 687
688 688 static PyMethodDef methods[] = {
689 689 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
690 690 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
691 691 "create a set containing non-normal and other parent entries of given "
692 692 "dirstate\n"},
693 693 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
694 694 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
695 695 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
696 696 {"isasciistr", isasciistr, METH_VARARGS, "check if a string is ASCII\n"},
697 697 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
698 698 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
699 699 {"dict_new_presized", dict_new_presized, METH_VARARGS,
700 700 "construct a dict with an expected size\n"},
701 701 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
702 702 "make file foldmap\n"},
703 703 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
704 704 "escape a UTF-8 byte string to JSON (fast path)\n"},
705 705 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
706 706 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
707 707 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
708 708 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
709 709 "parse v1 obsolete markers\n"},
710 710 {NULL, NULL}};
711 711
712 712 void dirs_module_init(PyObject *mod);
713 713 void manifest_module_init(PyObject *mod);
714 714 void revlog_module_init(PyObject *mod);
715 715
716 static const int version = 4;
716 static const int version = 5;
717 717
718 718 static void module_init(PyObject *mod)
719 719 {
720 720 PyModule_AddIntConstant(mod, "version", version);
721 721
722 722 /* This module constant has two purposes. First, it lets us unit test
723 723 * the ImportError raised without hard-coding any error text. This
724 724 * means we can change the text in the future without breaking tests,
725 725 * even across changesets without a recompile. Second, its presence
726 726 * can be used to determine whether the version-checking logic is
727 727 * present, which also helps in testing across changesets without a
728 728 * recompile. Note that this means the pure-Python version of parsers
729 729 * should not have this module constant. */
730 730 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
731 731
732 732 dirs_module_init(mod);
733 733 manifest_module_init(mod);
734 734 revlog_module_init(mod);
735 735
736 736 if (PyType_Ready(&dirstateTupleType) < 0)
737 737 return;
738 738 Py_INCREF(&dirstateTupleType);
739 739 PyModule_AddObject(mod, "dirstatetuple",
740 740 (PyObject *)&dirstateTupleType);
741 741 }
742 742
743 743 static int check_python_version(void)
744 744 {
745 745 PyObject *sys = PyImport_ImportModule("sys"), *ver;
746 746 long hexversion;
747 747 if (!sys)
748 748 return -1;
749 749 ver = PyObject_GetAttrString(sys, "hexversion");
750 750 Py_DECREF(sys);
751 751 if (!ver)
752 752 return -1;
753 753 hexversion = PyInt_AsLong(ver);
754 754 Py_DECREF(ver);
755 755 /* sys.hexversion is a 32-bit number by default, so the -1 case
756 756 * should only occur in unusual circumstances (e.g. if sys.hexversion
757 757 * is manually set to an invalid value). */
758 758 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
759 759 PyErr_Format(PyExc_ImportError,
760 760 "%s: The Mercurial extension "
761 761 "modules were compiled with Python " PY_VERSION
762 762 ", but "
763 763 "Mercurial is currently using Python with "
764 764 "sys.hexversion=%ld: "
765 765 "Python %s\n at: %s",
766 766 versionerrortext, hexversion, Py_GetVersion(),
767 767 Py_GetProgramFullPath());
768 768 return -1;
769 769 }
770 770 return 0;
771 771 }
772 772
773 773 #ifdef IS_PY3K
774 774 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
775 775 parsers_doc, -1, methods};
776 776
777 777 PyMODINIT_FUNC PyInit_parsers(void)
778 778 {
779 779 PyObject *mod;
780 780
781 781 if (check_python_version() == -1)
782 782 return NULL;
783 783 mod = PyModule_Create(&parsers_module);
784 784 module_init(mod);
785 785 return mod;
786 786 }
787 787 #else
788 788 PyMODINIT_FUNC initparsers(void)
789 789 {
790 790 PyObject *mod;
791 791
792 792 if (check_python_version() == -1)
793 793 return;
794 794 mod = Py_InitModule3("parsers", methods, parsers_doc);
795 795 module_init(mod);
796 796 }
797 797 #endif
@@ -1,2105 +1,2179
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AS_LONG PyLong_AS_LONG
28 28 #define PyInt_AsLong PyLong_AsLong
29 29 #endif
30 30
31 31 /*
32 32 * A base-16 trie for fast node->rev mapping.
33 33 *
34 34 * Positive value is index of the next node in the trie
35 35 * Negative value is a leaf: -(rev + 1)
36 36 * Zero is empty
37 37 */
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetree;
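
The sign convention packs three states into each children[] slot. A short sketch of the encode/decode helpers implied by the comment (the helper names are hypothetical; nt_insert and nt_find below inline this arithmetic directly):

int nt_slot_is_empty(int v) { return v == 0; }
int nt_slot_is_leaf(int v)  { return v < 0; }

int nt_encode_leaf(int rev)
{
    return -rev - 1; /* rev 0 maps to -1, never colliding with "empty" */
}

int nt_decode_leaf(int v)
{
    return -(v + 1); /* inverse: -1 maps back to rev 0 */
}
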
41 41
42 42 /*
43 43 * This class has two behaviors.
44 44 *
45 45 * When used in a list-like way (with integer keys), we decode an
46 46 * entry in a RevlogNG index file on demand. Our last entry is a
47 47 * sentinel, always a nullid. We have limited support for
48 48 * integer-keyed insert and delete, only at elements right before the
49 49 * sentinel.
50 50 *
51 51 * With string keys, we lazily perform a reverse mapping from node to
52 52 * rev, using a base-16 trie.
53 53 */
54 54 typedef struct {
55 55 PyObject_HEAD
56 56 /* Type-specific fields go here. */
57 57 PyObject *data; /* raw bytes of index */
58 58 Py_buffer buf; /* buffer of data */
59 59 PyObject **cache; /* cached tuples */
60 60 const char **offsets; /* populated on demand */
61 61 Py_ssize_t raw_length; /* original number of elements */
62 62 Py_ssize_t length; /* current number of elements */
63 63 PyObject *added; /* populated on demand */
64 64 PyObject *headrevs; /* cache, invalidated on changes */
65 65 PyObject *filteredrevs; /* filtered revs set */
66 66 nodetree *nt; /* base-16 trie */
67 67 unsigned ntlength; /* # nodes in use */
68 68 unsigned ntcapacity; /* # nodes allocated */
69 69 int ntdepth; /* maximum depth of tree */
70 70 int ntsplits; /* # splits performed */
71 71 int ntrev; /* last rev scanned */
72 72 int ntlookups; /* # lookups */
73 73 int ntmisses; /* # lookups that miss the cache */
74 74 int inlined;
75 75 } indexObject;
76 76
77 77 static Py_ssize_t index_length(const indexObject *self)
78 78 {
79 79 if (self->added == NULL)
80 80 return self->length;
81 81 return self->length + PyList_GET_SIZE(self->added);
82 82 }
83 83
84 84 static PyObject *nullentry;
85 85 static const char nullid[20];
86 86
87 87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
88 88
89 89 #if LONG_MAX == 0x7fffffffL
90 90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
91 91 #else
92 92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
93 93 #endif
94 94
95 95 /* A RevlogNG v1 index entry is 64 bytes long. */
96 96 static const long v1_hdrsize = 64;
97 97
98 98 /*
99 99 * Return a pointer to the beginning of a RevlogNG record.
100 100 */
101 101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
102 102 {
103 103 if (self->inlined && pos > 0) {
104 104 if (self->offsets == NULL) {
105 105 self->offsets = PyMem_Malloc(self->raw_length *
106 106 sizeof(*self->offsets));
107 107 if (self->offsets == NULL)
108 108 return (const char *)PyErr_NoMemory();
109 109 inline_scan(self, self->offsets);
110 110 }
111 111 return self->offsets[pos];
112 112 }
113 113
114 114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
115 115 }
116 116
117 117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
118 118 int *ps, int maxrev)
119 119 {
120 120 if (rev >= self->length - 1) {
121 121 PyObject *tuple = PyList_GET_ITEM(self->added,
122 122 rev - self->length + 1);
123 123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
124 124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
125 125 } else {
126 126 const char *data = index_deref(self, rev);
127 127 ps[0] = getbe32(data + 24);
128 128 ps[1] = getbe32(data + 28);
129 129 }
130 130 /* If index file is corrupted, ps[] may point to invalid revisions. So
131 131 * trusting them unconditionally risks a buffer overflow. */
132 132 if (ps[0] > maxrev || ps[1] > maxrev) {
133 133 PyErr_SetString(PyExc_ValueError, "parent out of range");
134 134 return -1;
135 135 }
136 136 return 0;
137 137 }
138 138
139 139
140 140 /*
141 141 * RevlogNG format (all in big endian, data may be inlined):
142 142 * 6 bytes: offset
143 143 * 2 bytes: flags
144 144 * 4 bytes: compressed length
145 145 * 4 bytes: uncompressed length
146 146 * 4 bytes: base revision
147 147 * 4 bytes: link revision
148 148 * 4 bytes: parent 1 revision
149 149 * 4 bytes: parent 2 revision
150 150 * 32 bytes: nodeid (only 20 bytes used)
151 151 */
152 152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
153 153 {
154 154 uint64_t offset_flags;
155 155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
156 156 const char *c_node_id;
157 157 const char *data;
158 158 Py_ssize_t length = index_length(self);
159 159 PyObject *entry;
160 160
161 161 if (pos < 0)
162 162 pos += length;
163 163
164 164 if (pos < 0 || pos >= length) {
165 165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
166 166 return NULL;
167 167 }
168 168
169 169 if (pos == length - 1) {
170 170 Py_INCREF(nullentry);
171 171 return nullentry;
172 172 }
173 173
174 174 if (pos >= self->length - 1) {
175 175 PyObject *obj;
176 176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
177 177 Py_INCREF(obj);
178 178 return obj;
179 179 }
180 180
181 181 if (self->cache) {
182 182 if (self->cache[pos]) {
183 183 Py_INCREF(self->cache[pos]);
184 184 return self->cache[pos];
185 185 }
186 186 } else {
187 187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
188 188 if (self->cache == NULL)
189 189 return PyErr_NoMemory();
190 190 }
191 191
192 192 data = index_deref(self, pos);
193 193 if (data == NULL)
194 194 return NULL;
195 195
196 196 offset_flags = getbe32(data + 4);
197 197 if (pos == 0) /* mask out version number for the first entry */
198 198 offset_flags &= 0xFFFF;
199 199 else {
200 200 uint32_t offset_high = getbe32(data);
201 201 offset_flags |= ((uint64_t)offset_high) << 32;
202 202 }
203 203
204 204 comp_len = getbe32(data + 8);
205 205 uncomp_len = getbe32(data + 12);
206 206 base_rev = getbe32(data + 16);
207 207 link_rev = getbe32(data + 20);
208 208 parent_1 = getbe32(data + 24);
209 209 parent_2 = getbe32(data + 28);
210 210 c_node_id = data + 32;
211 211
212 212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
213 213 uncomp_len, base_rev, link_rev,
214 214 parent_1, parent_2, c_node_id, 20);
215 215
216 216 if (entry) {
217 217 PyObject_GC_UnTrack(entry);
218 218 Py_INCREF(entry);
219 219 }
220 220
221 221 self->cache[pos] = entry;
222 222
223 223 return entry;
224 224 }
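
The fixed offsets read above (flags at +4, lengths at +8 and +12, base and link revs at +16 and +20, parents at +24 and +28, node id at +32) follow the RevlogNG layout in the comment before index_get. A standalone sketch of the same decode, with getbe32 re-implemented locally and hypothetical out-parameters:

#include <stdint.h>
#include <string.h>

static uint32_t demo_getbe32(const char *p)
{
    const unsigned char *u = (const unsigned char *)p;
    return ((uint32_t)u[0] << 24) | ((uint32_t)u[1] << 16) |
           ((uint32_t)u[2] << 8) | (uint32_t)u[3];
}

/* decode one 64-byte v1 record, same offsets as index_get above */
void demo_decode_v1(const char *data, int pos, uint64_t *offset_flags,
                    int *comp_len, int *uncomp_len, int *base_rev,
                    int *link_rev, int *p1, int *p2, char node[20])
{
    *offset_flags = demo_getbe32(data + 4);
    if (pos == 0) /* entry 0 stores the version where the offset goes */
        *offset_flags &= 0xFFFF;
    else
        *offset_flags |= ((uint64_t)demo_getbe32(data)) << 32;
    *comp_len = demo_getbe32(data + 8);
    *uncomp_len = demo_getbe32(data + 12);
    *base_rev = demo_getbe32(data + 16);
    *link_rev = demo_getbe32(data + 20);
    *p1 = demo_getbe32(data + 24);
    *p2 = demo_getbe32(data + 28);
    memcpy(node, data + 32, 20); /* 32 bytes reserved, 20 used */
}
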
225 225
226 226 /*
227 227 * Return the 20-byte SHA of the node corresponding to the given rev.
228 228 */
229 229 static const char *index_node(indexObject *self, Py_ssize_t pos)
230 230 {
231 231 Py_ssize_t length = index_length(self);
232 232 const char *data;
233 233
234 234 if (pos == length - 1 || pos == INT_MAX)
235 235 return nullid;
236 236
237 237 if (pos >= length)
238 238 return NULL;
239 239
240 240 if (pos >= self->length - 1) {
241 241 PyObject *tuple, *str;
242 242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
243 243 str = PyTuple_GetItem(tuple, 7);
244 244 return str ? PyBytes_AS_STRING(str) : NULL;
245 245 }
246 246
247 247 data = index_deref(self, pos);
248 248 return data ? data + 32 : NULL;
249 249 }
250 250
251 251 /*
252 252 * Return the 20-byte SHA of the node corresponding to the given rev. The
253 253 * rev is assumed to exist; if it does not, an exception is set.
254 254 */
255 255 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
256 256 {
257 257 const char *node = index_node(self, pos);
258 258 if (node == NULL) {
259 259 PyErr_Format(PyExc_IndexError, "could not access rev %d",
260 260 (int)pos);
261 261 }
262 262 return node;
263 263 }
264 264
265 265 static int nt_insert(indexObject *self, const char *node, int rev);
266 266
267 267 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
268 268 {
269 269 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
270 270 return -1;
271 271 if (*nodelen == 20)
272 272 return 0;
273 273 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
274 274 return -1;
275 275 }
276 276
277 277 static PyObject *index_insert(indexObject *self, PyObject *args)
278 278 {
279 279 PyObject *obj;
280 280 char *node;
281 281 int index;
282 282 Py_ssize_t len, nodelen;
283 283
284 284 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
285 285 return NULL;
286 286
287 287 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
288 288 PyErr_SetString(PyExc_TypeError, "8-tuple required");
289 289 return NULL;
290 290 }
291 291
292 292 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
293 293 return NULL;
294 294
295 295 len = index_length(self);
296 296
297 297 if (index < 0)
298 298 index += len;
299 299
300 300 if (index != len - 1) {
301 301 PyErr_SetString(PyExc_IndexError,
302 302 "insert only supported at index -1");
303 303 return NULL;
304 304 }
305 305
306 306 if (self->added == NULL) {
307 307 self->added = PyList_New(0);
308 308 if (self->added == NULL)
309 309 return NULL;
310 310 }
311 311
312 312 if (PyList_Append(self->added, obj) == -1)
313 313 return NULL;
314 314
315 315 if (self->nt)
316 316 nt_insert(self, node, index);
317 317
318 318 Py_CLEAR(self->headrevs);
319 319 Py_RETURN_NONE;
320 320 }
321 321
322 322 static void _index_clearcaches(indexObject *self)
323 323 {
324 324 if (self->cache) {
325 325 Py_ssize_t i;
326 326
327 327 for (i = 0; i < self->raw_length; i++)
328 328 Py_CLEAR(self->cache[i]);
329 329 free(self->cache);
330 330 self->cache = NULL;
331 331 }
332 332 if (self->offsets) {
333 333 PyMem_Free(self->offsets);
334 334 self->offsets = NULL;
335 335 }
336 336 if (self->nt) {
337 337 free(self->nt);
338 338 self->nt = NULL;
339 339 }
340 340 Py_CLEAR(self->headrevs);
341 341 }
342 342
343 343 static PyObject *index_clearcaches(indexObject *self)
344 344 {
345 345 _index_clearcaches(self);
346 346 self->ntlength = self->ntcapacity = 0;
347 347 self->ntdepth = self->ntsplits = 0;
348 348 self->ntrev = -1;
349 349 self->ntlookups = self->ntmisses = 0;
350 350 Py_RETURN_NONE;
351 351 }
352 352
353 353 static PyObject *index_stats(indexObject *self)
354 354 {
355 355 PyObject *obj = PyDict_New();
356 356 PyObject *t = NULL;
357 357
358 358 if (obj == NULL)
359 359 return NULL;
360 360
361 361 #define istat(__n, __d) \
362 362 do { \
363 363 t = PyInt_FromSsize_t(self->__n); \
364 364 if (!t) \
365 365 goto bail; \
366 366 if (PyDict_SetItemString(obj, __d, t) == -1) \
367 367 goto bail; \
368 368 Py_DECREF(t); \
369 369 } while (0)
370 370
371 371 if (self->added) {
372 372 Py_ssize_t len = PyList_GET_SIZE(self->added);
373 373 t = PyInt_FromSsize_t(len);
374 374 if (!t)
375 375 goto bail;
376 376 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
377 377 goto bail;
378 378 Py_DECREF(t);
379 379 }
380 380
381 381 if (self->raw_length != self->length - 1)
382 382 istat(raw_length, "revs on disk");
383 383 istat(length, "revs in memory");
384 384 istat(ntcapacity, "node trie capacity");
385 385 istat(ntdepth, "node trie depth");
386 386 istat(ntlength, "node trie count");
387 387 istat(ntlookups, "node trie lookups");
388 388 istat(ntmisses, "node trie misses");
389 389 istat(ntrev, "node trie last rev scanned");
390 390 istat(ntsplits, "node trie splits");
391 391
392 392 #undef istat
393 393
394 394 return obj;
395 395
396 396 bail:
397 397 Py_XDECREF(obj);
398 398 Py_XDECREF(t);
399 399 return NULL;
400 400 }
401 401
402 402 /*
403 403 * When we cache a list, we want to be sure the caller can't mutate
404 404 * the cached copy.
405 405 */
406 406 static PyObject *list_copy(PyObject *list)
407 407 {
408 408 Py_ssize_t len = PyList_GET_SIZE(list);
409 409 PyObject *newlist = PyList_New(len);
410 410 Py_ssize_t i;
411 411
412 412 if (newlist == NULL)
413 413 return NULL;
414 414
415 415 for (i = 0; i < len; i++) {
416 416 PyObject *obj = PyList_GET_ITEM(list, i);
417 417 Py_INCREF(obj);
418 418 PyList_SET_ITEM(newlist, i, obj);
419 419 }
420 420
421 421 return newlist;
422 422 }
423 423
424 424 static int check_filter(PyObject *filter, Py_ssize_t arg)
425 425 {
426 426 if (filter) {
427 427 PyObject *arglist, *result;
428 428 int isfiltered;
429 429
430 430 arglist = Py_BuildValue("(n)", arg);
431 431 if (!arglist) {
432 432 return -1;
433 433 }
434 434
435 435 result = PyEval_CallObject(filter, arglist);
436 436 Py_DECREF(arglist);
437 437 if (!result) {
438 438 return -1;
439 439 }
440 440
441 441 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
442 442 * same as this function, so we can just return it directly. */
443 443 isfiltered = PyObject_IsTrue(result);
444 444 Py_DECREF(result);
445 445 return isfiltered;
446 446 } else {
447 447 return 0;
448 448 }
449 449 }
450 450
451 451 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
452 452 Py_ssize_t marker, char *phases)
453 453 {
454 454 PyObject *iter = NULL;
455 455 PyObject *iter_item = NULL;
456 456 Py_ssize_t min_idx = index_length(self) + 1;
457 457 long iter_item_long;
458 458
459 459 if (PyList_GET_SIZE(list) != 0) {
460 460 iter = PyObject_GetIter(list);
461 461 if (iter == NULL)
462 462 return -2;
463 463 while ((iter_item = PyIter_Next(iter))) {
464 464 iter_item_long = PyInt_AS_LONG(iter_item);
465 465 Py_DECREF(iter_item);
466 466 if (iter_item_long < min_idx)
467 467 min_idx = iter_item_long;
468 468 phases[iter_item_long] = marker;
469 469 }
470 470 Py_DECREF(iter);
471 471 }
472 472
473 473 return min_idx;
474 474 }
475 475
476 476 static inline void set_phase_from_parents(char *phases, int parent_1,
477 477 int parent_2, Py_ssize_t i)
478 478 {
479 479 if (parent_1 >= 0 && phases[parent_1] > phases[i])
480 480 phases[i] = phases[parent_1];
481 481 if (parent_2 >= 0 && phases[parent_2] > phases[i])
482 482 phases[i] = phases[parent_2];
483 483 }
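
set_phase_from_parents encodes the rule that a revision is at least as secret as either of its parents. A toy standalone propagation pass over a made-up five-revision history:

#include <stdio.h>

static void demo_set_phase(char *phases, int p1, int p2, int i)
{
    if (p1 >= 0 && phases[p1] > phases[i])
        phases[i] = phases[p1];
    if (p2 >= 0 && phases[p2] > phases[i])
        phases[i] = phases[p2];
}

int main(void)
{
    /* made-up parents for revs 0..4; -1 means nullrev */
    int p1[] = {-1, 0, 1, 1, 3}, p2[] = {-1, -1, -1, -1, 2};
    char phases[5] = {0, 1, 0, 0, 0}; /* rev 1 is a draft root */
    int i;

    for (i = 0; i < 5; i++)
        demo_set_phase(phases, p1[i], p2[i], i);
    for (i = 0; i < 5; i++) /* revs 2..4 inherit draft from rev 1 */
        printf("rev %d phase %d\n", i, phases[i]);
    return 0;
}
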
484 484
485 485 static PyObject *reachableroots2(indexObject *self, PyObject *args)
486 486 {
487 487
488 488 /* Input */
489 489 long minroot;
490 490 PyObject *includepatharg = NULL;
491 491 int includepath = 0;
492 492 /* heads and roots are lists */
493 493 PyObject *heads = NULL;
494 494 PyObject *roots = NULL;
495 495 PyObject *reachable = NULL;
496 496
497 497 PyObject *val;
498 498 Py_ssize_t len = index_length(self) - 1;
499 499 long revnum;
500 500 Py_ssize_t k;
501 501 Py_ssize_t i;
502 502 Py_ssize_t l;
503 503 int r;
504 504 int parents[2];
505 505
506 506 /* Internal data structure:
507 507 * tovisit: array of length len+1 (all revs + nullrev), filled up to lentovisit
508 508 * revstates: array of length len+1 (all revs + nullrev) */
509 509 int *tovisit = NULL;
510 510 long lentovisit = 0;
511 511 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
512 512 char *revstates = NULL;
513 513
514 514 /* Get arguments */
515 515 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
516 516 &PyList_Type, &roots,
517 517 &PyBool_Type, &includepatharg))
518 518 goto bail;
519 519
520 520 if (includepatharg == Py_True)
521 521 includepath = 1;
522 522
523 523 /* Initialize return set */
524 524 reachable = PyList_New(0);
525 525 if (reachable == NULL)
526 526 goto bail;
527 527
528 528 /* Initialize internal datastructures */
529 529 tovisit = (int *)malloc((len + 1) * sizeof(int));
530 530 if (tovisit == NULL) {
531 531 PyErr_NoMemory();
532 532 goto bail;
533 533 }
534 534
535 535 revstates = (char *)calloc(len + 1, 1);
536 536 if (revstates == NULL) {
537 537 PyErr_NoMemory();
538 538 goto bail;
539 539 }
540 540
541 541 l = PyList_GET_SIZE(roots);
542 542 for (i = 0; i < l; i++) {
543 543 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
544 544 if (revnum == -1 && PyErr_Occurred())
545 545 goto bail;
546 546 /* If root is out of range, e.g. wdir(), it must be unreachable
547 547 * from heads. So we can just ignore it. */
548 548 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
549 549 continue;
550 550 revstates[revnum + 1] |= RS_ROOT;
551 551 }
552 552
553 553 /* Populate tovisit with all the heads */
554 554 l = PyList_GET_SIZE(heads);
555 555 for (i = 0; i < l; i++) {
556 556 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
557 557 if (revnum == -1 && PyErr_Occurred())
558 558 goto bail;
559 559 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
560 560 PyErr_SetString(PyExc_IndexError, "head out of range");
561 561 goto bail;
562 562 }
563 563 if (!(revstates[revnum + 1] & RS_SEEN)) {
564 564 tovisit[lentovisit++] = (int)revnum;
565 565 revstates[revnum + 1] |= RS_SEEN;
566 566 }
567 567 }
568 568
569 569 /* Visit the tovisit list and find the reachable roots */
570 570 k = 0;
571 571 while (k < lentovisit) {
572 572 /* Add the node to reachable if it is a root*/
573 573 revnum = tovisit[k++];
574 574 if (revstates[revnum + 1] & RS_ROOT) {
575 575 revstates[revnum + 1] |= RS_REACHABLE;
576 576 val = PyInt_FromLong(revnum);
577 577 if (val == NULL)
578 578 goto bail;
579 579 r = PyList_Append(reachable, val);
580 580 Py_DECREF(val);
581 581 if (r < 0)
582 582 goto bail;
583 583 if (includepath == 0)
584 584 continue;
585 585 }
586 586
587 587 /* Add its parents to the list of nodes to visit */
588 588 if (revnum == -1)
589 589 continue;
590 590 r = index_get_parents(self, revnum, parents, (int)len - 1);
591 591 if (r < 0)
592 592 goto bail;
593 593 for (i = 0; i < 2; i++) {
594 594 if (!(revstates[parents[i] + 1] & RS_SEEN)
595 595 && parents[i] >= minroot) {
596 596 tovisit[lentovisit++] = parents[i];
597 597 revstates[parents[i] + 1] |= RS_SEEN;
598 598 }
599 599 }
600 600 }
601 601
602 602 /* Find all the nodes in between the roots we found and the heads
603 603 * and add them to the reachable set */
604 604 if (includepath == 1) {
605 605 long minidx = minroot;
606 606 if (minidx < 0)
607 607 minidx = 0;
608 608 for (i = minidx; i < len; i++) {
609 609 if (!(revstates[i + 1] & RS_SEEN))
610 610 continue;
611 611 r = index_get_parents(self, i, parents, (int)len - 1);
612 612 /* Corrupted index file, error is set from
613 613 * index_get_parents */
614 614 if (r < 0)
615 615 goto bail;
616 616 if (((revstates[parents[0] + 1] |
617 617 revstates[parents[1] + 1]) & RS_REACHABLE)
618 618 && !(revstates[i + 1] & RS_REACHABLE)) {
619 619 revstates[i + 1] |= RS_REACHABLE;
620 620 val = PyInt_FromLong(i);
621 621 if (val == NULL)
622 622 goto bail;
623 623 r = PyList_Append(reachable, val);
624 624 Py_DECREF(val);
625 625 if (r < 0)
626 626 goto bail;
627 627 }
628 628 }
629 629 }
630 630
631 631 free(revstates);
632 632 free(tovisit);
633 633 return reachable;
634 634 bail:
635 635 Py_XDECREF(reachable);
636 636 free(revstates);
637 637 free(tovisit);
638 638 return NULL;
639 639 }
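
The walk above is a breadth-first search from the heads toward older revisions, with per-rev state bits indexed at rev + 1 so that nullrev (-1) gets slot 0. A stripped-down standalone version over a made-up parents array (no minroot filtering and no includepath pass):

#include <stdio.h>
#include <string.h>

enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };

int main(void)
{
    /* made-up 6-rev DAG: parents[rev][0..1], -1 = nullrev */
    int parents[6][2] = {{-1, -1}, {0, -1}, {1, -1},
                         {1, -1},  {2, 3},  {4, -1}};
    int tovisit[7], lentovisit = 0, k;
    char revstates[7];

    memset(revstates, 0, sizeof(revstates));
    revstates[1 + 1] |= RS_ROOT; /* root: rev 1 */
    tovisit[lentovisit++] = 5;   /* head: rev 5 */
    revstates[5 + 1] |= RS_SEEN;

    for (k = 0; k < lentovisit; k++) {
        int rev = tovisit[k], i;
        if (revstates[rev + 1] & RS_ROOT) {
            revstates[rev + 1] |= RS_REACHABLE;
            printf("reachable root: %d\n", rev);
            continue; /* includepath == 0: stop at roots */
        }
        if (rev == -1)
            continue;
        for (i = 0; i < 2; i++) {
            int p = parents[rev][i];
            if (!(revstates[p + 1] & RS_SEEN)) {
                tovisit[lentovisit++] = p;
                revstates[p + 1] |= RS_SEEN;
            }
        }
    }
    return 0;
}
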
640 640
641 641 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
642 642 {
643 643 PyObject *roots = Py_None;
644 644 PyObject *ret = NULL;
645 645 PyObject *phasessize = NULL;
646 646 PyObject *phaseroots = NULL;
647 647 PyObject *phaseset = NULL;
648 648 PyObject *phasessetlist = NULL;
649 649 PyObject *rev = NULL;
650 650 Py_ssize_t len = index_length(self) - 1;
651 651 Py_ssize_t numphase = 0;
652 652 Py_ssize_t minrevallphases = 0;
653 653 Py_ssize_t minrevphase = 0;
654 654 Py_ssize_t i = 0;
655 655 char *phases = NULL;
656 656 long phase;
657 657
658 658 if (!PyArg_ParseTuple(args, "O", &roots))
659 659 goto done;
660 660 if (roots == NULL || !PyList_Check(roots)) {
661 661 PyErr_SetString(PyExc_TypeError, "roots must be a list");
662 662 goto done;
663 663 }
664 664
665 665 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
666 666 if (phases == NULL) {
667 667 PyErr_NoMemory();
668 668 goto done;
669 669 }
670 670 /* Put the phase information of all the roots in phases */
671 671 numphase = PyList_GET_SIZE(roots)+1;
672 672 minrevallphases = len + 1;
673 673 phasessetlist = PyList_New(numphase);
674 674 if (phasessetlist == NULL)
675 675 goto done;
676 676
677 677 PyList_SET_ITEM(phasessetlist, 0, Py_None);
678 678 Py_INCREF(Py_None);
679 679
680 680 for (i = 0; i < numphase-1; i++) {
681 681 phaseroots = PyList_GET_ITEM(roots, i);
682 682 phaseset = PySet_New(NULL);
683 683 if (phaseset == NULL)
684 684 goto release;
685 685 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
686 686 if (!PyList_Check(phaseroots)) {
687 687 PyErr_SetString(PyExc_TypeError,
688 688 "roots item must be a list");
689 689 goto release;
690 690 }
691 691 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
692 692 if (minrevphase == -2) /* Error from add_roots_get_min */
693 693 goto release;
694 694 minrevallphases = MIN(minrevallphases, minrevphase);
695 695 }
696 696 /* Propagate the phase information from the roots to the revs */
697 697 if (minrevallphases != -1) {
698 698 int parents[2];
699 699 for (i = minrevallphases; i < len; i++) {
700 700 if (index_get_parents(self, i, parents,
701 701 (int)len - 1) < 0)
702 702 goto release;
703 703 set_phase_from_parents(phases, parents[0], parents[1], i);
704 704 }
705 705 }
706 706 /* Transform phase list to a python list */
707 707 phasessize = PyInt_FromLong(len);
708 708 if (phasessize == NULL)
709 709 goto release;
710 710 for (i = 0; i < len; i++) {
711 711 phase = phases[i];
712 712 /* We only store the sets of revs for non-public phases; the public phase
713 713 * is computed as a difference */
714 714 if (phase != 0) {
715 715 phaseset = PyList_GET_ITEM(phasessetlist, phase);
716 716 rev = PyInt_FromLong(i);
717 717 if (rev == NULL)
718 718 goto release;
719 719 PySet_Add(phaseset, rev);
720 720 Py_XDECREF(rev);
721 721 }
722 722 }
723 723 ret = PyTuple_Pack(2, phasessize, phasessetlist);
724 724
725 725 release:
726 726 Py_XDECREF(phasessize);
727 727 Py_XDECREF(phasessetlist);
728 728 done:
729 729 free(phases);
730 730 return ret;
731 731 }
732 732
733 733 static PyObject *index_headrevs(indexObject *self, PyObject *args)
734 734 {
735 735 Py_ssize_t i, j, len;
736 736 char *nothead = NULL;
737 737 PyObject *heads = NULL;
738 738 PyObject *filter = NULL;
739 739 PyObject *filteredrevs = Py_None;
740 740
741 741 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
742 742 return NULL;
743 743 }
744 744
745 745 if (self->headrevs && filteredrevs == self->filteredrevs)
746 746 return list_copy(self->headrevs);
747 747
748 748 Py_DECREF(self->filteredrevs);
749 749 self->filteredrevs = filteredrevs;
750 750 Py_INCREF(filteredrevs);
751 751
752 752 if (filteredrevs != Py_None) {
753 753 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
754 754 if (!filter) {
755 755 PyErr_SetString(PyExc_TypeError,
756 756 "filteredrevs has no attribute __contains__");
757 757 goto bail;
758 758 }
759 759 }
760 760
761 761 len = index_length(self) - 1;
762 762 heads = PyList_New(0);
763 763 if (heads == NULL)
764 764 goto bail;
765 765 if (len == 0) {
766 766 PyObject *nullid = PyInt_FromLong(-1);
767 767 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
768 768 Py_XDECREF(nullid);
769 769 goto bail;
770 770 }
771 771 goto done;
772 772 }
773 773
774 774 nothead = calloc(len, 1);
775 775 if (nothead == NULL) {
776 776 PyErr_NoMemory();
777 777 goto bail;
778 778 }
779 779
780 780 for (i = len - 1; i >= 0; i--) {
781 781 int isfiltered;
782 782 int parents[2];
783 783
784 784 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
785 785 * node already, and therefore this node is not filtered. So we can skip
786 786 * the expensive check_filter step.
787 787 */
788 788 if (nothead[i] != 1) {
789 789 isfiltered = check_filter(filter, i);
790 790 if (isfiltered == -1) {
791 791 PyErr_SetString(PyExc_TypeError,
792 792 "unable to check filter");
793 793 goto bail;
794 794 }
795 795
796 796 if (isfiltered) {
797 797 nothead[i] = 1;
798 798 continue;
799 799 }
800 800 }
801 801
802 802 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
803 803 goto bail;
804 804 for (j = 0; j < 2; j++) {
805 805 if (parents[j] >= 0)
806 806 nothead[parents[j]] = 1;
807 807 }
808 808 }
809 809
810 810 for (i = 0; i < len; i++) {
811 811 PyObject *head;
812 812
813 813 if (nothead[i])
814 814 continue;
815 815 head = PyInt_FromSsize_t(i);
816 816 if (head == NULL || PyList_Append(heads, head) == -1) {
817 817 Py_XDECREF(head);
818 818 goto bail;
819 819 }
820 820 }
821 821
822 822 done:
823 823 self->headrevs = heads;
824 824 Py_XDECREF(filter);
825 825 free(nothead);
826 826 return list_copy(self->headrevs);
827 827 bail:
828 828 Py_XDECREF(filter);
829 829 Py_XDECREF(heads);
830 830 free(nothead);
831 831 return NULL;
832 832 }
833 833
834 834 /**
835 835 * Obtain the base revision index entry.
836 836 *
837 837 * Callers must ensure that rev >= 0 or illegal memory access may occur.
838 838 */
839 839 static inline int index_baserev(indexObject *self, int rev)
840 840 {
841 841 const char *data;
842 842
843 843 if (rev >= self->length - 1) {
844 844 PyObject *tuple = PyList_GET_ITEM(self->added,
845 845 rev - self->length + 1);
846 846 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
847 847 }
848 848 else {
849 849 data = index_deref(self, rev);
850 850 if (data == NULL) {
851 851 return -2;
852 852 }
853 853
854 854 return getbe32(data + 16);
855 855 }
856 856 }
857 857
858 858 static PyObject *index_deltachain(indexObject *self, PyObject *args)
859 859 {
860 860 int rev, generaldelta;
861 861 PyObject *stoparg;
862 862 int stoprev, iterrev, baserev = -1;
863 863 int stopped;
864 864 PyObject *chain = NULL, *result = NULL;
865 865 const Py_ssize_t length = index_length(self);
866 866
867 867 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
868 868 return NULL;
869 869 }
870 870
871 871 if (PyInt_Check(stoparg)) {
872 872 stoprev = (int)PyInt_AsLong(stoparg);
873 873 if (stoprev == -1 && PyErr_Occurred()) {
874 874 return NULL;
875 875 }
876 876 }
877 877 else if (stoparg == Py_None) {
878 878 stoprev = -2;
879 879 }
880 880 else {
881 881 PyErr_SetString(PyExc_ValueError,
882 882 "stoprev must be integer or None");
883 883 return NULL;
884 884 }
885 885
886 886 if (rev < 0 || rev >= length - 1) {
887 887 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
888 888 return NULL;
889 889 }
890 890
891 891 chain = PyList_New(0);
892 892 if (chain == NULL) {
893 893 return NULL;
894 894 }
895 895
896 896 baserev = index_baserev(self, rev);
897 897
898 898 /* This should never happen. */
899 899 if (baserev <= -2) {
900 900 /* Error should be set by index_deref() */
901 901 assert(PyErr_Occurred());
902 902 goto bail;
903 903 }
904 904
905 905 iterrev = rev;
906 906
907 907 while (iterrev != baserev && iterrev != stoprev) {
908 908 PyObject *value = PyInt_FromLong(iterrev);
909 909 if (value == NULL) {
910 910 goto bail;
911 911 }
912 912 if (PyList_Append(chain, value)) {
913 913 Py_DECREF(value);
914 914 goto bail;
915 915 }
916 916 Py_DECREF(value);
917 917
918 918 if (generaldelta) {
919 919 iterrev = baserev;
920 920 }
921 921 else {
922 922 iterrev--;
923 923 }
924 924
925 925 if (iterrev < 0) {
926 926 break;
927 927 }
928 928
929 929 if (iterrev >= length - 1) {
930 930 PyErr_SetString(PyExc_IndexError, "revision outside index");
931 931 goto bail;
932 932 }
933 933
934 934 baserev = index_baserev(self, iterrev);
935 935
936 936 /* This should never happen. */
937 937 if (baserev <= -2) {
938 938 /* Error should be set by index_deref() */
939 939 assert(PyErr_Occurred());
940 940 goto bail;
941 941 }
942 942 }
943 943
944 944 if (iterrev == stoprev) {
945 945 stopped = 1;
946 946 }
947 947 else {
948 948 PyObject *value = PyInt_FromLong(iterrev);
949 949 if (value == NULL) {
950 950 goto bail;
951 951 }
952 952 if (PyList_Append(chain, value)) {
953 953 Py_DECREF(value);
954 954 goto bail;
955 955 }
956 956 Py_DECREF(value);
957 957
958 958 stopped = 0;
959 959 }
960 960
961 961 if (PyList_Reverse(chain)) {
962 962 goto bail;
963 963 }
964 964
965 965 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
966 966 Py_DECREF(chain);
967 967 return result;
968 968
969 969 bail:
970 970 Py_DECREF(chain);
971 971 return NULL;
972 972 }
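
The chain walk depends on generaldelta: with it, each revision's delta base is an arbitrary earlier rev recorded in the index; without it, the base is implicitly the previous rev, so the walk simply decrements. A toy standalone sketch over a made-up baserev array:

#include <stdio.h>

/* print the delta chain ending at rev, oldest first, over a made-up
 * baserev array (baserev[r] == r marks r as a full snapshot) */
static void demo_deltachain(const int *baserev, int rev, int generaldelta)
{
    int chain[16], n = 0, iterrev = rev;

    while (iterrev != baserev[iterrev]) {
        chain[n++] = iterrev;
        iterrev = generaldelta ? baserev[iterrev] : iterrev - 1;
    }
    chain[n++] = iterrev; /* the snapshot the chain bottoms out on */
    while (n--)
        printf("%d ", chain[n]);
    printf("\n");
}

int main(void)
{
    int baserev[] = {0, 0, 0, 3, 3, 4}; /* snapshots at revs 0 and 3 */

    demo_deltachain(baserev, 5, 1); /* generaldelta: prints "3 4 5" */
    demo_deltachain(baserev, 2, 0); /* legacy: prints "0 1 2" */
    return 0;
}
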
973 973
974 974 static inline int nt_level(const char *node, Py_ssize_t level)
975 975 {
976 976 int v = node[level>>1];
977 977 if (!(level & 1))
978 978 v >>= 4;
979 979 return v & 0xf;
980 980 }
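
nt_level treats the 20-byte binary node as 40 four-bit digits: even levels take the high nybble of a byte, odd levels the low nybble. A tiny self-check (with the same signed-char caveat as the original):

#include <assert.h>

static int demo_nt_level(const char *node, int level)
{
    int v = node[level >> 1];
    if (!(level & 1))
        v >>= 4; /* even level: high nybble */
    return v & 0xf;
}

int main(void)
{
    const char node[2] = {(char)0xab, (char)0xcd};

    assert(demo_nt_level(node, 0) == 0xa);
    assert(demo_nt_level(node, 1) == 0xb);
    assert(demo_nt_level(node, 2) == 0xc);
    assert(demo_nt_level(node, 3) == 0xd);
    return 0;
}
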
981 981
982 982 /*
983 983 * Return values:
984 984 *
985 985 * -4: match is ambiguous (multiple candidates)
986 986 * -2: not found
987 987 * rest: valid rev
988 988 */
989 989 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
990 990 int hex)
991 991 {
992 992 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
993 993 int level, maxlevel, off;
994 994
995 995 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
996 996 return -1;
997 997
998 998 if (self->nt == NULL)
999 999 return -2;
1000 1000
1001 1001 if (hex)
1002 1002 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1003 1003 else
1004 1004 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1005 1005
1006 1006 for (level = off = 0; level < maxlevel; level++) {
1007 1007 int k = getnybble(node, level);
1008 1008 nodetree *n = &self->nt[off];
1009 1009 int v = n->children[k];
1010 1010
1011 1011 if (v < 0) {
1012 1012 const char *n;
1013 1013 Py_ssize_t i;
1014 1014
1015 1015 v = -(v + 1);
1016 1016 n = index_node(self, v);
1017 1017 if (n == NULL)
1018 1018 return -2;
1019 1019 for (i = level; i < maxlevel; i++)
1020 1020 if (getnybble(node, i) != nt_level(n, i))
1021 1021 return -2;
1022 1022 return v;
1023 1023 }
1024 1024 if (v == 0)
1025 1025 return -2;
1026 1026 off = v;
1027 1027 }
1028 1028 /* multiple matches against an ambiguous prefix */
1029 1029 return -4;
1030 1030 }
1031 1031
1032 1032 static int nt_new(indexObject *self)
1033 1033 {
1034 1034 if (self->ntlength == self->ntcapacity) {
1035 1035 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1036 1036 PyErr_SetString(PyExc_MemoryError,
1037 1037 "overflow in nt_new");
1038 1038 return -1;
1039 1039 }
1040 1040 self->ntcapacity *= 2;
1041 1041 self->nt = realloc(self->nt,
1042 1042 self->ntcapacity * sizeof(nodetree));
1043 1043 if (self->nt == NULL) {
1044 1044 PyErr_SetString(PyExc_MemoryError, "out of memory");
1045 1045 return -1;
1046 1046 }
1047 1047 memset(&self->nt[self->ntlength], 0,
1048 1048 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1049 1049 }
1050 1050 return self->ntlength++;
1051 1051 }
1052 1052
1053 1053 static int nt_insert(indexObject *self, const char *node, int rev)
1054 1054 {
1055 1055 int level = 0;
1056 1056 int off = 0;
1057 1057
1058 1058 while (level < 40) {
1059 1059 int k = nt_level(node, level);
1060 1060 nodetree *n;
1061 1061 int v;
1062 1062
1063 1063 n = &self->nt[off];
1064 1064 v = n->children[k];
1065 1065
1066 1066 if (v == 0) {
1067 1067 n->children[k] = -rev - 1;
1068 1068 return 0;
1069 1069 }
1070 1070 if (v < 0) {
1071 1071 const char *oldnode = index_node(self, -(v + 1));
1072 1072 int noff;
1073 1073
1074 1074 if (!oldnode || !memcmp(oldnode, node, 20)) {
1075 1075 n->children[k] = -rev - 1;
1076 1076 return 0;
1077 1077 }
1078 1078 noff = nt_new(self);
1079 1079 if (noff == -1)
1080 1080 return -1;
1081 1081 /* self->nt may have been changed by realloc */
1082 1082 self->nt[off].children[k] = noff;
1083 1083 off = noff;
1084 1084 n = &self->nt[off];
1085 1085 n->children[nt_level(oldnode, ++level)] = v;
1086 1086 if (level > self->ntdepth)
1087 1087 self->ntdepth = level;
1088 1088 self->ntsplits += 1;
1089 1089 } else {
1090 1090 level += 1;
1091 1091 off = v;
1092 1092 }
1093 1093 }
1094 1094
1095 1095 return -1;
1096 1096 }
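/*
 * Leaf-split example (revs and digits invented): suppose rev 5, node
 * "1f3a...", is stored as a leaf directly under the root, so
 * root['1'] == -(5 + 1). Inserting rev 9 with node "1f40..." hits
 * that leaf and allocates one fresh nodetree per shared digit:
 *
 *   root['1'] -> n1,  n1['f'] -> n2,
 *   n2['3'] == -(5 + 1),  n2['4'] == -(9 + 1)
 *
 * i.e. both nodes end up re-filed one level past their common prefix.
 */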
1097 1097
1098 1098 static int nt_init(indexObject *self)
1099 1099 {
1100 1100 if (self->nt == NULL) {
1101 1101 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1102 1102 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1103 1103 return -1;
1104 1104 }
1105 1105 self->ntcapacity = self->raw_length < 4
1106 1106 ? 4 : (int)self->raw_length / 2;
1107 1107
1108 1108 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1109 1109 if (self->nt == NULL) {
1110 1110 PyErr_NoMemory();
1111 1111 return -1;
1112 1112 }
1113 1113 self->ntlength = 1;
1114 1114 self->ntrev = (int)index_length(self) - 1;
1115 1115 self->ntlookups = 1;
1116 1116 self->ntmisses = 0;
1117 1117 if (nt_insert(self, nullid, INT_MAX) == -1)
1118 1118 return -1;
1119 1119 }
1120 1120 return 0;
1121 1121 }
1122 1122
1123 1123 /*
1124 1124 * Return values:
1125 1125 *
1126 1126 * -3: error (exception set)
1127 1127 * -2: not found (no exception set)
1128 1128 * rest: valid rev
1129 1129 */
1130 1130 static int index_find_node(indexObject *self,
1131 1131 const char *node, Py_ssize_t nodelen)
1132 1132 {
1133 1133 int rev;
1134 1134
1135 1135 self->ntlookups++;
1136 1136 rev = nt_find(self, node, nodelen, 0);
1137 1137 if (rev >= -1)
1138 1138 return rev;
1139 1139
1140 1140 if (nt_init(self) == -1)
1141 1141 return -3;
1142 1142
1143 1143 /*
1144 1144 * For the first handful of lookups, we scan the entire index,
1145 1145 * and cache only the matching nodes. This optimizes for cases
1146 1146 * like "hg tip", where only a few nodes are accessed.
1147 1147 *
1148 1148 * After that, we cache every node we visit, using a single
1149 1149 * scan amortized over multiple lookups. This gives the best
1150 1150 * bulk performance, e.g. for "hg log".
1151 1151 */
1152 1152 if (self->ntmisses++ < 4) {
1153 1153 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1154 1154 const char *n = index_node_existing(self, rev);
1155 1155 if (n == NULL)
1156 1156 return -3;
1157 1157 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1158 1158 if (nt_insert(self, n, rev) == -1)
1159 1159 return -3;
1160 1160 break;
1161 1161 }
1162 1162 }
1163 1163 } else {
1164 1164 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1165 1165 const char *n = index_node_existing(self, rev);
1166 1166 if (n == NULL)
1167 1167 return -3;
1168 1168 if (nt_insert(self, n, rev) == -1) {
1169 1169 self->ntrev = rev + 1;
1170 1170 return -3;
1171 1171 }
1172 1172 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1173 1173 break;
1174 1174 }
1175 1175 }
1176 1176 self->ntrev = rev;
1177 1177 }
1178 1178
1179 1179 if (rev >= 0)
1180 1180 return rev;
1181 1181 return -2;
1182 1182 }
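/*
 * The two branches above in rough numbers: the first four misses each
 * cost a backwards scan but insert only the matching node, so a
 * handful of lookups ("hg tip") builds almost no tree. From the fifth
 * miss on, every visited node is inserted and ntrev is lowered past
 * it, so later scans never revisit a rev; k lookups over an n-entry
 * index then do at most n insertions in total instead of k full scans.
 */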
1183 1183
1184 1184 static void raise_revlog_error(void)
1185 1185 {
1186 1186 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1187 1187
1188 1188 mod = PyImport_ImportModule("mercurial.error");
1189 1189 if (mod == NULL) {
1190 1190 goto cleanup;
1191 1191 }
1192 1192
1193 1193 dict = PyModule_GetDict(mod);
1194 1194 if (dict == NULL) {
1195 1195 goto cleanup;
1196 1196 }
1197 1197 Py_INCREF(dict);
1198 1198
1199 1199 errclass = PyDict_GetItemString(dict, "RevlogError");
1200 1200 if (errclass == NULL) {
1201 1201 PyErr_SetString(PyExc_SystemError,
1202 1202 "could not find RevlogError");
1203 1203 goto cleanup;
1204 1204 }
1205 1205
1206 1206 /* value of exception is ignored by callers */
1207 1207 PyErr_SetString(errclass, "RevlogError");
1208 1208
1209 1209 cleanup:
1210 1210 Py_XDECREF(dict);
1211 1211 Py_XDECREF(mod);
1212 1212 }
1213 1213
1214 1214 static PyObject *index_getitem(indexObject *self, PyObject *value)
1215 1215 {
1216 1216 char *node;
1217 1217 Py_ssize_t nodelen;
1218 1218 int rev;
1219 1219
1220 1220 if (PyInt_Check(value))
1221 1221 return index_get(self, PyInt_AS_LONG(value));
1222 1222
1223 1223 if (node_check(value, &node, &nodelen) == -1)
1224 1224 return NULL;
1225 1225 rev = index_find_node(self, node, nodelen);
1226 1226 if (rev >= -1)
1227 1227 return PyInt_FromLong(rev);
1228 1228 if (rev == -2)
1229 1229 raise_revlog_error();
1230 1230 return NULL;
1231 1231 }
1232 1232
1233 1233 /*
1234 1234 * Fully populate the radix tree.
1235 1235 */
1236 1236 static int nt_populate(indexObject *self) {
1237 1237 int rev;
1238 1238 if (self->ntrev > 0) {
1239 1239 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1240 1240 const char *n = index_node_existing(self, rev);
1241 1241 if (n == NULL)
1242 1242 return -1;
1243 1243 if (nt_insert(self, n, rev) == -1)
1244 1244 return -1;
1245 1245 }
1246 1246 self->ntrev = -1;
1247 1247 }
1248 1248 return 0;
1249 1249 }
1250 1250
1251 1251 static int nt_partialmatch(indexObject *self, const char *node,
1252 1252 Py_ssize_t nodelen)
1253 1253 {
1254 1254 if (nt_init(self) == -1)
1255 1255 return -3;
1256 1256 if (nt_populate(self) == -1)
1257 1257 return -3;
1258 1258
1259 1259 return nt_find(self, node, nodelen, 1);
1260 1260 }
1261 1261
1262 /*
1263 * Find the length of the shortest unique prefix of node.
1264 *
1265 * Return values:
1266 *
1267 * -3: error (exception set)
1268 * -2: not found (no exception set)
1269 * rest: length of shortest prefix
1270 */
1271 static int nt_shortest(indexObject *self, const char *node)
1272 {
1273 int level, off;
1274
1275 if (nt_init(self) == -1)
1276 return -3;
1277 if (nt_populate(self) == -1)
1278 return -3;
1279
1280 for (level = off = 0; level < 40; level++) {
1281 int k, v;
1282 nodetree *n = &self->nt[off];
1283 k = nt_level(node, level);
1284 v = n->children[k];
1285 if (v < 0) {
1286 const char *n;
1287 v = -(v + 1);
1288 n = index_node(self, v);
1289 if (n == NULL || memcmp(node, n, 20) != 0)
1290 /*
1291 * Found a unique prefix, but it wasn't for the
1292 * requested node (i.e. the requested node does
1293 * not exist).
1294 */
1295 return -2;
1296 return level + 1;
1297 }
1298 if (v == 0)
1299 return -2;
1300 off = v;
1301 }
1302 /*
1303 * Every inserted node is exactly 40 hex digits long, so the loop
1304 * above always ends early on a unique match or a miss. Getting here
1305 * means a programming error made us insert a longer node.
1306 */
1307 PyErr_SetString(PyExc_Exception, "broken node tree");
1308 return -3;
1309 }
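/*
 * Example with two invented nodes: if the index contains only
 * "1f3a..." and "1f42...", they diverge at the third hex digit, so
 * nt_shortest() returns 3 for the first node ("1f3" is unambiguous).
 * Asking for a node that was never inserted, say "2c00...", falls off
 * an empty child slot at the first level and returns -2.
 */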
1310
1262 1311 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1263 1312 {
1264 1313 const char *fullnode;
1265 1314 int nodelen;
1266 1315 char *node;
1267 1316 int rev, i;
1268 1317
1269 1318 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1270 1319 return NULL;
1271 1320
1272 1321 if (nodelen < 1) {
1273 1322 PyErr_SetString(PyExc_ValueError, "key too short");
1274 1323 return NULL;
1275 1324 }
1276 1325
1277 1326 if (nodelen > 40) {
1278 1327 PyErr_SetString(PyExc_ValueError, "key too long");
1279 1328 return NULL;
1280 1329 }
1281 1330
1282 1331 for (i = 0; i < nodelen; i++)
1283 1332 hexdigit(node, i);
1284 1333 if (PyErr_Occurred()) {
1285 1334 /* input contains non-hex characters */
1286 1335 PyErr_Clear();
1287 1336 Py_RETURN_NONE;
1288 1337 }
1289 1338
1290 1339 rev = nt_partialmatch(self, node, nodelen);
1291 1340
1292 1341 switch (rev) {
1293 1342 case -4:
1294 1343 raise_revlog_error();
1295 1344 case -3:
1296 1345 return NULL;
1297 1346 case -2:
1298 1347 Py_RETURN_NONE;
1299 1348 case -1:
1300 1349 return PyBytes_FromStringAndSize(nullid, 20);
1301 1350 }
1302 1351
1303 1352 fullnode = index_node_existing(self, rev);
1304 1353 if (fullnode == NULL) {
1305 1354 return NULL;
1306 1355 }
1307 1356 return PyBytes_FromStringAndSize(fullnode, 20);
1308 1357 }
1309 1358
1359 static PyObject *index_shortest(indexObject *self, PyObject *args)
1360 {
1361 Py_ssize_t nodelen;
1362 PyObject *val;
1363 char *node;
1364 int length;
1365
1366 if (!PyArg_ParseTuple(args, "O", &val))
1367 return NULL;
1368 if (node_check(val, &node, &nodelen) == -1)
1369 return NULL;
1370
1371 self->ntlookups++;
1372 length = nt_shortest(self, node);
1373 if (length == -3)
1374 return NULL;
1375 if (length == -2) {
1376 raise_revlog_error();
1377 return NULL;
1378 }
1379 return PyInt_FromLong(length);
1380 }
1381
1310 1382 static PyObject *index_m_get(indexObject *self, PyObject *args)
1311 1383 {
1312 1384 Py_ssize_t nodelen;
1313 1385 PyObject *val;
1314 1386 char *node;
1315 1387 int rev;
1316 1388
1317 1389 if (!PyArg_ParseTuple(args, "O", &val))
1318 1390 return NULL;
1319 1391 if (node_check(val, &node, &nodelen) == -1)
1320 1392 return NULL;
1321 1393 rev = index_find_node(self, node, nodelen);
1322 1394 if (rev == -3)
1323 1395 return NULL;
1324 1396 if (rev == -2)
1325 1397 Py_RETURN_NONE;
1326 1398 return PyInt_FromLong(rev);
1327 1399 }
1328 1400
1329 1401 static int index_contains(indexObject *self, PyObject *value)
1330 1402 {
1331 1403 char *node;
1332 1404 Py_ssize_t nodelen;
1333 1405
1334 1406 if (PyInt_Check(value)) {
1335 1407 long rev = PyInt_AS_LONG(value);
1336 1408 return rev >= -1 && rev < index_length(self);
1337 1409 }
1338 1410
1339 1411 if (node_check(value, &node, &nodelen) == -1)
1340 1412 return -1;
1341 1413
1342 1414 switch (index_find_node(self, node, nodelen)) {
1343 1415 case -3:
1344 1416 return -1;
1345 1417 case -2:
1346 1418 return 0;
1347 1419 default:
1348 1420 return 1;
1349 1421 }
1350 1422 }
1351 1423
1352 1424 typedef uint64_t bitmask;
1353 1425
1354 1426 /*
1355 1427 * Given a disjoint set of revs, return all candidates for the
1356 1428 * greatest common ancestor. In revset notation, this is the set
1357 1429 * "heads(::a and ::b and ...)"
1358 1430 */
1359 1431 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1360 1432 int revcount)
1361 1433 {
1362 1434 const bitmask allseen = (1ull << revcount) - 1;
1363 1435 const bitmask poison = 1ull << revcount;
1364 1436 PyObject *gca = PyList_New(0);
1365 1437 int i, v, interesting;
1366 1438 int maxrev = -1;
1367 1439 bitmask sp;
1368 1440 bitmask *seen;
1369 1441
1370 1442 if (gca == NULL)
1371 1443 return PyErr_NoMemory();
1372 1444
1373 1445 for (i = 0; i < revcount; i++) {
1374 1446 if (revs[i] > maxrev)
1375 1447 maxrev = revs[i];
1376 1448 }
1377 1449
1378 1450 seen = calloc(sizeof(*seen), maxrev + 1);
1379 1451 if (seen == NULL) {
1380 1452 Py_DECREF(gca);
1381 1453 return PyErr_NoMemory();
1382 1454 }
1383 1455
1384 1456 for (i = 0; i < revcount; i++)
1385 1457 seen[revs[i]] = 1ull << i;
1386 1458
1387 1459 interesting = revcount;
1388 1460
1389 1461 for (v = maxrev; v >= 0 && interesting; v--) {
1390 1462 bitmask sv = seen[v];
1391 1463 int parents[2];
1392 1464
1393 1465 if (!sv)
1394 1466 continue;
1395 1467
1396 1468 if (sv < poison) {
1397 1469 interesting -= 1;
1398 1470 if (sv == allseen) {
1399 1471 PyObject *obj = PyInt_FromLong(v);
1400 1472 if (obj == NULL)
1401 1473 goto bail;
1402 1474 if (PyList_Append(gca, obj) == -1) {
1403 1475 Py_DECREF(obj);
1404 1476 goto bail;
1405 1477 }
1406 1478 sv |= poison;
1407 1479 for (i = 0; i < revcount; i++) {
1408 1480 if (revs[i] == v)
1409 1481 goto done;
1410 1482 }
1411 1483 }
1412 1484 }
1413 1485 if (index_get_parents(self, v, parents, maxrev) < 0)
1414 1486 goto bail;
1415 1487
1416 1488 for (i = 0; i < 2; i++) {
1417 1489 int p = parents[i];
1418 1490 if (p == -1)
1419 1491 continue;
1420 1492 sp = seen[p];
1421 1493 if (sv < poison) {
1422 1494 if (sp == 0) {
1423 1495 seen[p] = sv;
1424 1496 interesting++;
1425 1497 }
1426 1498 else if (sp != sv)
1427 1499 seen[p] |= sv;
1428 1500 } else {
1429 1501 if (sp && sp < poison)
1430 1502 interesting--;
1431 1503 seen[p] = sv;
1432 1504 }
1433 1505 }
1434 1506 }
1435 1507
1436 1508 done:
1437 1509 free(seen);
1438 1510 return gca;
1439 1511 bail:
1440 1512 free(seen);
1441 1513 Py_XDECREF(gca);
1442 1514 return NULL;
1443 1515 }
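/*
 * Bitmask bookkeeping sketch (revs invented): for revs {5, 8},
 * revcount == 2, so allseen == 0x3 and poison == 0x4. Rev 5 starts
 * with seen[5] == 0x1 and rev 8 with seen[8] == 0x2; walking revs
 * downward ORs each mask into the parents' masks. Any rev whose mask
 * reaches 0x3 is an ancestor of both, gets appended to the result,
 * and is poisoned so its own ancestors can no longer qualify.
 */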
1444 1516
1445 1517 /*
1446 1518 * Given a disjoint set of revs, return the subset with the longest
1447 1519 * path to the root.
1448 1520 */
1449 1521 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1450 1522 {
1451 1523 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1452 1524 static const Py_ssize_t capacity = 24;
1453 1525 int *depth, *interesting = NULL;
1454 1526 int i, j, v, ninteresting;
1455 1527 PyObject *dict = NULL, *keys = NULL;
1456 1528 long *seen = NULL;
1457 1529 int maxrev = -1;
1458 1530 long final;
1459 1531
1460 1532 if (revcount > capacity) {
1461 1533 PyErr_Format(PyExc_OverflowError,
1462 1534 "bitset size (%ld) > capacity (%ld)",
1463 1535 (long)revcount, (long)capacity);
1464 1536 return NULL;
1465 1537 }
1466 1538
1467 1539 for (i = 0; i < revcount; i++) {
1468 1540 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1469 1541 if (n > maxrev)
1470 1542 maxrev = n;
1471 1543 }
1472 1544
1473 1545 depth = calloc(sizeof(*depth), maxrev + 1);
1474 1546 if (depth == NULL)
1475 1547 return PyErr_NoMemory();
1476 1548
1477 1549 seen = calloc(sizeof(*seen), maxrev + 1);
1478 1550 if (seen == NULL) {
1479 1551 PyErr_NoMemory();
1480 1552 goto bail;
1481 1553 }
1482 1554
1483 1555 interesting = calloc(sizeof(*interesting), 1 << revcount);
1484 1556 if (interesting == NULL) {
1485 1557 PyErr_NoMemory();
1486 1558 goto bail;
1487 1559 }
1488 1560
1489 1561 if (PyList_Sort(revs) == -1)
1490 1562 goto bail;
1491 1563
1492 1564 for (i = 0; i < revcount; i++) {
1493 1565 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1494 1566 long b = 1l << i;
1495 1567 depth[n] = 1;
1496 1568 seen[n] = b;
1497 1569 interesting[b] = 1;
1498 1570 }
1499 1571
1500 1572 /* invariant: ninteresting is the number of non-zero entries in
1501 1573 * interesting. */
1502 1574 ninteresting = (int)revcount;
1503 1575
1504 1576 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1505 1577 int dv = depth[v];
1506 1578 int parents[2];
1507 1579 long sv;
1508 1580
1509 1581 if (dv == 0)
1510 1582 continue;
1511 1583
1512 1584 sv = seen[v];
1513 1585 if (index_get_parents(self, v, parents, maxrev) < 0)
1514 1586 goto bail;
1515 1587
1516 1588 for (i = 0; i < 2; i++) {
1517 1589 int p = parents[i];
1518 1590 long sp;
1519 1591 int dp;
1520 1592
1521 1593 if (p == -1)
1522 1594 continue;
1523 1595
1524 1596 dp = depth[p];
1525 1597 sp = seen[p];
1526 1598 if (dp <= dv) {
1527 1599 depth[p] = dv + 1;
1528 1600 if (sp != sv) {
1529 1601 interesting[sv] += 1;
1530 1602 seen[p] = sv;
1531 1603 if (sp) {
1532 1604 interesting[sp] -= 1;
1533 1605 if (interesting[sp] == 0)
1534 1606 ninteresting -= 1;
1535 1607 }
1536 1608 }
1537 1609 }
1538 1610 else if (dv == dp - 1) {
1539 1611 long nsp = sp | sv;
1540 1612 if (nsp == sp)
1541 1613 continue;
1542 1614 seen[p] = nsp;
1543 1615 interesting[sp] -= 1;
1544 1616 if (interesting[sp] == 0)
1545 1617 ninteresting -= 1;
1546 1618 if (interesting[nsp] == 0)
1547 1619 ninteresting += 1;
1548 1620 interesting[nsp] += 1;
1549 1621 }
1550 1622 }
1551 1623 interesting[sv] -= 1;
1552 1624 if (interesting[sv] == 0)
1553 1625 ninteresting -= 1;
1554 1626 }
1555 1627
1556 1628 final = 0;
1557 1629 j = ninteresting;
1558 1630 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1559 1631 if (interesting[i] == 0)
1560 1632 continue;
1561 1633 final |= i;
1562 1634 j -= 1;
1563 1635 }
1564 1636 if (final == 0) {
1565 1637 keys = PyList_New(0);
1566 1638 goto bail;
1567 1639 }
1568 1640
1569 1641 dict = PyDict_New();
1570 1642 if (dict == NULL)
1571 1643 goto bail;
1572 1644
1573 1645 for (i = 0; i < revcount; i++) {
1574 1646 PyObject *key;
1575 1647
1576 1648 if ((final & (1 << i)) == 0)
1577 1649 continue;
1578 1650
1579 1651 key = PyList_GET_ITEM(revs, i);
1580 1652 Py_INCREF(key);
1581 1653 Py_INCREF(Py_None);
1582 1654 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1583 1655 Py_DECREF(key);
1584 1656 Py_DECREF(Py_None);
1585 1657 goto bail;
1586 1658 }
1587 1659 }
1588 1660
1589 1661 keys = PyDict_Keys(dict);
1590 1662
1591 1663 bail:
1592 1664 free(depth);
1593 1665 free(seen);
1594 1666 free(interesting);
1595 1667 Py_XDECREF(dict);
1596 1668
1597 1669 return keys;
1598 1670 }
1599 1671
1600 1672 /*
1601 1673 * Given a (possibly overlapping) set of revs, return all the
1602 1674 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
1603 1675 */
1604 1676 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1605 1677 {
1606 1678 PyObject *ret = NULL;
1607 1679 Py_ssize_t argcount, i, len;
1608 1680 bitmask repeat = 0;
1609 1681 int revcount = 0;
1610 1682 int *revs;
1611 1683
1612 1684 argcount = PySequence_Length(args);
1613 1685 revs = PyMem_Malloc(argcount * sizeof(*revs));
1614 1686 if (argcount > 0 && revs == NULL)
1615 1687 return PyErr_NoMemory();
1616 1688 len = index_length(self) - 1;
1617 1689
1618 1690 for (i = 0; i < argcount; i++) {
1619 1691 static const int capacity = 24;
1620 1692 PyObject *obj = PySequence_GetItem(args, i);
1621 1693 bitmask x;
1622 1694 long val;
1623 1695
1624 1696 if (!PyInt_Check(obj)) {
1625 1697 PyErr_SetString(PyExc_TypeError,
1626 1698 "arguments must all be ints");
1627 1699 Py_DECREF(obj);
1628 1700 goto bail;
1629 1701 }
1630 1702 val = PyInt_AsLong(obj);
1631 1703 Py_DECREF(obj);
1632 1704 if (val == -1) {
1633 1705 ret = PyList_New(0);
1634 1706 goto done;
1635 1707 }
1636 1708 if (val < 0 || val >= len) {
1637 1709 PyErr_SetString(PyExc_IndexError,
1638 1710 "index out of range");
1639 1711 goto bail;
1640 1712 }
1641 1713 /* this cheesy bloom filter lets us avoid some more
1642 1714 * expensive duplicate checks in the common set-is-disjoint
1643 1715 * case */
1644 1716 x = 1ull << (val & 0x3f);
1645 1717 if (repeat & x) {
1646 1718 int k;
1647 1719 for (k = 0; k < revcount; k++) {
1648 1720 if (val == revs[k])
1649 1721 goto duplicate;
1650 1722 }
1651 1723 }
1652 1724 else repeat |= x;
1653 1725 if (revcount >= capacity) {
1654 1726 PyErr_Format(PyExc_OverflowError,
1655 1727 "bitset size (%d) > capacity (%d)",
1656 1728 revcount, capacity);
1657 1729 goto bail;
1658 1730 }
1659 1731 revs[revcount++] = (int)val;
1660 1732 duplicate:;
1661 1733 }
1662 1734
1663 1735 if (revcount == 0) {
1664 1736 ret = PyList_New(0);
1665 1737 goto done;
1666 1738 }
1667 1739 if (revcount == 1) {
1668 1740 PyObject *obj;
1669 1741 ret = PyList_New(1);
1670 1742 if (ret == NULL)
1671 1743 goto bail;
1672 1744 obj = PyInt_FromLong(revs[0]);
1673 1745 if (obj == NULL)
1674 1746 goto bail;
1675 1747 PyList_SET_ITEM(ret, 0, obj);
1676 1748 goto done;
1677 1749 }
1678 1750
1679 1751 ret = find_gca_candidates(self, revs, revcount);
1680 1752 if (ret == NULL)
1681 1753 goto bail;
1682 1754
1683 1755 done:
1684 1756 PyMem_Free(revs);
1685 1757 return ret;
1686 1758
1687 1759 bail:
1688 1760 PyMem_Free(revs);
1689 1761 Py_XDECREF(ret);
1690 1762 return NULL;
1691 1763 }
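/*
 * The "cheesy bloom filter" above, concretely: each argument sets the
 * single bit 1ull << (val & 0x3f) in a 64-bit mask, so e.g. revs 3
 * and 67 collide (both map to bit 3), and only such a collision
 * triggers the O(revcount) duplicate scan. Disjoint inputs usually
 * touch distinct bits and skip the scan. The revs are illustrative.
 */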
1692 1764
1693 1765 /*
1694 1766 * Given a (possibly overlapping) set of revs, return the greatest
1695 1767 * common ancestors: those with the longest path to the root.
1696 1768 */
1697 1769 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1698 1770 {
1699 1771 PyObject *ret;
1700 1772 PyObject *gca = index_commonancestorsheads(self, args);
1701 1773 if (gca == NULL)
1702 1774 return NULL;
1703 1775
1704 1776 if (PyList_GET_SIZE(gca) <= 1) {
1705 1777 return gca;
1706 1778 }
1707 1779
1708 1780 ret = find_deepest(self, gca);
1709 1781 Py_DECREF(gca);
1710 1782 return ret;
1711 1783 }
1712 1784
1713 1785 /*
1714 1786 * Invalidate any trie entries introduced by added revs.
1715 1787 */
1716 1788 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1717 1789 {
1718 1790 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1719 1791
1720 1792 for (i = start; i < len; i++) {
1721 1793 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1722 1794 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1723 1795
1724 1796 nt_insert(self, PyBytes_AS_STRING(node), -1);
1725 1797 }
1726 1798
1727 1799 if (start == 0)
1728 1800 Py_CLEAR(self->added);
1729 1801 }
1730 1802
1731 1803 /*
1732 1804 * Delete a numeric range of revs, which must be at the end of the
1733 1805 * index, but exclude the sentinel nullid entry.
1734 1806 */
1735 1807 static int index_slice_del(indexObject *self, PyObject *item)
1736 1808 {
1737 1809 Py_ssize_t start, stop, step, slicelength;
1738 1810 Py_ssize_t length = index_length(self);
1739 1811 int ret = 0;
1740 1812
1741 1813 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1742 1814 #ifdef IS_PY3K
1743 1815 if (PySlice_GetIndicesEx(item, length,
1744 1816 #else
1745 1817 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1746 1818 #endif
1747 1819 &start, &stop, &step, &slicelength) < 0)
1748 1820 return -1;
1749 1821
1750 1822 if (slicelength <= 0)
1751 1823 return 0;
1752 1824
1753 1825 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1754 1826 stop = start;
1755 1827
1756 1828 if (step < 0) {
1757 1829 stop = start + 1;
1758 1830 start = stop + step*(slicelength - 1) - 1;
1759 1831 step = -step;
1760 1832 }
1761 1833
1762 1834 if (step != 1) {
1763 1835 PyErr_SetString(PyExc_ValueError,
1764 1836 "revlog index delete requires step size of 1");
1765 1837 return -1;
1766 1838 }
1767 1839
1768 1840 if (stop != length - 1) {
1769 1841 PyErr_SetString(PyExc_IndexError,
1770 1842 "revlog index deletion indices are invalid");
1771 1843 return -1;
1772 1844 }
1773 1845
1774 1846 if (start < self->length - 1) {
1775 1847 if (self->nt) {
1776 1848 Py_ssize_t i;
1777 1849
1778 1850 for (i = start + 1; i < self->length - 1; i++) {
1779 1851 const char *node = index_node(self, i);
1780 1852
1781 1853 if (node)
1782 1854 nt_insert(self, node, -1);
1783 1855 }
1784 1856 if (self->added)
1785 1857 nt_invalidate_added(self, 0);
1786 1858 if (self->ntrev > start)
1787 1859 self->ntrev = (int)start;
1788 1860 }
1789 1861 self->length = start + 1;
1790 1862 if (start < self->raw_length) {
1791 1863 if (self->cache) {
1792 1864 Py_ssize_t i;
1793 1865 for (i = start; i < self->raw_length; i++)
1794 1866 Py_CLEAR(self->cache[i]);
1795 1867 }
1796 1868 self->raw_length = start;
1797 1869 }
1798 1870 goto done;
1799 1871 }
1800 1872
1801 1873 if (self->nt) {
1802 1874 nt_invalidate_added(self, start - self->length + 1);
1803 1875 if (self->ntrev > start)
1804 1876 self->ntrev = (int)start;
1805 1877 }
1806 1878 if (self->added)
1807 1879 ret = PyList_SetSlice(self->added, start - self->length + 1,
1808 1880 PyList_GET_SIZE(self->added), NULL);
1809 1881 done:
1810 1882 Py_CLEAR(self->headrevs);
1811 1883 return ret;
1812 1884 }
1813 1885
1814 1886 /*
1815 1887 * Supported ops:
1816 1888 *
1817 1889 * slice deletion
1818 1890 * string assignment (extend node->rev mapping)
1819 1891 * string deletion (shrink node->rev mapping)
1820 1892 */
1821 1893 static int index_assign_subscript(indexObject *self, PyObject *item,
1822 1894 PyObject *value)
1823 1895 {
1824 1896 char *node;
1825 1897 Py_ssize_t nodelen;
1826 1898 long rev;
1827 1899
1828 1900 if (PySlice_Check(item) && value == NULL)
1829 1901 return index_slice_del(self, item);
1830 1902
1831 1903 if (node_check(item, &node, &nodelen) == -1)
1832 1904 return -1;
1833 1905
1834 1906 if (value == NULL)
1835 1907 return self->nt ? nt_insert(self, node, -1) : 0;
1836 1908 rev = PyInt_AsLong(value);
1837 1909 if (rev > INT_MAX || rev < 0) {
1838 1910 if (!PyErr_Occurred())
1839 1911 PyErr_SetString(PyExc_ValueError, "rev out of range");
1840 1912 return -1;
1841 1913 }
1842 1914
1843 1915 if (nt_init(self) == -1)
1844 1916 return -1;
1845 1917 return nt_insert(self, node, (int)rev);
1846 1918 }
1847 1919
1848 1920 /*
1849 1921 * Find all RevlogNG entries in an index that has inline data. Update
1850 1922 * the optional "offsets" table with those entries.
1851 1923 */
1852 1924 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1853 1925 {
1854 1926 const char *data = (const char *)self->buf.buf;
1855 1927 Py_ssize_t pos = 0;
1856 1928 Py_ssize_t end = self->buf.len;
1857 1929 long incr = v1_hdrsize;
1858 1930 Py_ssize_t len = 0;
1859 1931
1860 1932 while (pos + v1_hdrsize <= end && pos >= 0) {
1861 1933 uint32_t comp_len;
1862 1934 /* 3rd element of header is length of compressed inline data */
1863 1935 comp_len = getbe32(data + pos + 8);
1864 1936 incr = v1_hdrsize + comp_len;
1865 1937 if (offsets)
1866 1938 offsets[len] = data + pos;
1867 1939 len++;
1868 1940 pos += incr;
1869 1941 }
1870 1942
1871 1943 if (pos != end) {
1872 1944 if (!PyErr_Occurred())
1873 1945 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1874 1946 return -1;
1875 1947 }
1876 1948
1877 1949 return len;
1878 1950 }
1879 1951
1880 1952 static int index_init(indexObject *self, PyObject *args)
1881 1953 {
1882 1954 PyObject *data_obj, *inlined_obj;
1883 1955 Py_ssize_t size;
1884 1956
1885 1957 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1886 1958 self->raw_length = 0;
1887 1959 self->added = NULL;
1888 1960 self->cache = NULL;
1889 1961 self->data = NULL;
1890 1962 memset(&self->buf, 0, sizeof(self->buf));
1891 1963 self->headrevs = NULL;
1892 1964 self->filteredrevs = Py_None;
1893 1965 Py_INCREF(Py_None);
1894 1966 self->nt = NULL;
1895 1967 self->offsets = NULL;
1896 1968
1897 1969 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1898 1970 return -1;
1899 1971 if (!PyObject_CheckBuffer(data_obj)) {
1900 1972 PyErr_SetString(PyExc_TypeError,
1901 1973 "data does not support buffer interface");
1902 1974 return -1;
1903 1975 }
1904 1976
1905 1977 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1906 1978 return -1;
1907 1979 size = self->buf.len;
1908 1980
1909 1981 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1910 1982 self->data = data_obj;
1911 1983
1912 1984 self->ntlength = self->ntcapacity = 0;
1913 1985 self->ntdepth = self->ntsplits = 0;
1914 1986 self->ntlookups = self->ntmisses = 0;
1915 1987 self->ntrev = -1;
1916 1988 Py_INCREF(self->data);
1917 1989
1918 1990 if (self->inlined) {
1919 1991 Py_ssize_t len = inline_scan(self, NULL);
1920 1992 if (len == -1)
1921 1993 goto bail;
1922 1994 self->raw_length = len;
1923 1995 self->length = len + 1;
1924 1996 } else {
1925 1997 if (size % v1_hdrsize) {
1926 1998 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1927 1999 goto bail;
1928 2000 }
1929 2001 self->raw_length = size / v1_hdrsize;
1930 2002 self->length = self->raw_length + 1;
1931 2003 }
1932 2004
1933 2005 return 0;
1934 2006 bail:
1935 2007 return -1;
1936 2008 }
1937 2009
1938 2010 static PyObject *index_nodemap(indexObject *self)
1939 2011 {
1940 2012 Py_INCREF(self);
1941 2013 return (PyObject *)self;
1942 2014 }
1943 2015
1944 2016 static void index_dealloc(indexObject *self)
1945 2017 {
1946 2018 _index_clearcaches(self);
1947 2019 Py_XDECREF(self->filteredrevs);
1948 2020 if (self->buf.buf) {
1949 2021 PyBuffer_Release(&self->buf);
1950 2022 memset(&self->buf, 0, sizeof(self->buf));
1951 2023 }
1952 2024 Py_XDECREF(self->data);
1953 2025 Py_XDECREF(self->added);
1954 2026 PyObject_Del(self);
1955 2027 }
1956 2028
1957 2029 static PySequenceMethods index_sequence_methods = {
1958 2030 (lenfunc)index_length, /* sq_length */
1959 2031 0, /* sq_concat */
1960 2032 0, /* sq_repeat */
1961 2033 (ssizeargfunc)index_get, /* sq_item */
1962 2034 0, /* sq_slice */
1963 2035 0, /* sq_ass_item */
1964 2036 0, /* sq_ass_slice */
1965 2037 (objobjproc)index_contains, /* sq_contains */
1966 2038 };
1967 2039
1968 2040 static PyMappingMethods index_mapping_methods = {
1969 2041 (lenfunc)index_length, /* mp_length */
1970 2042 (binaryfunc)index_getitem, /* mp_subscript */
1971 2043 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1972 2044 };
1973 2045
1974 2046 static PyMethodDef index_methods[] = {
1975 2047 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1976 2048 "return the gca set of the given revs"},
1977 2049 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1978 2050 METH_VARARGS,
1979 2051 "return the heads of the common ancestors of the given revs"},
1980 2052 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1981 2053 "clear the index caches"},
1982 2054 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1983 2055 "get an index entry"},
1984 2056 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
1985 2057 METH_VARARGS, "compute phases"},
1986 2058 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
1987 2059 "reachableroots"},
1988 2060 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
1989 2061 "get head revisions"}, /* Can do filtering since 3.2 */
1990 2062 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
1991 2063 "get filtered head revisions"}, /* Can always do filtering */
1992 2064 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
1993 2065 "determine revisions with deltas to reconstruct fulltext"},
1994 2066 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1995 2067 "insert an index entry"},
1996 2068 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1997 2069 "match a potentially ambiguous node ID"},
2070 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2071 "find length of shortest hex nodeid of a binary ID"},
1998 2072 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1999 2073 "stats for the index"},
2000 2074 {NULL} /* Sentinel */
2001 2075 };
2002 2076
2003 2077 static PyGetSetDef index_getset[] = {
2004 2078 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2005 2079 {NULL} /* Sentinel */
2006 2080 };
2007 2081
2008 2082 static PyTypeObject indexType = {
2009 2083 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2010 2084 "parsers.index", /* tp_name */
2011 2085 sizeof(indexObject), /* tp_basicsize */
2012 2086 0, /* tp_itemsize */
2013 2087 (destructor)index_dealloc, /* tp_dealloc */
2014 2088 0, /* tp_print */
2015 2089 0, /* tp_getattr */
2016 2090 0, /* tp_setattr */
2017 2091 0, /* tp_compare */
2018 2092 0, /* tp_repr */
2019 2093 0, /* tp_as_number */
2020 2094 &index_sequence_methods, /* tp_as_sequence */
2021 2095 &index_mapping_methods, /* tp_as_mapping */
2022 2096 0, /* tp_hash */
2023 2097 0, /* tp_call */
2024 2098 0, /* tp_str */
2025 2099 0, /* tp_getattro */
2026 2100 0, /* tp_setattro */
2027 2101 0, /* tp_as_buffer */
2028 2102 Py_TPFLAGS_DEFAULT, /* tp_flags */
2029 2103 "revlog index", /* tp_doc */
2030 2104 0, /* tp_traverse */
2031 2105 0, /* tp_clear */
2032 2106 0, /* tp_richcompare */
2033 2107 0, /* tp_weaklistoffset */
2034 2108 0, /* tp_iter */
2035 2109 0, /* tp_iternext */
2036 2110 index_methods, /* tp_methods */
2037 2111 0, /* tp_members */
2038 2112 index_getset, /* tp_getset */
2039 2113 0, /* tp_base */
2040 2114 0, /* tp_dict */
2041 2115 0, /* tp_descr_get */
2042 2116 0, /* tp_descr_set */
2043 2117 0, /* tp_dictoffset */
2044 2118 (initproc)index_init, /* tp_init */
2045 2119 0, /* tp_alloc */
2046 2120 };
2047 2121
2048 2122 /*
2049 2123 * returns a tuple of the form (index, cache) with elements as
2050 2124 * follows:
2051 2125 *
2052 2126 * index: an index object that lazily parses RevlogNG records
2053 2127 * cache: if data is inlined, a tuple (0, index_file_content), else None
2054 2128 * index_file_content could be a string, or a buffer
2055 2129 *
2056 2130 * added complications are for backwards compatibility
2057 2131 */
2058 2132 PyObject *parse_index2(PyObject *self, PyObject *args)
2059 2133 {
2060 2134 PyObject *tuple = NULL, *cache = NULL;
2061 2135 indexObject *idx;
2062 2136 int ret;
2063 2137
2064 2138 idx = PyObject_New(indexObject, &indexType);
2065 2139 if (idx == NULL)
2066 2140 goto bail;
2067 2141
2068 2142 ret = index_init(idx, args);
2069 2143 if (ret == -1)
2070 2144 goto bail;
2071 2145
2072 2146 if (idx->inlined) {
2073 2147 cache = Py_BuildValue("iO", 0, idx->data);
2074 2148 if (cache == NULL)
2075 2149 goto bail;
2076 2150 } else {
2077 2151 cache = Py_None;
2078 2152 Py_INCREF(cache);
2079 2153 }
2080 2154
2081 2155 tuple = Py_BuildValue("NN", idx, cache);
2082 2156 if (!tuple)
2083 2157 goto bail;
2084 2158 return tuple;
2085 2159
2086 2160 bail:
2087 2161 Py_XDECREF(idx);
2088 2162 Py_XDECREF(cache);
2089 2163 Py_XDECREF(tuple);
2090 2164 return NULL;
2091 2165 }
2092 2166
2093 2167 void revlog_module_init(PyObject *mod)
2094 2168 {
2095 2169 indexType.tp_new = PyType_GenericNew;
2096 2170 if (PyType_Ready(&indexType) < 0)
2097 2171 return;
2098 2172 Py_INCREF(&indexType);
2099 2173 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2100 2174
2101 2175 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2102 2176 -1, -1, -1, -1, nullid, 20);
2103 2177 if (nullentry)
2104 2178 PyObject_GC_UnTrack(nullentry);
2105 2179 }
@@ -1,109 +1,109
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 # Rules for how modules can be loaded. Values are:
14 14 #
15 15 # c - require C extensions
16 16 # allow - allow pure Python implementation when C loading fails
17 17 # cffi - required cffi versions (implemented within pure module)
18 18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 19 # py - only load pure Python modules
20 20 #
21 21 # By default, fall back to the pure modules so the in-place build can
22 22 # run without recompiling the C extensions. This will be overridden by
23 23 # __modulepolicy__ generated by setup.py.
24 24 policy = b'allow'
25 25 _packageprefs = {
26 26 # policy: (versioned package, pure package)
27 27 b'c': (r'cext', None),
28 28 b'allow': (r'cext', r'pure'),
29 29 b'cffi': (r'cffi', None),
30 30 b'cffi-allow': (r'cffi', r'pure'),
31 31 b'py': (None, r'pure'),
32 32 }
33 33
34 34 try:
35 35 from . import __modulepolicy__
36 36 policy = __modulepolicy__.modulepolicy
37 37 except ImportError:
38 38 pass
39 39
40 40 # PyPy doesn't load C extensions.
41 41 #
42 42 # The canonical way to do this is to test platform.python_implementation().
43 43 # But we avoid importing platform just for this one check.
44 44 if r'__pypy__' in sys.builtin_module_names:
45 45 policy = b'cffi'
46 46
47 47 # Environment variable can always force settings.
48 48 if sys.version_info[0] >= 3:
49 49 if r'HGMODULEPOLICY' in os.environ:
50 50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
51 51 else:
52 52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
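# For example, a py3 build can be forced onto the pure-Python modules
# for a single invocation (illustrative shell usage, not part of this
# module):
#
#   $ HGMODULEPOLICY=py hg version
#
# which makes importmod() below resolve every module from the "pure"
# package.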
53 53
54 54 def _importfrom(pkgname, modname):
55 55 # from .<pkgname> import <modname> (where . is looked through this module)
56 56 fakelocals = {}
57 57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
58 58 try:
59 59 fakelocals[modname] = mod = getattr(pkg, modname)
60 60 except AttributeError:
61 61 raise ImportError(r'cannot import name %s' % modname)
62 62 # force import; fakelocals[modname] may be replaced with the real module
63 63 getattr(mod, r'__doc__', None)
64 64 return fakelocals[modname]
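# For instance, _importfrom(r'cext', r'parsers') behaves like
# "from .cext import parsers" evaluated relative to this package and
# returns the module object; importmod() below uses exactly this to
# load the C implementation when the policy allows it.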
65 65
66 66 # keep in sync with "version" in C modules
67 67 _cextversions = {
68 68 (r'cext', r'base85'): 1,
69 69 (r'cext', r'bdiff'): 3,
70 70 (r'cext', r'mpatch'): 1,
71 71 (r'cext', r'osutil'): 4,
72 (r'cext', r'parsers'): 4,
72 (r'cext', r'parsers'): 5,
73 73 }
74 74
75 75 # map import request to other package or module
76 76 _modredirects = {
77 77 (r'cext', r'charencode'): (r'cext', r'parsers'),
78 78 (r'cffi', r'base85'): (r'pure', r'base85'),
79 79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
80 80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
81 81 }
82 82
83 83 def _checkmod(pkgname, modname, mod):
84 84 expected = _cextversions.get((pkgname, modname))
85 85 actual = getattr(mod, r'version', None)
86 86 if actual != expected:
87 87 raise ImportError(r'cannot import module %s.%s '
88 88 r'(expected version: %d, actual: %r)'
89 89 % (pkgname, modname, expected, actual))
90 90
91 91 def importmod(modname):
92 92 """Import module according to policy and check API version"""
93 93 try:
94 94 verpkg, purepkg = _packageprefs[policy]
95 95 except KeyError:
96 96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
97 97 assert verpkg or purepkg
98 98 if verpkg:
99 99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
100 100 try:
101 101 mod = _importfrom(pn, mn)
102 102 if pn == verpkg:
103 103 _checkmod(pn, mn, mod)
104 104 return mod
105 105 except ImportError:
106 106 if not purepkg:
107 107 raise
108 108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
109 109 return _importfrom(pn, mn)
@@ -1,2597 +1,2623
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
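# A round trip through the two helpers above ('copy' is just a sample
# metadata key):
#
#   >>> raw = packmeta({'copy': 'a.txt'}, 'file body')
#   >>> raw
#   '\x01\ncopy: a.txt\n\x01\nfile body'
#   >>> meta, size = parsemeta(raw)
#   >>> meta, raw[size:]
#   ({'copy': 'a.txt'}, 'file body')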
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, the "rawtext" returned by "write" should be used
143 143 to generate the hash. Usually, "write" and "read" return different
144 144 booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
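# A hypothetical registration (the transforms here are invented, not
# part of Mercurial): a processor that stores rawtext reversed under
# REVIDX_EXTSTORED would be wired up as
#
#   def _read(rl, rawtext):
#       return rawtext[::-1], True   # returned text is hash-checkable
#   def _write(rl, text):
#       return text[::-1], False     # hash over "text", not "rawtext"
#   def _raw(rl, rawtext):
#       return False                 # same boolean as _write
#   addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))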
160 160
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
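# The offset lives in the high bits and the flags in the low 16 bits,
# so the packing round-trips losslessly:
#
#   >>> q = offset_type(1048576, REVIDX_ISCENSORED)
#   >>> getoffset(q), gettype(q) == REVIDX_ISCENSORED
#   (1048576, True)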
171 171
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent node is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
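# Equivalently, for two non-null parents the digest is
# sha1(min(p1, p2) + max(p1, p2) + text):
#
#   >>> p1, p2 = b'\x11' * 20, b'\x22' * 20
#   >>> hash(b'data', p1, p2) == hashlib.sha1(p1 + p2 + b'data').digest()
#   True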
198 198
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
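# For example, with (invented) chunk lengths {10: 5, 11: 0, 12: 0},
# _trimchunk(revlog, [10, 11, 12], 0) returns [10]: both trailing
# empty revs are dropped. A chain like [11, 12] would still keep its
# first rev, empty or not, because endidx never goes below 1.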
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should each be read in one go.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # gap sizes are stored as negatives so the heap pops the
266 266 # largest gap first
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
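# A sketch with invented numbers: revs [0, 1, 2] stored at bytes
# 0-99, 100-199 and 10000-10099 span 10100 bytes for only 300 bytes of
# payload, a density of ~0.03. With the default _srdensitythreshold of
# 0.25 and a _srmingapsize below the 9800-byte hole, the gap between
# revs 1 and 2 is cut and the function yields [0, 1] and then [2].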
287 287
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides candidate revisions worth diffing against, grouped by how
305 305 cheap they are expected to be to delta against.
306 306 """
307 307 revlog = self.revlog
308 308 curr = len(revlog)
309 309 prev = curr - 1
310 310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311 311
312 312 # should we try to build a delta?
313 313 if prev != nullrev and revlog.storedeltachains:
314 314 tested = set()
315 315 # This condition is true most of the time when processing
316 316 # changegroup data into a generaldelta repo. The only time it
317 317 # isn't true is if this is the first revision in a delta chain
318 318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 320 # Assume what we received from the server is a good choice
321 321 # building the delta will reuse the cache
322 322 yield (cachedelta[0],)
323 323 tested.add(cachedelta[0])
324 324
325 325 if revlog._generaldelta:
326 326 # exclude already lazy tested base if any
327 327 parents = [p for p in (p1r, p2r)
328 328 if p != nullrev and p not in tested]
329 329 if parents and not revlog._aggressivemergedeltas:
330 330 # Pick whichever parent is closer to us (to minimize the
331 331 # chance of having to build a fulltext).
332 332 parents = [max(parents)]
333 333 tested.update(parents)
334 334 yield parents
335 335
336 336 if prev not in tested:
337 337 # other approaches failed; try against prev to hopefully save us a
338 338 # fulltext.
339 339 yield (prev,)
340 340
341 341 def buildtext(self, revinfo, fh):
342 342 """Builds a fulltext version of a revision
343 343
344 344 revinfo: _revisioninfo instance that contains all needed info
345 345 fh: file handle to either the .i or the .d revlog file,
346 346 depending on whether it is inlined or not
347 347 """
348 348 btext = revinfo.btext
349 349 if btext[0] is not None:
350 350 return btext[0]
351 351
352 352 revlog = self.revlog
353 353 cachedelta = revinfo.cachedelta
354 354 flags = revinfo.flags
355 355 node = revinfo.node
356 356
357 357 baserev = cachedelta[0]
358 358 delta = cachedelta[1]
359 359 # special case deltas which replace entire base; no need to decode
360 360 # base revision. this neatly avoids censored bases, which throw when
361 361 # they're decoded.
362 362 hlen = struct.calcsize(">lll")
363 363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 364 len(delta) - hlen):
365 365 btext[0] = delta[hlen:]
366 366 else:
367 367 # deltabase is rawtext before changed by flag processors, which is
368 368 # equivalent to non-raw text
369 369 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 370 btext[0] = mdiff.patch(basetext, delta)
371 371
372 372 try:
373 373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 374 btext[0], validatehash = res
375 375 if validatehash:
376 376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 377 if flags & REVIDX_ISCENSORED:
378 378 raise RevlogError(_('node %s is not censored') % node)
379 379 except CensoredNodeError:
380 380 # must pass the censored index flag to add censored revisions
381 381 if not flags & REVIDX_ISCENSORED:
382 382 raise
383 383 return btext[0]
384 384
385 385 def _builddeltadiff(self, base, revinfo, fh):
386 386 revlog = self.revlog
387 387 t = self.buildtext(revinfo, fh)
388 388 if revlog.iscensored(base):
389 389 # deltas based on a censored revision must replace the
390 390 # full content in one patch, so delta works everywhere
391 391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 392 delta = header + t
393 393 else:
394 394 ptext = revlog.revision(base, _df=fh, raw=True)
395 395 delta = mdiff.textdiff(ptext, t)
396 396
397 397 return delta
398 398
399 399 def _builddeltainfo(self, revinfo, base, fh):
400 400 # can we use the cached delta?
401 401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 402 delta = revinfo.cachedelta[1]
403 403 else:
404 404 delta = self._builddeltadiff(base, revinfo, fh)
405 405 revlog = self.revlog
406 406 header, data = revlog.compress(delta)
407 407 deltalen = len(header) + len(data)
408 408 chainbase = revlog.chainbase(base)
409 409 offset = revlog.end(len(revlog) - 1)
410 410 dist = deltalen + offset - revlog.start(chainbase)
411 411 if revlog._generaldelta:
412 412 deltabase = base
413 413 else:
414 414 deltabase = chainbase
415 415 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 416 chainlen += 1
417 417 compresseddeltalen += deltalen
418 418 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 419 chainbase, chainlen, compresseddeltalen)
420 420
421 421 def finddeltainfo(self, revinfo, fh):
422 422 """Find an acceptable delta against a candidate revision
423 423
424 424 revinfo: information about the revision (instance of _revisioninfo)
425 425 fh: file handle to either the .i or the .d revlog file,
426 426 depending on whether it is inlined or not
427 427
428 428 Returns the first acceptable candidate revision, as ordered by
429 429 _getcandidaterevs
430 430 """
431 431 cachedelta = revinfo.cachedelta
432 432 p1 = revinfo.p1
433 433 p2 = revinfo.p2
434 434 revlog = self.revlog
435 435
436 436 deltainfo = None
437 437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 438 nominateddeltas = []
439 439 for candidaterev in candidaterevs:
440 440 # no delta for rawtext-changing revs (see "candelta" for why)
441 441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 442 continue
443 443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 445 nominateddeltas.append(candidatedelta)
446 446 if nominateddeltas:
447 447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 448 break
449 449
450 450 return deltainfo
451 451
452 452 @attr.s(slots=True, frozen=True)
453 453 class _revisioninfo(object):
454 454 """Information about a revision that allows building its fulltext
455 455 node: expected hash of the revision
456 456 p1, p2: parent revs of the revision
457 457 btext: built text cache consisting of a one-element list
458 458 cachedelta: (baserev, uncompressed_delta) or None
459 459 flags: flags associated to the revision storage
460 460
461 461 One of btext[0] or cachedelta must be set.
462 462 """
463 463 node = attr.ib()
464 464 p1 = attr.ib()
465 465 p2 = attr.ib()
466 466 btext = attr.ib()
467 467 textlen = attr.ib()
468 468 cachedelta = attr.ib()
469 469 flags = attr.ib()
470 470
471 471 # index v0:
472 472 # 4 bytes: offset
473 473 # 4 bytes: compressed length
474 474 # 4 bytes: base rev
475 475 # 4 bytes: link rev
476 476 # 20 bytes: parent 1 nodeid
477 477 # 20 bytes: parent 2 nodeid
478 478 # 20 bytes: nodeid
479 479 indexformatv0 = struct.Struct(">4l20s20s20s")
480 480 indexformatv0_pack = indexformatv0.pack
481 481 indexformatv0_unpack = indexformatv0.unpack
482 482
483 483 class revlogoldio(object):
484 484 def __init__(self):
485 485 self.size = indexformatv0.size
486 486
487 487 def parseindex(self, data, inline):
488 488 s = self.size
489 489 index = []
490 490 nodemap = {nullid: nullrev}
491 491 n = off = 0
492 492 l = len(data)
493 493 while off + s <= l:
494 494 cur = data[off:off + s]
495 495 off += s
496 496 e = indexformatv0_unpack(cur)
497 497 # transform to revlogv1 format
498 498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 500 index.append(e2)
501 501 nodemap[e[6]] = n
502 502 n += 1
503 503
504 504 # add the magic null revision at -1
505 505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506 506
507 507 return index, nodemap, None
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 if gettype(entry[0]):
511 511 raise RevlogError(_('index entry flags need revlog version 1'))
512 512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 513 node(entry[5]), node(entry[6]), entry[7])
514 514 return indexformatv0_pack(*e2)
515 515
516 516 # index ng:
517 517 # 6 bytes: offset
518 518 # 2 bytes: flags
519 519 # 4 bytes: compressed length
520 520 # 4 bytes: uncompressed length
521 521 # 4 bytes: base rev
522 522 # 4 bytes: link rev
523 523 # 4 bytes: parent 1 rev
524 524 # 4 bytes: parent 2 rev
525 525 # 32 bytes: nodeid
526 526 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 527 indexformatng_pack = indexformatng.pack
528 528 versionformat = struct.Struct(">I")
529 529 versionformat_pack = versionformat.pack
530 530 versionformat_unpack = versionformat.unpack
531 531
532 532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 533 # signed integer)
534 534 _maxentrysize = 0x7fffffff
535 535
536 536 class revlogio(object):
537 537 def __init__(self):
538 538 self.size = indexformatng.size
539 539
540 540 def parseindex(self, data, inline):
541 541 # call the C implementation to parse the index data
542 542 index, cache = parsers.parse_index2(data, inline)
543 543 return index, getattr(index, 'nodemap', None), cache
544 544
545 545 def packentry(self, entry, node, version, rev):
546 546 p = indexformatng_pack(*entry)
547 547 if rev == 0:
548 548 p = versionformat_pack(version) + p[4:]
549 549 return p
550 550
551 551 class revlog(object):
552 552 """
553 553 the underlying revision storage object
554 554
555 555 A revlog consists of two parts, an index and the revision data.
556 556
557 557 The index is a file with a fixed record size containing
558 558 information on each revision, including its nodeid (hash), the
559 559 nodeids of its parents, the position and offset of its data within
560 560 the data file, and the revision it's based on. Finally, each entry
561 561 contains a linkrev entry that can serve as a pointer to external
562 562 data.
563 563
564 564 The revision data itself is a linear collection of data chunks.
565 565 Each chunk represents a revision and is usually represented as a
566 566 delta against the previous chunk. To bound lookup time, runs of
567 567 deltas are limited to about 2 times the length of the original
568 568 version data. This makes retrieval of a version proportional to
569 569 its size, or O(1) relative to the number of revisions.
570 570
571 571 Both pieces of the revlog are written to in an append-only
572 572 fashion, which means we never need to rewrite a file to insert or
573 573 remove data, and can use some simple techniques to avoid the need
574 574 for locking while reading.
575 575
576 576 If checkambig, indexfile is opened with checkambig=True at
577 577 writing, to avoid file stat ambiguity.
578 578
579 579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 580 index will be mmapped rather than read if it is larger than the
581 581 configured threshold.
582 582
583 583 If censorable is True, the revlog can have censored revisions.
584 584 """
585 585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 586 mmaplargeindex=False, censorable=False):
587 587 """
588 588 create a revlog object
589 589
590 590 opener is a function that abstracts the file opening operation
591 591 and can be used to implement COW semantics or the like.
592 592 """
593 593 self.indexfile = indexfile
594 594 self.datafile = datafile or (indexfile[:-2] + ".d")
595 595 self.opener = opener
596 596 # When True, indexfile is opened with checkambig=True at writing, to
597 597 # avoid file stat ambiguity.
598 598 self._checkambig = checkambig
599 599 self._censorable = censorable
600 600 # 3-tuple of (node, rev, text) for a raw revision.
601 601 self._cache = None
602 602 # Maps rev to chain base rev.
603 603 self._chainbasecache = util.lrucachedict(100)
604 604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 605 self._chunkcache = (0, '')
606 606 # How much data to read and cache into the raw revlog data cache.
607 607 self._chunkcachesize = 65536
608 608 self._maxchainlen = None
609 609 self._aggressivemergedeltas = False
610 610 self.index = []
611 611 # Mapping of partial identifiers to full nodes.
612 612 self._pcache = {}
613 613 # Mapping of revision integer to full node.
614 614 self._nodecache = {nullid: nullrev}
615 615 self._nodepos = None
616 616 self._compengine = 'zlib'
617 617 self._maxdeltachainspan = -1
618 618 self._withsparseread = False
619 619 self._srdensitythreshold = 0.25
620 620 self._srmingapsize = 262144
621 621
622 622 mmapindexthreshold = None
623 623 v = REVLOG_DEFAULT_VERSION
624 624 opts = getattr(opener, 'options', None)
625 625 if opts is not None:
626 626 if 'revlogv2' in opts:
627 627 # version 2 revlogs always use generaldelta.
628 628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 629 elif 'revlogv1' in opts:
630 630 if 'generaldelta' in opts:
631 631 v |= FLAG_GENERALDELTA
632 632 else:
633 633 v = 0
634 634 if 'chunkcachesize' in opts:
635 635 self._chunkcachesize = opts['chunkcachesize']
636 636 if 'maxchainlen' in opts:
637 637 self._maxchainlen = opts['maxchainlen']
638 638 if 'aggressivemergedeltas' in opts:
639 639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 641 if 'compengine' in opts:
642 642 self._compengine = opts['compengine']
643 643 if 'maxdeltachainspan' in opts:
644 644 self._maxdeltachainspan = opts['maxdeltachainspan']
645 645 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 646 mmapindexthreshold = opts['mmapindexthreshold']
647 647 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 648 if 'sparse-read-density-threshold' in opts:
649 649 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 650 if 'sparse-read-min-gap-size' in opts:
651 651 self._srmingapsize = opts['sparse-read-min-gap-size']
652 652
653 653 if self._chunkcachesize <= 0:
654 654 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 655 'than 0') % self._chunkcachesize)
656 656 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 657 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 658 'of 2') % self._chunkcachesize)
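# (editorial note) "x & (x - 1)" clears the lowest set bit, so it is
# zero exactly when x is a power of two; e.g. 65536 & 65535 == 0, while
# 65537 & 65536 != 0.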
659 659
660 660 indexdata = ''
661 661 self._initempty = True
662 662 try:
663 663 with self._indexfp() as f:
664 664 if (mmapindexthreshold is not None and
665 665 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 666 indexdata = util.buffer(util.mmapread(f))
667 667 else:
668 668 indexdata = f.read()
669 669 if len(indexdata) > 0:
670 670 v = versionformat_unpack(indexdata[:4])[0]
671 671 self._initempty = False
672 672 except IOError as inst:
673 673 if inst.errno != errno.ENOENT:
674 674 raise
675 675
676 676 self.version = v
677 677 self._inline = v & FLAG_INLINE_DATA
678 678 self._generaldelta = v & FLAG_GENERALDELTA
679 679 flags = v & ~0xFFFF
680 680 fmt = v & 0xFFFF
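# (editorial example; constant values assumed from their definitions
# earlier in this module) an inline, generaldelta v1 revlog stores
# v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
#   = 0x1 | 0x10000 | 0x20000 = 0x30001,
# which splits into fmt == 1 and flags == 0x30000.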
681 681 if fmt == REVLOGV0:
682 682 if flags:
683 683 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 684 'revlog %s') %
685 685 (flags >> 16, fmt, self.indexfile))
686 686 elif fmt == REVLOGV1:
687 687 if flags & ~REVLOGV1_FLAGS:
688 688 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 689 'revlog %s') %
690 690 (flags >> 16, fmt, self.indexfile))
691 691 elif fmt == REVLOGV2:
692 692 if flags & ~REVLOGV2_FLAGS:
693 693 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 694 'revlog %s') %
695 695 (flags >> 16, fmt, self.indexfile))
696 696 else:
697 697 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 698 (fmt, self.indexfile))
699 699
700 700 self.storedeltachains = True
701 701
702 702 self._io = revlogio()
703 703 if self.version == REVLOGV0:
704 704 self._io = revlogoldio()
705 705 try:
706 706 d = self._io.parseindex(indexdata, self._inline)
707 707 except (ValueError, IndexError):
708 708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 709 self.index, nodemap, self._chunkcache = d
710 710 if nodemap is not None:
711 711 self.nodemap = self._nodecache = nodemap
712 712 if not self._chunkcache:
713 713 self._chunkclear()
714 714 # revnum -> (chain-length, sum-delta-length)
715 715 self._chaininfocache = {}
716 716 # revlog header -> revlog compressor
717 717 self._decompressors = {}
718 718
719 719 @util.propertycache
720 720 def _compressor(self):
721 721 return util.compengines[self._compengine].revlogcompressor()
722 722
723 723 def _indexfp(self, mode='r'):
724 724 """file object for the revlog's index file"""
725 725 args = {r'mode': mode}
726 726 if mode != 'r':
727 727 args[r'checkambig'] = self._checkambig
728 728 if mode == 'w':
729 729 args[r'atomictemp'] = True
730 730 return self.opener(self.indexfile, **args)
731 731
732 732 def _datafp(self, mode='r'):
733 733 """file object for the revlog's data file"""
734 734 return self.opener(self.datafile, mode=mode)
735 735
736 736 @contextlib.contextmanager
737 737 def _datareadfp(self, existingfp=None):
738 738 """file object suitable to read data"""
739 739 if existingfp is not None:
740 740 yield existingfp
741 741 else:
742 742 if self._inline:
743 743 func = self._indexfp
744 744 else:
745 745 func = self._datafp
746 746 with func() as fp:
747 747 yield fp
748 748
749 749 def tip(self):
750 750 return self.node(len(self.index) - 2)
751 751 def __contains__(self, rev):
752 752 return 0 <= rev < len(self)
753 753 def __len__(self):
754 754 return len(self.index) - 1
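# (editorial note) the "- 2"/"- 1" above account for the sentinel
# nullid entry that parseindex appends at the end of the index ("add
# the magic null revision at -1").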
755 755 def __iter__(self):
756 756 return iter(xrange(len(self)))
757 757 def revs(self, start=0, stop=None):
758 758 """iterate over all rev in this revlog (from start to stop)"""
759 759 step = 1
760 760 if stop is not None:
761 761 if start > stop:
762 762 step = -1
763 763 stop += step
764 764 else:
765 765 stop = len(self)
766 766 return xrange(start, stop, step)
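# A doctest-style sketch (editorial addition; rl is a hypothetical
# 5-revision revlog):
#
#   >>> list(rl.revs())       # all revs, ascending
#   [0, 1, 2, 3, 4]
#   >>> list(rl.revs(3, 1))   # start > stop iterates backwards,
#   [3, 2, 1]                 # inclusive of stop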
767 767
768 768 @util.propertycache
769 769 def nodemap(self):
770 770 self.rev(self.node(0))
771 771 return self._nodecache
772 772
773 773 def hasnode(self, node):
774 774 try:
775 775 self.rev(node)
776 776 return True
777 777 except KeyError:
778 778 return False
779 779
780 780 def candelta(self, baserev, rev):
781 781 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 782 # Disable delta if either rev requires a content-changing flag
783 783 # processor (ex. LFS). This is because such flag processor can alter
784 784 # the rawtext content that the delta will be based on, and two clients
785 785 # could have a same revlog node with different flags (i.e. different
786 786 # rawtext contents) and the delta could be incompatible.
787 787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 789 return False
790 790 return True
791 791
792 792 def clearcaches(self):
793 793 self._cache = None
794 794 self._chainbasecache.clear()
795 795 self._chunkcache = (0, '')
796 796 self._pcache = {}
797 797
798 798 try:
799 799 self._nodecache.clearcaches()
800 800 except AttributeError:
801 801 self._nodecache = {nullid: nullrev}
802 802 self._nodepos = None
803 803
804 804 def rev(self, node):
805 805 try:
806 806 return self._nodecache[node]
807 807 except TypeError:
808 808 raise
809 809 except RevlogError:
810 810 # parsers.c radix tree lookup failed
811 811 if node == wdirid or node in wdirfilenodeids:
812 812 raise error.WdirUnsupported
813 813 raise LookupError(node, self.indexfile, _('no node'))
814 814 except KeyError:
815 815 # pure python cache lookup failed
816 816 n = self._nodecache
817 817 i = self.index
818 818 p = self._nodepos
819 819 if p is None:
820 820 p = len(i) - 2
821 821 else:
822 822 assert p < len(i)
823 823 for r in xrange(p, -1, -1):
824 824 v = i[r][7]
825 825 n[v] = r
826 826 if v == node:
827 827 self._nodepos = r - 1
828 828 return r
829 829 if node == wdirid or node in wdirfilenodeids:
830 830 raise error.WdirUnsupported
831 831 raise LookupError(node, self.indexfile, _('no node'))
832 832
833 833 # Accessors for index entries.
834 834
835 835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 836 # are flags.
837 837 def start(self, rev):
838 838 return int(self.index[rev][0] >> 16)
839 839
840 840 def flags(self, rev):
841 841 return self.index[rev][0] & 0xFFFF
842 842
843 843 def length(self, rev):
844 844 return self.index[rev][1]
845 845
846 846 def rawsize(self, rev):
847 847 """return the length of the uncompressed text for a given revision"""
848 848 l = self.index[rev][2]
849 849 if l >= 0:
850 850 return l
851 851
852 852 t = self.revision(rev, raw=True)
853 853 return len(t)
854 854
855 855 def size(self, rev):
856 856 """length of non-raw text (processed by a "read" flag processor)"""
857 857 # fast path: if no "read" flag processor could change the content,
858 858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 859 flags = self.flags(rev)
860 860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 861 return self.rawsize(rev)
862 862
863 863 return len(self.revision(rev, raw=False))
864 864
865 865 def chainbase(self, rev):
866 866 base = self._chainbasecache.get(rev)
867 867 if base is not None:
868 868 return base
869 869
870 870 index = self.index
871 871 base = index[rev][3]
872 872 while base != rev:
873 873 rev = base
874 874 base = index[rev][3]
875 875
876 876 self._chainbasecache[rev] = base
877 877 return base
878 878
879 879 def linkrev(self, rev):
880 880 return self.index[rev][4]
881 881
882 882 def parentrevs(self, rev):
883 883 try:
884 884 entry = self.index[rev]
885 885 except IndexError:
886 886 if rev == wdirrev:
887 887 raise error.WdirUnsupported
888 888 raise
889 889
890 890 return entry[5], entry[6]
891 891
892 892 def node(self, rev):
893 893 try:
894 894 return self.index[rev][7]
895 895 except IndexError:
896 896 if rev == wdirrev:
897 897 raise error.WdirUnsupported
898 898 raise
899 899
900 900 # Derived from index values.
901 901
902 902 def end(self, rev):
903 903 return self.start(rev) + self.length(rev)
904 904
905 905 def parents(self, node):
906 906 i = self.index
907 907 d = i[self.rev(node)]
908 908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909 909
910 910 def chainlen(self, rev):
911 911 return self._chaininfo(rev)[0]
912 912
913 913 def _chaininfo(self, rev):
914 914 chaininfocache = self._chaininfocache
915 915 if rev in chaininfocache:
916 916 return chaininfocache[rev]
917 917 index = self.index
918 918 generaldelta = self._generaldelta
919 919 iterrev = rev
920 920 e = index[iterrev]
921 921 clen = 0
922 922 compresseddeltalen = 0
923 923 while iterrev != e[3]:
924 924 clen += 1
925 925 compresseddeltalen += e[1]
926 926 if generaldelta:
927 927 iterrev = e[3]
928 928 else:
929 929 iterrev -= 1
930 930 if iterrev in chaininfocache:
931 931 t = chaininfocache[iterrev]
932 932 clen += t[0]
933 933 compresseddeltalen += t[1]
934 934 break
935 935 e = index[iterrev]
936 936 else:
937 937 # Add text length of base since decompressing that also takes
938 938 # work. For cache hits the length is already included.
939 939 compresseddeltalen += e[1]
940 940 r = (clen, compresseddeltalen)
941 941 chaininfocache[rev] = r
942 942 return r
943 943
944 944 def _deltachain(self, rev, stoprev=None):
945 945 """Obtain the delta chain for a revision.
946 946
947 947 ``stoprev`` specifies a revision to stop at. If not specified, we
948 948 stop at the base of the chain.
949 949
950 950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 951 revs in ascending order and ``stopped`` is a bool indicating whether
952 952 ``stoprev`` was hit.
953 953 """
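# An illustrative sketch (editorial addition; hypothetical revs): with
# generaldelta, if rev 5's delta base is 3, rev 3's base is 1, and rev 1
# is a full snapshot (its base is itself), then
#
#   rl._deltachain(5)            == ([1, 3, 5], False)
#   rl._deltachain(5, stoprev=3) == ([5], True)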
954 954 # Try C implementation.
955 955 try:
956 956 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 957 except AttributeError:
958 958 pass
959 959
960 960 chain = []
961 961
962 962 # Alias to prevent attribute lookup in tight loop.
963 963 index = self.index
964 964 generaldelta = self._generaldelta
965 965
966 966 iterrev = rev
967 967 e = index[iterrev]
968 968 while iterrev != e[3] and iterrev != stoprev:
969 969 chain.append(iterrev)
970 970 if generaldelta:
971 971 iterrev = e[3]
972 972 else:
973 973 iterrev -= 1
974 974 e = index[iterrev]
975 975
976 976 if iterrev == stoprev:
977 977 stopped = True
978 978 else:
979 979 chain.append(iterrev)
980 980 stopped = False
981 981
982 982 chain.reverse()
983 983 return chain, stopped
984 984
985 985 def ancestors(self, revs, stoprev=0, inclusive=False):
986 986 """Generate the ancestors of 'revs' in reverse topological order.
987 987 Does not generate revs lower than stoprev.
988 988
989 989 See the documentation for ancestor.lazyancestors for more details."""
990 990
991 991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 992 inclusive=inclusive)
993 993
994 994 def descendants(self, revs):
995 995 """Generate the descendants of 'revs' in revision order.
996 996
997 997 Yield a sequence of revision numbers starting with a child of
998 998 some rev in revs, i.e., each revision is *not* considered a
999 999 descendant of itself. Results are ordered by revision number (a
1000 1000 topological sort)."""
1001 1001 first = min(revs)
1002 1002 if first == nullrev:
1003 1003 for i in self:
1004 1004 yield i
1005 1005 return
1006 1006
1007 1007 seen = set(revs)
1008 1008 for i in self.revs(start=first + 1):
1009 1009 for x in self.parentrevs(i):
1010 1010 if x != nullrev and x in seen:
1011 1011 seen.add(i)
1012 1012 yield i
1013 1013 break
1014 1014
1015 1015 def findcommonmissing(self, common=None, heads=None):
1016 1016 """Return a tuple of the ancestors of common and the ancestors of heads
1017 1017 that are not ancestors of common. In revset terminology, we return the
1018 1018 tuple:
1019 1019
1020 1020 ::common, (::heads) - (::common)
1021 1021
1022 1022 The list is sorted by revision number, meaning it is
1023 1023 topologically sorted.
1024 1024
1025 1025 'heads' and 'common' are both lists of node IDs. If heads is
1026 1026 not supplied, uses all of the revlog's heads. If common is not
1027 1027 supplied, uses nullid."""
1028 1028 if common is None:
1029 1029 common = [nullid]
1030 1030 if heads is None:
1031 1031 heads = self.heads()
1032 1032
1033 1033 common = [self.rev(n) for n in common]
1034 1034 heads = [self.rev(n) for n in heads]
1035 1035
1036 1036 # we want the ancestors, but inclusive
1037 1037 class lazyset(object):
1038 1038 def __init__(self, lazyvalues):
1039 1039 self.addedvalues = set()
1040 1040 self.lazyvalues = lazyvalues
1041 1041
1042 1042 def __contains__(self, value):
1043 1043 return value in self.addedvalues or value in self.lazyvalues
1044 1044
1045 1045 def __iter__(self):
1046 1046 added = self.addedvalues
1047 1047 for r in added:
1048 1048 yield r
1049 1049 for r in self.lazyvalues:
1050 1050 if r not in added:
1051 1051 yield r
1052 1052
1053 1053 def add(self, value):
1054 1054 self.addedvalues.add(value)
1055 1055
1056 1056 def update(self, values):
1057 1057 self.addedvalues.update(values)
1058 1058
1059 1059 has = lazyset(self.ancestors(common))
1060 1060 has.add(nullrev)
1061 1061 has.update(common)
1062 1062
1063 1063 # take all ancestors from heads that aren't in has
1064 1064 missing = set()
1065 1065 visit = collections.deque(r for r in heads if r not in has)
1066 1066 while visit:
1067 1067 r = visit.popleft()
1068 1068 if r in missing:
1069 1069 continue
1070 1070 else:
1071 1071 missing.add(r)
1072 1072 for p in self.parentrevs(r):
1073 1073 if p not in has:
1074 1074 visit.append(p)
1075 1075 missing = list(missing)
1076 1076 missing.sort()
1077 1077 return has, [self.node(miss) for miss in missing]
1078 1078
1079 1079 def incrementalmissingrevs(self, common=None):
1080 1080 """Return an object that can be used to incrementally compute the
1081 1081 revision numbers of the ancestors of arbitrary sets that are not
1082 1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 1083 object.
1084 1084
1085 1085 'common' is a list of revision numbers. If common is not supplied, uses
1086 1086 nullrev.
1087 1087 """
1088 1088 if common is None:
1089 1089 common = [nullrev]
1090 1090
1091 1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092 1092
1093 1093 def findmissingrevs(self, common=None, heads=None):
1094 1094 """Return the revision numbers of the ancestors of heads that
1095 1095 are not ancestors of common.
1096 1096
1097 1097 More specifically, return a list of revision numbers corresponding to
1098 1098 nodes N such that every N satisfies the following constraints:
1099 1099
1100 1100 1. N is an ancestor of some node in 'heads'
1101 1101 2. N is not an ancestor of any node in 'common'
1102 1102
1103 1103 The list is sorted by revision number, meaning it is
1104 1104 topologically sorted.
1105 1105
1106 1106 'heads' and 'common' are both lists of revision numbers. If heads is
1107 1107 not supplied, uses all of the revlog's heads. If common is not
1108 1108 supplied, uses nullrev."""
1109 1109 if common is None:
1110 1110 common = [nullrev]
1111 1111 if heads is None:
1112 1112 heads = self.headrevs()
1113 1113
1114 1114 inc = self.incrementalmissingrevs(common=common)
1115 1115 return inc.missingancestors(heads)
1116 1116
1117 1117 def findmissing(self, common=None, heads=None):
1118 1118 """Return the ancestors of heads that are not ancestors of common.
1119 1119
1120 1120 More specifically, return a list of nodes N such that every N
1121 1121 satisfies the following constraints:
1122 1122
1123 1123 1. N is an ancestor of some node in 'heads'
1124 1124 2. N is not an ancestor of any node in 'common'
1125 1125
1126 1126 The list is sorted by revision number, meaning it is
1127 1127 topologically sorted.
1128 1128
1129 1129 'heads' and 'common' are both lists of node IDs. If heads is
1130 1130 not supplied, uses all of the revlog's heads. If common is not
1131 1131 supplied, uses nullid."""
1132 1132 if common is None:
1133 1133 common = [nullid]
1134 1134 if heads is None:
1135 1135 heads = self.heads()
1136 1136
1137 1137 common = [self.rev(n) for n in common]
1138 1138 heads = [self.rev(n) for n in heads]
1139 1139
1140 1140 inc = self.incrementalmissingrevs(common=common)
1141 1141 return [self.node(r) for r in inc.missingancestors(heads)]
1142 1142
1143 1143 def nodesbetween(self, roots=None, heads=None):
1144 1144 """Return a topological path from 'roots' to 'heads'.
1145 1145
1146 1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 1147 topologically sorted list of all nodes N that satisfy both of
1148 1148 these constraints:
1149 1149
1150 1150 1. N is a descendant of some node in 'roots'
1151 1151 2. N is an ancestor of some node in 'heads'
1152 1152
1153 1153 Every node is considered to be both a descendant and an ancestor
1154 1154 of itself, so every reachable node in 'roots' and 'heads' will be
1155 1155 included in 'nodes'.
1156 1156
1157 1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160 1160
1161 1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 1162 unspecified, uses nullid as the only root. If 'heads' is
1163 1163 unspecified, uses list of all of the revlog's heads."""
1164 1164 nonodes = ([], [], [])
1165 1165 if roots is not None:
1166 1166 roots = list(roots)
1167 1167 if not roots:
1168 1168 return nonodes
1169 1169 lowestrev = min([self.rev(n) for n in roots])
1170 1170 else:
1171 1171 roots = [nullid] # Everybody's a descendant of nullid
1172 1172 lowestrev = nullrev
1173 1173 if (lowestrev == nullrev) and (heads is None):
1174 1174 # We want _all_ the nodes!
1175 1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 1176 if heads is None:
1177 1177 # All nodes are ancestors, so the latest ancestor is the last
1178 1178 # node.
1179 1179 highestrev = len(self) - 1
1180 1180 # Set ancestors to None to signal that every node is an ancestor.
1181 1181 ancestors = None
1182 1182 # Set heads to an empty dictionary for later discovery of heads
1183 1183 heads = {}
1184 1184 else:
1185 1185 heads = list(heads)
1186 1186 if not heads:
1187 1187 return nonodes
1188 1188 ancestors = set()
1189 1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 1190 # Also, later we will be using it to filter out the heads we can't
1191 1191 # find from roots.
1192 1192 heads = dict.fromkeys(heads, False)
1193 1193 # Start at the top and keep marking parents until we're done.
1194 1194 nodestotag = set(heads)
1195 1195 # Remember where the top was so we can use it as a limit later.
1196 1196 highestrev = max([self.rev(n) for n in nodestotag])
1197 1197 while nodestotag:
1198 1198 # grab a node to tag
1199 1199 n = nodestotag.pop()
1200 1200 # Never tag nullid
1201 1201 if n == nullid:
1202 1202 continue
1203 1203 # A node's revision number represents its place in a
1204 1204 # topologically sorted list of nodes.
1205 1205 r = self.rev(n)
1206 1206 if r >= lowestrev:
1207 1207 if n not in ancestors:
1208 1208 # If we are possibly a descendant of one of the roots
1209 1209 # and we haven't already been marked as an ancestor
1210 1210 ancestors.add(n) # Mark as ancestor
1211 1211 # Add non-nullid parents to list of nodes to tag.
1212 1212 nodestotag.update([p for p in self.parents(n) if
1213 1213 p != nullid])
1214 1214 elif n in heads: # We've seen it before, is it a fake head?
1215 1215 # So it is; real heads should not be the ancestors of
1216 1216 # any other heads.
1217 1217 heads.pop(n)
1218 1218 if not ancestors:
1219 1219 return nonodes
1220 1220 # Now that we have our set of ancestors, we want to remove any
1221 1221 # roots that are not ancestors.
1222 1222
1223 1223 # If one of the roots was nullid, everything is included anyway.
1224 1224 if lowestrev > nullrev:
1225 1225 # But, since it wasn't, let's recompute the lowest rev to not
1226 1226 # include roots that aren't ancestors.
1227 1227
1228 1228 # Filter out roots that aren't ancestors of heads
1229 1229 roots = [root for root in roots if root in ancestors]
1230 1230 # Recompute the lowest revision
1231 1231 if roots:
1232 1232 lowestrev = min([self.rev(root) for root in roots])
1233 1233 else:
1234 1234 # No more roots? Return empty list
1235 1235 return nonodes
1236 1236 else:
1237 1237 # We are descending from nullid, and don't need to care about
1238 1238 # any other roots.
1239 1239 lowestrev = nullrev
1240 1240 roots = [nullid]
1241 1241 # Transform our roots list into a set.
1242 1242 descendants = set(roots)
1243 1243 # Also, keep the original roots so we can filter out roots that aren't
1244 1244 # 'real' roots (i.e. are descended from other roots).
1245 1245 roots = descendants.copy()
1246 1246 # Our topologically sorted list of output nodes.
1247 1247 orderedout = []
1248 1248 # Don't start at nullid since we don't want nullid in our output list,
1249 1249 # and if nullid shows up in descendants, empty parents will look like
1250 1250 # they're descendants.
1251 1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1252 n = self.node(r)
1253 1253 isdescendant = False
1254 1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1255 isdescendant = True
1256 1256 elif n in descendants:
1257 1257 # n is already a descendant
1258 1258 isdescendant = True
1259 1259 # This check only needs to be done here because all the roots
1260 1260 # will start being marked as descendants before the loop.
1261 1261 if n in roots:
1262 1262 # If n was a root, check if it's a 'real' root.
1263 1263 p = tuple(self.parents(n))
1264 1264 # If any of its parents are descendants, it's not a root.
1265 1265 if (p[0] in descendants) or (p[1] in descendants):
1266 1266 roots.remove(n)
1267 1267 else:
1268 1268 p = tuple(self.parents(n))
1269 1269 # A node is a descendant if either of its parents are
1270 1270 # descendants. (We seeded the descendants set with the roots
1271 1271 # up there, remember?)
1272 1272 if (p[0] in descendants) or (p[1] in descendants):
1273 1273 descendants.add(n)
1274 1274 isdescendant = True
1275 1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1276 # Only include nodes that are both descendants and ancestors.
1277 1277 orderedout.append(n)
1278 1278 if (ancestors is not None) and (n in heads):
1279 1279 # We're trying to figure out which heads are reachable
1280 1280 # from roots.
1281 1281 # Mark this head as having been reached
1282 1282 heads[n] = True
1283 1283 elif ancestors is None:
1284 1284 # Otherwise, we're trying to discover the heads.
1285 1285 # Assume this is a head because if it isn't, the next step
1286 1286 # will eventually remove it.
1287 1287 heads[n] = True
1288 1288 # But, obviously its parents aren't.
1289 1289 for p in self.parents(n):
1290 1290 heads.pop(p, None)
1291 1291 heads = [head for head, flag in heads.iteritems() if flag]
1292 1292 roots = list(roots)
1293 1293 assert orderedout
1294 1294 assert roots
1295 1295 assert heads
1296 1296 return (orderedout, roots, heads)
1297 1297
1298 1298 def headrevs(self):
1299 1299 try:
1300 1300 return self.index.headrevs()
1301 1301 except AttributeError:
1302 1302 return self._headrevs()
1303 1303
1304 1304 def computephases(self, roots):
1305 1305 return self.index.computephasesmapsets(roots)
1306 1306
1307 1307 def _headrevs(self):
1308 1308 count = len(self)
1309 1309 if not count:
1310 1310 return [nullrev]
1311 1311 # we won't iterate over filtered revs, so nobody is a head at start
1312 1312 ishead = [0] * (count + 1)
1313 1313 index = self.index
1314 1314 for r in self:
1315 1315 ishead[r] = 1 # I may be a head
1316 1316 e = index[r]
1317 1317 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1318 1318 return [r for r, val in enumerate(ishead) if val]
1319 1319
1320 1320 def heads(self, start=None, stop=None):
1321 1321 """return the list of all nodes that have no children
1322 1322
1323 1323 if start is specified, only heads that are descendants of
1324 1324 start will be returned
1325 1325 if stop is specified, it will consider all the revs from stop
1326 1326 as if they had no children
1327 1327 """
1328 1328 if start is None and stop is None:
1329 1329 if not len(self):
1330 1330 return [nullid]
1331 1331 return [self.node(r) for r in self.headrevs()]
1332 1332
1333 1333 if start is None:
1334 1334 start = nullid
1335 1335 if stop is None:
1336 1336 stop = []
1337 1337 stoprevs = set([self.rev(n) for n in stop])
1338 1338 startrev = self.rev(start)
1339 1339 reachable = {startrev}
1340 1340 heads = {startrev}
1341 1341
1342 1342 parentrevs = self.parentrevs
1343 1343 for r in self.revs(start=startrev + 1):
1344 1344 for p in parentrevs(r):
1345 1345 if p in reachable:
1346 1346 if r not in stoprevs:
1347 1347 reachable.add(r)
1348 1348 heads.add(r)
1349 1349 if p in heads and p not in stoprevs:
1350 1350 heads.remove(p)
1351 1351
1352 1352 return [self.node(r) for r in heads]
1353 1353
1354 1354 def children(self, node):
1355 1355 """find the children of a given node"""
1356 1356 c = []
1357 1357 p = self.rev(node)
1358 1358 for r in self.revs(start=p + 1):
1359 1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 1360 if prevs:
1361 1361 for pr in prevs:
1362 1362 if pr == p:
1363 1363 c.append(self.node(r))
1364 1364 elif p == nullrev:
1365 1365 c.append(self.node(r))
1366 1366 return c
1367 1367
1368 1368 def descendant(self, start, end):
1369 1369 if start == nullrev:
1370 1370 return True
1371 1371 for i in self.descendants([start]):
1372 1372 if i == end:
1373 1373 return True
1374 1374 elif i > end:
1375 1375 break
1376 1376 return False
1377 1377
1378 1378 def commonancestorsheads(self, a, b):
1379 1379 """calculate all the heads of the common ancestors of nodes a and b"""
1380 1380 a, b = self.rev(a), self.rev(b)
1381 1381 try:
1382 1382 ancs = self.index.commonancestorsheads(a, b)
1383 1383 except (AttributeError, OverflowError): # C implementation failed
1384 1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 1385 return pycompat.maplist(self.node, ancs)
1386 1386
1387 1387 def isancestor(self, a, b):
1388 1388 """return True if node a is an ancestor of node b
1389 1389
1390 1390 The implementation of this is trivial but the use of
1391 1391 commonancestorsheads is not."""
1392 1392 return a in self.commonancestorsheads(a, b)
1393 1393
1394 1394 def ancestor(self, a, b):
1395 1395 """calculate the "best" common ancestor of nodes a and b"""
1396 1396
1397 1397 a, b = self.rev(a), self.rev(b)
1398 1398 try:
1399 1399 ancs = self.index.ancestors(a, b)
1400 1400 except (AttributeError, OverflowError):
1401 1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 1402 if ancs:
1403 1403 # choose a consistent winner when there's a tie
1404 1404 return min(map(self.node, ancs))
1405 1405 return nullid
1406 1406
1407 1407 def _match(self, id):
1408 1408 if isinstance(id, int):
1409 1409 # rev
1410 1410 return self.node(id)
1411 1411 if len(id) == 20:
1412 1412 # possibly a binary node
1413 1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 1414 try:
1415 1415 node = id
1416 1416 self.rev(node) # quick search the index
1417 1417 return node
1418 1418 except LookupError:
1419 1419 pass # may be partial hex id
1420 1420 try:
1421 1421 # str(rev)
1422 1422 rev = int(id)
1423 1423 if "%d" % rev != id:
1424 1424 raise ValueError
1425 1425 if rev < 0:
1426 1426 rev = len(self) + rev
1427 1427 if rev < 0 or rev >= len(self):
1428 1428 raise ValueError
1429 1429 return self.node(rev)
1430 1430 except (ValueError, OverflowError):
1431 1431 pass
1432 1432 if len(id) == 40:
1433 1433 try:
1434 1434 # a full hex nodeid?
1435 1435 node = bin(id)
1436 1436 self.rev(node)
1437 1437 return node
1438 1438 except (TypeError, LookupError):
1439 1439 pass
1440 1440
1441 1441 def _partialmatch(self, id):
1442 1442 # we don't care about wdirfilenodeids as they should always be full hashes
1443 1443 maybewdir = wdirhex.startswith(id)
1444 1444 try:
1445 1445 partial = self.index.partialmatch(id)
1446 1446 if partial and self.hasnode(partial):
1447 1447 if maybewdir:
1448 1448 # single 'ff...' match in radix tree, ambiguous with wdir
1449 1449 raise RevlogError
1450 1450 return partial
1451 1451 if maybewdir:
1452 1452 # no 'ff...' match in radix tree, wdir identified
1453 1453 raise error.WdirUnsupported
1454 1454 return None
1455 1455 except RevlogError:
1456 1456 # parsers.c radix tree lookup gave multiple matches
1457 1457 # fast path: for unfiltered changelog, radix tree is accurate
1458 1458 if not getattr(self, 'filteredrevs', None):
1459 1459 raise LookupError(id, self.indexfile,
1460 1460 _('ambiguous identifier'))
1461 1461 # fall through to slow path that filters hidden revisions
1462 1462 except (AttributeError, ValueError):
1463 1463 # we are pure python, or key was too short to search radix tree
1464 1464 pass
1465 1465
1466 1466 if id in self._pcache:
1467 1467 return self._pcache[id]
1468 1468
1469 1469 if len(id) <= 40:
1470 1470 try:
1471 1471 # hex(node)[:...]
1472 1472 l = len(id) // 2 # grab an even number of digits
1473 1473 prefix = bin(id[:l * 2])
1474 1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 1475 nl = [n for n in nl if hex(n).startswith(id) and
1476 1476 self.hasnode(n)]
1477 1477 if len(nl) > 0:
1478 1478 if len(nl) == 1 and not maybewdir:
1479 1479 self._pcache[id] = nl[0]
1480 1480 return nl[0]
1481 1481 raise LookupError(id, self.indexfile,
1482 1482 _('ambiguous identifier'))
1483 1483 if maybewdir:
1484 1484 raise error.WdirUnsupported
1485 1485 return None
1486 1486 except TypeError:
1487 1487 pass
1488 1488
1489 1489 def lookup(self, id):
1490 1490 """locate a node based on:
1491 1491 - revision number or str(revision number)
1492 1492 - nodeid or subset of hex nodeid
1493 1493 """
1494 1494 n = self._match(id)
1495 1495 if n is not None:
1496 1496 return n
1497 1497 n = self._partialmatch(id)
1498 1498 if n:
1499 1499 return n
1500 1500
1501 1501 raise LookupError(id, self.indexfile, _('no match found'))
1502 1502
1503 1503 def shortest(self, node, minlength=1):
1504 1504 """Find the shortest unambiguous prefix that matches node."""
1505 1505 def isrev(prefix):
1506 1506 try:
1507 1507 i = int(prefix)
1508 1508 # if we are a pure int, then starting with zero will not be
1509 1509 # confused as a rev; or, obviously, if the int is larger
1510 1510 # than the value of the tip rev
1511 1511 if prefix[0] == '0' or i > len(self):
1512 1512 return False
1513 1513 return True
1514 1514 except ValueError:
1515 1515 return False
1516 1516
1517 1517 def isvalid(prefix):
1518 1518 try:
1519 1519 node = self._partialmatch(prefix)
1520 1520 except error.RevlogError:
1521 1521 return False
1522 1522 except error.WdirUnsupported:
1523 1523 # single 'ff...' match
1524 1524 return True
1525 1525 if node is None:
1526 1526 raise LookupError(node, self.indexfile, _('no node'))
1527 1527 return not isrev(prefix)
1528 1528
1529 def maybewdir(prefix):
1530 return all(c == 'f' for c in prefix)
1531
1529 1532 hexnode = hex(node)
1533
1534 def disambiguate(hexnode, minlength):
1535 for length in range(minlength, 41):
1536 prefix = hexnode[:length]
1537 if not isrev(prefix) and not maybewdir(prefix):
1538 return prefix
1539
1540 if not getattr(self, 'filteredrevs', None):
1541 try:
1542 length = max(self.index.shortest(node), minlength)
1543 return disambiguate(hexnode, length)
1544 except RevlogError:
1545 if node == wdirid:
1546 for length in range(minlength, 41):
1547 prefix = hexnode[:length]
1548 if isvalid(prefix):
1549 return prefix
1550 else:
1551 raise LookupError(node, self.indexfile, _('no node'))
1552 except AttributeError:
1553 # Fall through to pure code
1554 pass
1555
1530 1556 shortest = hexnode
1531 1557 startlength = max(6, minlength)
1532 1558 length = startlength
1533 1559 while True:
1534 1560 prefix = hexnode[:length]
1535 1561 if isvalid(prefix):
1536 1562 shortest = prefix
1537 1563 if length == minlength or length > startlength:
1538 1564 return shortest
1539 1565 length -= 1
1540 1566 else:
1541 1567 length += 1
1542 1568 if len(shortest) <= length:
1543 1569 return shortest
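# A sketch of shortest()'s contract (editorial addition; hypothetical
# values): for a node whose hex digest starts with "1234fa", shortest()
# might return "1234f" -- the shortest prefix that is unambiguous, is
# not parseable as a plain revision number (pure ints are reserved for
# revs), and is not a prefix of the all-'f' working-directory pseudo-id.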
1544 1570
1545 1571 def cmp(self, node, text):
1546 1572 """compare text with a given file revision
1547 1573
1548 1574 returns True if text is different than what is stored.
1549 1575 """
1550 1576 p1, p2 = self.parents(node)
1551 1577 return hash(text, p1, p2) != node
1552 1578
1553 1579 def _cachesegment(self, offset, data):
1554 1580 """Add a segment to the revlog cache.
1555 1581
1556 1582 Accepts an absolute offset and the data that is at that location.
1557 1583 """
1558 1584 o, d = self._chunkcache
1559 1585 # try to add to existing cache
1560 1586 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1561 1587 self._chunkcache = o, d + data
1562 1588 else:
1563 1589 self._chunkcache = offset, data
1564 1590
1565 1591 def _readsegment(self, offset, length, df=None):
1566 1592 """Load a segment of raw data from the revlog.
1567 1593
1568 1594 Accepts an absolute offset, length to read, and an optional existing
1569 1595 file handle to read from.
1570 1596
1571 1597 If an existing file handle is passed, it will be seeked and the
1572 1598 original seek position will NOT be restored.
1573 1599
1574 1600 Returns a str or buffer of raw byte data.
1575 1601 """
1576 1602 # Cache data both forward and backward around the requested
1577 1603 # data, in a fixed size window. This helps speed up operations
1578 1604 # involving reading the revlog backwards.
1579 1605 cachesize = self._chunkcachesize
1580 1606 realoffset = offset & ~(cachesize - 1)
1581 1607 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1582 1608 - realoffset)
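# (editorial example) with the default cachesize of 65536, a request
# for offset=70000, length=100 rounds to realoffset=65536 and
# reallength=65536, i.e. the whole surrounding 64KiB window is read
# and cached.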
1583 1609 with self._datareadfp(df) as df:
1584 1610 df.seek(realoffset)
1585 1611 d = df.read(reallength)
1586 1612 self._cachesegment(realoffset, d)
1587 1613 if offset != realoffset or reallength != length:
1588 1614 return util.buffer(d, offset - realoffset, length)
1589 1615 return d
1590 1616
1591 1617 def _getsegment(self, offset, length, df=None):
1592 1618 """Obtain a segment of raw data from the revlog.
1593 1619
1594 1620 Accepts an absolute offset, length of bytes to obtain, and an
1595 1621 optional file handle to the already-opened revlog. If the file
1596 1622 handle is used, its original seek position will not be preserved.
1597 1623
1598 1624 Requests for data may be returned from a cache.
1599 1625
1600 1626 Returns a str or a buffer instance of raw byte data.
1601 1627 """
1602 1628 o, d = self._chunkcache
1603 1629 l = len(d)
1604 1630
1605 1631 # is it in the cache?
1606 1632 cachestart = offset - o
1607 1633 cacheend = cachestart + length
1608 1634 if cachestart >= 0 and cacheend <= l:
1609 1635 if cachestart == 0 and cacheend == l:
1610 1636 return d # avoid a copy
1611 1637 return util.buffer(d, cachestart, cacheend - cachestart)
1612 1638
1613 1639 return self._readsegment(offset, length, df=df)
1614 1640
1615 1641 def _getsegmentforrevs(self, startrev, endrev, df=None):
1616 1642 """Obtain a segment of raw data corresponding to a range of revisions.
1617 1643
1618 1644 Accepts the start and end revisions and an optional already-open
1619 1645 file handle to be used for reading. If the file handle is read, its
1620 1646 seek position will not be preserved.
1621 1647
1622 1648 Requests for data may be satisfied by a cache.
1623 1649
1624 1650 Returns a 2-tuple of (offset, data) for the requested range of
1625 1651 revisions. Offset is the integer offset from the beginning of the
1626 1652 revlog and data is a str or buffer of the raw byte data.
1627 1653
1628 1654 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1629 1655 to determine where each revision's data begins and ends.
1630 1656 """
1631 1657 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1632 1658 # (functions are expensive).
1633 1659 index = self.index
1634 1660 istart = index[startrev]
1635 1661 start = int(istart[0] >> 16)
1636 1662 if startrev == endrev:
1637 1663 end = start + istart[1]
1638 1664 else:
1639 1665 iend = index[endrev]
1640 1666 end = int(iend[0] >> 16) + iend[1]
1641 1667
1642 1668 if self._inline:
1643 1669 start += (startrev + 1) * self._io.size
1644 1670 end += (endrev + 1) * self._io.size
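# (editorial note) inline revlogs interleave each revision's data
# with its index record in a single file, so a revision's payload
# sits after one index entry per preceding revision plus its own --
# hence the "(rev + 1) * iosize" corrections above.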
1645 1671 length = end - start
1646 1672
1647 1673 return start, self._getsegment(start, length, df=df)
1648 1674
1649 1675 def _chunk(self, rev, df=None):
1650 1676 """Obtain a single decompressed chunk for a revision.
1651 1677
1652 1678 Accepts an integer revision and an optional already-open file handle
1653 1679 to be used for reading. If used, the seek position of the file will not
1654 1680 be preserved.
1655 1681
1656 1682 Returns a str holding uncompressed data for the requested revision.
1657 1683 """
1658 1684 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1659 1685
1660 1686 def _chunks(self, revs, df=None):
1661 1687 """Obtain decompressed chunks for the specified revisions.
1662 1688
1663 1689 Accepts an iterable of numeric revisions that are assumed to be in
1664 1690 ascending order. Also accepts an optional already-open file handle
1665 1691 to be used for reading. If used, the seek position of the file will
1666 1692 not be preserved.
1667 1693
1668 1694 This function is similar to calling ``self._chunk()`` multiple times,
1669 1695 but is faster.
1670 1696
1671 1697 Returns a list with decompressed data for each requested revision.
1672 1698 """
1673 1699 if not revs:
1674 1700 return []
1675 1701 start = self.start
1676 1702 length = self.length
1677 1703 inline = self._inline
1678 1704 iosize = self._io.size
1679 1705 buffer = util.buffer
1680 1706
1681 1707 l = []
1682 1708 ladd = l.append
1683 1709
1684 1710 if not self._withsparseread:
1685 1711 slicedchunks = (revs,)
1686 1712 else:
1687 1713 slicedchunks = _slicechunk(self, revs)
1688 1714
1689 1715 for revschunk in slicedchunks:
1690 1716 firstrev = revschunk[0]
1691 1717 # Skip trailing revisions with empty diff
1692 1718 for lastrev in revschunk[::-1]:
1693 1719 if length(lastrev) != 0:
1694 1720 break
1695 1721
1696 1722 try:
1697 1723 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1698 1724 except OverflowError:
1699 1725 # issue4215 - we can't cache a run of chunks greater than
1700 1726 # 2G on Windows
1701 1727 return [self._chunk(rev, df=df) for rev in revschunk]
1702 1728
1703 1729 decomp = self.decompress
1704 1730 for rev in revschunk:
1705 1731 chunkstart = start(rev)
1706 1732 if inline:
1707 1733 chunkstart += (rev + 1) * iosize
1708 1734 chunklength = length(rev)
1709 1735 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1710 1736
1711 1737 return l
1712 1738
1713 1739 def _chunkclear(self):
1714 1740 """Clear the raw chunk cache."""
1715 1741 self._chunkcache = (0, '')
1716 1742
1717 1743 def deltaparent(self, rev):
1718 1744 """return deltaparent of the given revision"""
1719 1745 base = self.index[rev][3]
1720 1746 if base == rev:
1721 1747 return nullrev
1722 1748 elif self._generaldelta:
1723 1749 return base
1724 1750 else:
1725 1751 return rev - 1
1726 1752
1727 1753 def revdiff(self, rev1, rev2):
1728 1754 """return or calculate a delta between two revisions
1729 1755
1730 1756 The delta calculated is in binary form and is intended to be written to
1731 1757 revlog data directly. So this function needs raw revision data.
1732 1758 """
1733 1759 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1734 1760 return bytes(self._chunk(rev2))
1735 1761
1736 1762 return mdiff.textdiff(self.revision(rev1, raw=True),
1737 1763 self.revision(rev2, raw=True))
1738 1764
1739 1765 def revision(self, nodeorrev, _df=None, raw=False):
1740 1766 """return an uncompressed revision of a given node or revision
1741 1767 number.
1742 1768
1743 1769 _df - an existing file handle to read from. (internal-only)
1744 1770 raw - an optional argument specifying if the revision data is to be
1745 1771 treated as raw data when applying flag transforms. 'raw' should be set
1746 1772 to True when generating changegroups or in debug commands.
1747 1773 """
1748 1774 if isinstance(nodeorrev, int):
1749 1775 rev = nodeorrev
1750 1776 node = self.node(rev)
1751 1777 else:
1752 1778 node = nodeorrev
1753 1779 rev = None
1754 1780
1755 1781 cachedrev = None
1756 1782 flags = None
1757 1783 rawtext = None
1758 1784 if node == nullid:
1759 1785 return ""
1760 1786 if self._cache:
1761 1787 if self._cache[0] == node:
1762 1788 # _cache only stores rawtext
1763 1789 if raw:
1764 1790 return self._cache[2]
1765 1791 # duplicated, but good for perf
1766 1792 if rev is None:
1767 1793 rev = self.rev(node)
1768 1794 if flags is None:
1769 1795 flags = self.flags(rev)
1770 1796 # no extra flags set, no flag processor runs, text = rawtext
1771 1797 if flags == REVIDX_DEFAULT_FLAGS:
1772 1798 return self._cache[2]
1773 1799 # rawtext is reusable. need to run flag processor
1774 1800 rawtext = self._cache[2]
1775 1801
1776 1802 cachedrev = self._cache[1]
1777 1803
1778 1804 # look up what we need to read
1779 1805 if rawtext is None:
1780 1806 if rev is None:
1781 1807 rev = self.rev(node)
1782 1808
1783 1809 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1784 1810 if stopped:
1785 1811 rawtext = self._cache[2]
1786 1812
1787 1813 # drop cache to save memory
1788 1814 self._cache = None
1789 1815
1790 1816 bins = self._chunks(chain, df=_df)
1791 1817 if rawtext is None:
1792 1818 rawtext = bytes(bins[0])
1793 1819 bins = bins[1:]
1794 1820
1795 1821 rawtext = mdiff.patches(rawtext, bins)
1796 1822 self._cache = (node, rev, rawtext)
1797 1823
1798 1824 if flags is None:
1799 1825 if rev is None:
1800 1826 rev = self.rev(node)
1801 1827 flags = self.flags(rev)
1802 1828
1803 1829 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1804 1830 if validatehash:
1805 1831 self.checkhash(text, node, rev=rev)
1806 1832
1807 1833 return text
1808 1834
1809 1835 def hash(self, text, p1, p2):
1810 1836 """Compute a node hash.
1811 1837
1812 1838 Available as a function so that subclasses can replace the hash
1813 1839 as needed.
1814 1840 """
1815 1841 return hash(text, p1, p2)
1816 1842
1817 1843 def _processflags(self, text, flags, operation, raw=False):
1818 1844 """Inspect revision data flags and applies transforms defined by
1819 1845 registered flag processors.
1820 1846
1821 1847 ``text`` - the revision data to process
1822 1848 ``flags`` - the revision flags
1823 1849 ``operation`` - the operation being performed (read or write)
1824 1850 ``raw`` - an optional argument describing if the raw transform should be
1825 1851 applied.
1826 1852
1827 1853 This method processes the flags in the order (or reverse order if
1828 1854 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1829 1855 flag processors registered for present flags. The order of flags defined
1830 1856 in REVIDX_FLAGS_ORDER needs to be stable since the transforms may not commute.
1831 1857
1832 1858 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1833 1859 processed text and ``validatehash`` is a bool indicating whether the
1834 1860 returned text should be checked for hash integrity.
1835 1861
1836 1862 Note: If the ``raw`` argument is set, it has precedence over the
1837 1863 operation and will only update the value of ``validatehash``.
1838 1864 """
1839 1865 # fast path: no flag processors will run
1840 1866 if flags == 0:
1841 1867 return text, True
1842 1868 if operation not in ('read', 'write'):
1843 1869 raise ProgrammingError(_("invalid '%s' operation") % operation)
1844 1870 # Check all flags are known.
1845 1871 if flags & ~REVIDX_KNOWN_FLAGS:
1846 1872 raise RevlogError(_("incompatible revision flag '%#x'") %
1847 1873 (flags & ~REVIDX_KNOWN_FLAGS))
1848 1874 validatehash = True
1849 1875 # Depending on the operation (read or write), the order might be
1850 1876 # reversed due to non-commutative transforms.
1851 1877 orderedflags = REVIDX_FLAGS_ORDER
1852 1878 if operation == 'write':
1853 1879 orderedflags = reversed(orderedflags)
1854 1880
1855 1881 for flag in orderedflags:
1856 1882 # If a flagprocessor has been registered for a known flag, apply the
1857 1883 # related operation transform and update result tuple.
1858 1884 if flag & flags:
1859 1885 vhash = True
1860 1886
1861 1887 if flag not in _flagprocessors:
1862 1888 message = _("missing processor for flag '%#x'") % (flag)
1863 1889 raise RevlogError(message)
1864 1890
1865 1891 processor = _flagprocessors[flag]
1866 1892 if processor is not None:
1867 1893 readtransform, writetransform, rawtransform = processor
1868 1894
1869 1895 if raw:
1870 1896 vhash = rawtransform(self, text)
1871 1897 elif operation == 'read':
1872 1898 text, vhash = readtransform(self, text)
1873 1899 else: # write operation
1874 1900 text, vhash = writetransform(self, text)
1875 1901 validatehash = validatehash and vhash
1876 1902
1877 1903 return text, validatehash
1878 1904
1879 1905 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1880 1906 """Check node hash integrity.
1881 1907
1882 1908 Available as a function so that subclasses can extend hash mismatch
1883 1909 behaviors as needed.
1884 1910 """
1885 1911 try:
1886 1912 if p1 is None and p2 is None:
1887 1913 p1, p2 = self.parents(node)
1888 1914 if node != self.hash(text, p1, p2):
1889 1915 revornode = rev
1890 1916 if revornode is None:
1891 1917 revornode = templatefilters.short(hex(node))
1892 1918 raise RevlogError(_("integrity check failed on %s:%s")
1893 1919 % (self.indexfile, pycompat.bytestr(revornode)))
1894 1920 except RevlogError:
1895 1921 if self._censorable and _censoredtext(text):
1896 1922 raise error.CensoredNodeError(self.indexfile, node, text)
1897 1923 raise
1898 1924
1899 1925 def _enforceinlinesize(self, tr, fp=None):
1900 1926 """Check if the revlog is too big for inline and convert if so.
1901 1927
1902 1928 This should be called after revisions are added to the revlog. If the
1903 1929 revlog has grown too large to be an inline revlog, it will convert it
1904 1930 to use multiple index and data files.
1905 1931 """
1906 1932 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1907 1933 return
1908 1934
1909 1935 trinfo = tr.find(self.indexfile)
1910 1936 if trinfo is None:
1911 1937 raise RevlogError(_("%s not found in the transaction")
1912 1938 % self.indexfile)
1913 1939
1914 1940 trindex = trinfo[2]
1915 1941 if trindex is not None:
1916 1942 dataoff = self.start(trindex)
1917 1943 else:
1918 1944 # revlog was stripped at start of transaction, use all leftover data
1919 1945 trindex = len(self) - 1
1920 1946 dataoff = self.end(-2)
1921 1947
1922 1948 tr.add(self.datafile, dataoff)
1923 1949
1924 1950 if fp:
1925 1951 fp.flush()
1926 1952 fp.close()
1927 1953
1928 1954 with self._datafp('w') as df:
1929 1955 for r in self:
1930 1956 df.write(self._getsegmentforrevs(r, r)[1])
1931 1957
1932 1958 with self._indexfp('w') as fp:
1933 1959 self.version &= ~FLAG_INLINE_DATA
1934 1960 self._inline = False
1935 1961 io = self._io
1936 1962 for i in self:
1937 1963 e = io.packentry(self.index[i], self.node, self.version, i)
1938 1964 fp.write(e)
1939 1965
1940 1966 # the temp file replaces the real index when we exit the context
1941 1967 # manager
1942 1968
1943 1969 tr.replace(self.indexfile, trindex * self._io.size)
1944 1970 self._chunkclear()
1945 1971
1946 1972 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1947 1973 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1948 1974 """add a revision to the log
1949 1975
1950 1976 text - the revision data to add
1951 1977 transaction - the transaction object used for rollback
1952 1978 link - the linkrev data to add
1953 1979 p1, p2 - the parent nodeids of the revision
1954 1980 cachedelta - an optional precomputed delta
1955 1981 node - nodeid of revision; typically node is not specified, and it is
1956 1982 computed by default as hash(text, p1, p2), however subclasses might
1957 1983 use different hashing method (and override checkhash() in such case)
1958 1984 flags - the known flags to set on the revision
1959 1985 deltacomputer - an optional _deltacomputer instance shared between
1960 1986 multiple calls
1961 1987 """
1962 1988 if link == nullrev:
1963 1989 raise RevlogError(_("attempted to add linkrev -1 to %s")
1964 1990 % self.indexfile)
1965 1991
1966 1992 if flags:
1967 1993 node = node or self.hash(text, p1, p2)
1968 1994
1969 1995 rawtext, validatehash = self._processflags(text, flags, 'write')
1970 1996
1971 1997 # If the flag processor modifies the revision data, ignore any provided
1972 1998 # cachedelta.
1973 1999 if rawtext != text:
1974 2000 cachedelta = None
1975 2001
1976 2002 if len(rawtext) > _maxentrysize:
1977 2003 raise RevlogError(
1978 2004 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1979 2005 % (self.indexfile, len(rawtext)))
1980 2006
1981 2007 node = node or self.hash(rawtext, p1, p2)
1982 2008 if node in self.nodemap:
1983 2009 return node
1984 2010
1985 2011 if validatehash:
1986 2012 self.checkhash(rawtext, node, p1=p1, p2=p2)
1987 2013
1988 2014 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1989 2015 flags, cachedelta=cachedelta,
1990 2016 deltacomputer=deltacomputer)
1991 2017
1992 2018 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1993 2019 cachedelta=None, deltacomputer=None):
1994 2020 """add a raw revision with known flags, node and parents
1995 2021 useful when reusing a revision not stored in this revlog (ex: received
1996 2022 over wire, or read from an external bundle).
1997 2023 """
1998 2024 dfh = None
1999 2025 if not self._inline:
2000 2026 dfh = self._datafp("a+")
2001 2027 ifh = self._indexfp("a+")
2002 2028 try:
2003 2029 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2004 2030 flags, cachedelta, ifh, dfh,
2005 2031 deltacomputer=deltacomputer)
2006 2032 finally:
2007 2033 if dfh:
2008 2034 dfh.close()
2009 2035 ifh.close()
2010 2036
2011 2037 def compress(self, data):
2012 2038 """Generate a possibly-compressed representation of data."""
2013 2039 if not data:
2014 2040 return '', data
2015 2041
2016 2042 compressed = self._compressor.compress(data)
2017 2043
2018 2044 if compressed:
2019 2045 # The revlog compressor added the header in the returned data.
2020 2046 return '', compressed
2021 2047
2022 2048 if data[0:1] == '\0':
2023 2049 return '', data
2024 2050 return 'u', data
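# (editorial note) the empty-string header above means "the payload
# identifies itself": zlib output begins with 'x', and other engines
# prepend their own revlog header. 'u' marks data stored uncompressed,
# and data that already starts with '\0' is stored bare, the NUL byte
# doubling as its marker. decompress() below dispatches on exactly
# this first byte.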
2025 2051
2026 2052 def decompress(self, data):
2027 2053 """Decompress a revlog chunk.
2028 2054
2029 2055 The chunk is expected to begin with a header identifying the
2030 2056 format type so it can be routed to an appropriate decompressor.
2031 2057 """
2032 2058 if not data:
2033 2059 return data
2034 2060
2035 2061 # Revlogs are read much more frequently than they are written and many
2036 2062 # chunks only take microseconds to decompress, so performance is
2037 2063 # important here.
2038 2064 #
2039 2065 # We can make a few assumptions about revlogs:
2040 2066 #
2041 2067 # 1) the majority of chunks will be compressed (as opposed to inline
2042 2068 # raw data).
2043 2069 # 2) decompressing *any* data will likely be at least 10x slower than
2044 2070 # returning raw inline data.
2045 2071 # 3) we want to prioritize common and officially supported compression
2046 2072 # engines
2047 2073 #
2048 2074 # It follows that we want to optimize for "decompress compressed data
2049 2075 # when encoded with common and officially supported compression engines"
2050 2076 # case over "raw data" and "data encoded by less common or non-official
2051 2077 # compression engines." That is why we have the inline lookup first
2052 2078 # followed by the compengines lookup.
2053 2079 #
2054 2080 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2055 2081 # compressed chunks. And this matters for changelog and manifest reads.
2056 2082 t = data[0:1]
2057 2083
2058 2084 if t == 'x':
2059 2085 try:
2060 2086 return _zlibdecompress(data)
2061 2087 except zlib.error as e:
2062 2088 raise RevlogError(_('revlog decompress error: %s') %
2063 2089 stringutil.forcebytestr(e))
2064 2090 # '\0' is more common than 'u' so it goes first.
2065 2091 elif t == '\0':
2066 2092 return data
2067 2093 elif t == 'u':
2068 2094 return util.buffer(data, 1)
2069 2095
2070 2096 try:
2071 2097 compressor = self._decompressors[t]
2072 2098 except KeyError:
2073 2099 try:
2074 2100 engine = util.compengines.forrevlogheader(t)
2075 2101 compressor = engine.revlogcompressor()
2076 2102 self._decompressors[t] = compressor
2077 2103 except KeyError:
2078 2104 raise RevlogError(_('unknown compression type %r') % t)
2079 2105
2080 2106 return compressor.decompress(data)
2081 2107
2082 2108 def _isgooddeltainfo(self, d, textlen):
2083 2109 """Returns True if the given delta is good. Good means that it is within
2084 2110 the disk span, disk size, and chain length bounds that we know to be
2085 2111 performant."""
2086 2112 if d is None:
2087 2113 return False
2088 2114
2089 2115 # - 'd.distance' is the distance from the base revision -- bounding it
2090 2116 # limits the amount of I/O we need to do.
2091 2117 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2092 2118 # need to apply -- bounding it limits the amount of CPU we consume.
2093 2119
2094 2120 defaultmax = textlen * 4
2095 2121 maxdist = self._maxdeltachainspan
2096 2122 if not maxdist:
2097 2123 maxdist = d.distance # ensure the conditional passes
2098 2124 maxdist = max(maxdist, defaultmax)
2099 2125 if (d.distance > maxdist or d.deltalen > textlen or
2100 2126 d.compresseddeltalen > textlen * 2 or
2101 2127 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2102 2128 return False
2103 2129
2104 2130 return True
2105 2131
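A standalone restatement of the acceptance test, with the bounds spelled out on plain numbers. The function and values are illustrative, not the revlog API:

```python
def isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                textlen, maxdeltachainspan=0, maxchainlen=None):
    """Keep a delta only if reading it stays within the I/O bound
    (distance), size bounds, and CPU bound (chain length)."""
    maxdist = maxdeltachainspan
    if not maxdist:
        maxdist = distance          # effectively disables the span check
    maxdist = max(maxdist, textlen * 4)
    if (distance > maxdist or
            deltalen > textlen or
            compresseddeltalen > textlen * 2 or
            (maxchainlen and chainlen > maxchainlen)):
        return False
    return True

# A 100-byte delta spanning 300 bytes on disk for a 1000-byte text passes:
assert isgooddelta(300, 100, 150, 3, 1000)
# ...but a delta larger than the text itself is rejected:
assert not isgooddelta(300, 1200, 150, 3, 1000)
```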
2106 2132 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2107 2133 cachedelta, ifh, dfh, alwayscache=False,
2108 2134 deltacomputer=None):
2109 2135 """internal function to add revisions to the log
2110 2136
2111 2137 see addrevision for argument descriptions.
2112 2138
2113 2139 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2114 2140
2115 2141 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2116 2142 be used.
2117 2143
2118 2144 invariants:
2119 2145 - rawtext is optional (can be None); if not set, cachedelta must be set.
2120 2146 If both are set, they must correspond to each other.
2121 2147 """
2122 2148 if node == nullid:
2123 2149 raise RevlogError(_("%s: attempt to add null revision") %
2124 2150 (self.indexfile))
2125 2151 if node == wdirid or node in wdirfilenodeids:
2126 2152 raise RevlogError(_("%s: attempt to add wdir revision") %
2127 2153 (self.indexfile))
2128 2154
2129 2155 if self._inline:
2130 2156 fh = ifh
2131 2157 else:
2132 2158 fh = dfh
2133 2159
2134 2160 btext = [rawtext]
2135 2161
2136 2162 curr = len(self)
2137 2163 prev = curr - 1
2138 2164 offset = self.end(prev)
2139 2165 p1r, p2r = self.rev(p1), self.rev(p2)
2140 2166
2141 2167 # full versions are inserted when the needed deltas
2142 2168 # become comparable to the uncompressed text
2143 2169 if rawtext is None:
2144 2170 # we need the rawtext size before it is changed by flag processors,
2145 2171 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2146 2172 # extra logic that might remove metadata size.
2147 2173 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2148 2174 cachedelta[1])
2149 2175 else:
2150 2176 textlen = len(rawtext)
2151 2177
2152 2178 if deltacomputer is None:
2153 2179 deltacomputer = _deltacomputer(self)
2154 2180
2155 2181 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2156 2182
2157 2183 # no delta for flag processor revisions (see "candelta" for why);
2158 2184 # we skip candelta since only one revision needs testing, and it
2159 2185 # avoids the overhead of fetching the flags again.
2160 2186 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2161 2187 deltainfo = None
2162 2188 else:
2163 2189 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2164 2190
2165 2191 if deltainfo is not None:
2166 2192 base = deltainfo.base
2167 2193 chainbase = deltainfo.chainbase
2168 2194 data = deltainfo.data
2169 2195 l = deltainfo.deltalen
2170 2196 else:
2171 2197 rawtext = deltacomputer.buildtext(revinfo, fh)
2172 2198 data = self.compress(rawtext)
2173 2199 l = len(data[1]) + len(data[0])
2174 2200 base = chainbase = curr
2175 2201
2176 2202 e = (offset_type(offset, flags), l, textlen,
2177 2203 base, link, p1r, p2r, node)
2178 2204 self.index.insert(-1, e)
2179 2205 self.nodemap[node] = curr
2180 2206
2181 2207 entry = self._io.packentry(e, self.node, self.version, curr)
2182 2208 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2183 2209
2184 2210 if alwayscache and rawtext is None:
2185 2211 rawtext = deltacomputer.buildtext(revinfo, fh)
2186 2212
2187 2213 if type(rawtext) == bytes: # only accept immutable objects
2188 2214 self._cache = (node, curr, rawtext)
2189 2215 self._chainbasecache[curr] = chainbase
2190 2216 return node
2191 2217
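The index entry tuple ``e`` built above packs the data-file offset and the 16-bit flags field into its first element. A sketch of that packing; the helper's body is inferred from its use here, and the field comments annotate made-up values:

```python
def offset_type(offset, flags):
    # Pack a data-file offset and a 16-bit flags field into one integer;
    # the low 16 bits carry the flags, the rest carry the offset.
    return int(offset) << 16 | flags

# Shape of an index entry as built in _addrevision() (values made up):
e = (offset_type(4096, 0),  # packed offset + flags
     120,                   # l: compressed chunk length on disk
     300,                   # textlen: uncompressed rawtext length
     7,                     # base: delta chain base revision
     42,                    # link: linkrev into the changelog
     6, -1,                 # p1r, p2r: parent revisions (-1 = null)
     b'\x11' * 20)          # node: the revision hash
```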
2192 2218 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2193 2219 # Files opened in a+ mode have inconsistent behavior on various
2194 2220 # platforms. Windows requires that a file positioning call be made
2195 2221 # when the file handle transitions between reads and writes. See
2196 2222 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2197 2223 # platforms, Python or the platform itself can be buggy. Some versions
2198 2224 # of Solaris have been observed to not append at the end of the file
2199 2225 # if the file was seeked to before the end. See issue4943 for more.
2200 2226 #
2201 2227 # We work around this issue by inserting a seek() before writing.
2202 2228 # Note: This is likely not necessary on Python 3.
2203 2229 ifh.seek(0, os.SEEK_END)
2204 2230 if dfh:
2205 2231 dfh.seek(0, os.SEEK_END)
2206 2232
2207 2233 curr = len(self) - 1
2208 2234 if not self._inline:
2209 2235 transaction.add(self.datafile, offset)
2210 2236 transaction.add(self.indexfile, curr * len(entry))
2211 2237 if data[0]:
2212 2238 dfh.write(data[0])
2213 2239 dfh.write(data[1])
2214 2240 ifh.write(entry)
2215 2241 else:
2216 2242 offset += curr * self._io.size
2217 2243 transaction.add(self.indexfile, offset, curr)
2218 2244 ifh.write(entry)
2219 2245 ifh.write(data[0])
2220 2246 ifh.write(data[1])
2221 2247 self._enforceinlinesize(transaction, ifh)
2222 2248
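The seek-before-write workaround in ``_writeentry`` is easy to demonstrate in isolation. A minimal sketch, using a temporary file rather than an "a+" revlog handle, so it only illustrates the reposition pattern:

```python
import os
import tempfile

with tempfile.TemporaryFile() as fh:
    fh.write(b'existing entry')
    fh.seek(0)
    fh.read(8)               # a read moves the file position...
    fh.seek(0, os.SEEK_END)  # ...so reseat it explicitly before writing
    fh.write(b'|new entry')  # now guaranteed to land at end of file
```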
2223 2249 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2224 2250 """
2225 2251 add a delta group
2226 2252
2227 2253 given a set of deltas, add them to the revision log. the
2228 2254 first delta is against its parent, which should be in our
2229 2255 log, the rest are against the previous delta.
2230 2256
2231 2257 If ``addrevisioncb`` is defined, it will be called with arguments of
2232 2258 this revlog and the node that was added.
2233 2259 """
2234 2260
2235 2261 nodes = []
2236 2262
2237 2263 r = len(self)
2238 2264 end = 0
2239 2265 if r:
2240 2266 end = self.end(r - 1)
2241 2267 ifh = self._indexfp("a+")
2242 2268 isize = r * self._io.size
2243 2269 if self._inline:
2244 2270 transaction.add(self.indexfile, end + isize, r)
2245 2271 dfh = None
2246 2272 else:
2247 2273 transaction.add(self.indexfile, isize, r)
2248 2274 transaction.add(self.datafile, end)
2249 2275 dfh = self._datafp("a+")
2250 2276 def flush():
2251 2277 if dfh:
2252 2278 dfh.flush()
2253 2279 ifh.flush()
2254 2280 try:
2255 2281 deltacomputer = _deltacomputer(self)
2256 2282 # loop through our set of deltas
2257 2283 for data in deltas:
2258 2284 node, p1, p2, linknode, deltabase, delta, flags = data
2259 2285 link = linkmapper(linknode)
2260 2286 flags = flags or REVIDX_DEFAULT_FLAGS
2261 2287
2262 2288 nodes.append(node)
2263 2289
2264 2290 if node in self.nodemap:
2265 2291 # this can happen if two branches make the same change
2266 2292 continue
2267 2293
2268 2294 for p in (p1, p2):
2269 2295 if p not in self.nodemap:
2270 2296 raise LookupError(p, self.indexfile,
2271 2297 _('unknown parent'))
2272 2298
2273 2299 if deltabase not in self.nodemap:
2274 2300 raise LookupError(deltabase, self.indexfile,
2275 2301 _('unknown delta base'))
2276 2302
2277 2303 baserev = self.rev(deltabase)
2278 2304
2279 2305 if baserev != nullrev and self.iscensored(baserev):
2280 2306 # if base is censored, delta must be full replacement in a
2281 2307 # single patch operation
2282 2308 hlen = struct.calcsize(">lll")
2283 2309 oldlen = self.rawsize(baserev)
2284 2310 newlen = len(delta) - hlen
2285 2311 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2286 2312 raise error.CensoredBaseError(self.indexfile,
2287 2313 self.node(baserev))
2288 2314
2289 2315 if not flags and self._peek_iscensored(baserev, delta, flush):
2290 2316 flags |= REVIDX_ISCENSORED
2291 2317
2292 2318 # We assume consumers of addrevisioncb will want to retrieve
2293 2319 # the added revision, which will require a call to
2294 2320 # revision(). revision() will fast path if there is a cache
2295 2321 # hit. So, we tell _addrevision() to always cache in this case.
2296 2322 # We're only using addgroup() in the context of changegroup
2297 2323 # generation, so the revision data can always be handled as raw
2298 2324 # by the flagprocessor.
2299 2325 self._addrevision(node, None, transaction, link,
2300 2326 p1, p2, flags, (baserev, delta),
2301 2327 ifh, dfh,
2302 2328 alwayscache=bool(addrevisioncb),
2303 2329 deltacomputer=deltacomputer)
2304 2330
2305 2331 if addrevisioncb:
2306 2332 addrevisioncb(self, node)
2307 2333
2308 2334 if not dfh and not self._inline:
2309 2335 # addrevision switched from inline to conventional
2310 2336 # reopen the index
2311 2337 ifh.close()
2312 2338 dfh = self._datafp("a+")
2313 2339 ifh = self._indexfp("a+")
2314 2340 finally:
2315 2341 if dfh:
2316 2342 dfh.close()
2317 2343 ifh.close()
2318 2344
2319 2345 return nodes
2320 2346
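For reference, each item yielded by ``deltas`` is the 7-tuple unpacked at the top of the loop above. A sketch of its shape with placeholder values (this is not a working changegroup, just the tuple layout):

```python
nullid = b'\0' * 20  # placeholder for mercurial.node.nullid

delta_entry = (
    b'\x11' * 20,     # node: hash of the revision being added
    b'\x22' * 20,     # p1
    nullid,           # p2 (null for a non-merge)
    b'\x33' * 20,     # linknode: changelog node this revision belongs to
    b'\x22' * 20,     # deltabase: must already exist in the revlog
    b'<bdiff data>',  # delta: patch bytes against deltabase
    0,                # flags: falsy means REVIDX_DEFAULT_FLAGS is applied
)
node, p1, p2, linknode, deltabase, delta, flags = delta_entry
```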
2321 2347 def iscensored(self, rev):
2322 2348 """Check if a file revision is censored."""
2323 2349 if not self._censorable:
2324 2350 return False
2325 2351
2326 2352 return self.flags(rev) & REVIDX_ISCENSORED
2327 2353
2328 2354 def _peek_iscensored(self, baserev, delta, flush):
2329 2355 """Quickly check if a delta produces a censored revision."""
2330 2356 if not self._censorable:
2331 2357 return False
2332 2358
2333 2359 # Fragile heuristic: unless new file meta keys are added alphabetically
2334 2360 # preceding "censored", all censored revisions are prefixed by
2335 2361 # "\1\ncensored:". A delta producing such a censored revision must be a
2336 2362 # full-replacement delta, so we inspect the first and only patch in the
2337 2363 # delta for this prefix.
2338 2364 hlen = struct.calcsize(">lll")
2339 2365 if len(delta) <= hlen:
2340 2366 return False
2341 2367
2342 2368 oldlen = self.rawsize(baserev)
2343 2369 newlen = len(delta) - hlen
2344 2370 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2345 2371 return False
2346 2372
2347 2373 add = "\1\ncensored:"
2348 2374 addlen = len(add)
2349 2375 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2350 2376
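The heuristic can be exercised on a hand-built delta. This sketch assumes ``mdiff.replacediffheader(oldlen, newlen)`` packs ``(0, oldlen, newlen)``, i.e. a single hunk replacing bytes ``[0, oldlen)``; the function name is hypothetical:

```python
import struct

CENSORMARK = b'\x01\ncensored:'

def toy_peek_iscensored(oldlen, delta):
    """A censored revision arrives as one full-replacement hunk whose
    new text starts with the censor marker."""
    hlen = struct.calcsize('>lll')
    if len(delta) <= hlen:
        return False
    newlen = len(delta) - hlen
    if delta[:hlen] != struct.pack('>lll', 0, oldlen, newlen):
        return False
    return delta[hlen:hlen + len(CENSORMARK)] == CENSORMARK

body = b'\x01\ncensored:tombstone'
delta = struct.pack('>lll', 0, 5, len(body)) + body
assert toy_peek_iscensored(5, delta)
```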
2351 2377 def getstrippoint(self, minlink):
2352 2378 """find the minimum rev that must be stripped to strip the linkrev
2353 2379
2354 2380 Returns a tuple containing the minimum rev and a set of all revs that
2355 2381 have linkrevs that will be broken by this strip.
2356 2382 """
2357 2383 brokenrevs = set()
2358 2384 strippoint = len(self)
2359 2385
2360 2386 heads = {}
2361 2387 futurelargelinkrevs = set()
2362 2388 for head in self.headrevs():
2363 2389 headlinkrev = self.linkrev(head)
2364 2390 heads[head] = headlinkrev
2365 2391 if headlinkrev >= minlink:
2366 2392 futurelargelinkrevs.add(headlinkrev)
2367 2393
2368 2394 # This algorithm involves walking down the rev graph, starting at the
2369 2395 # heads. Since the revs are topologically sorted according to linkrev,
2370 2396 # once all head linkrevs are below the minlink, we know there are
2371 2397 # no more revs that could have a linkrev greater than minlink.
2372 2398 # So we can stop walking.
2373 2399 while futurelargelinkrevs:
2374 2400 strippoint -= 1
2375 2401 linkrev = heads.pop(strippoint)
2376 2402
2377 2403 if linkrev < minlink:
2378 2404 brokenrevs.add(strippoint)
2379 2405 else:
2380 2406 futurelargelinkrevs.remove(linkrev)
2381 2407
2382 2408 for p in self.parentrevs(strippoint):
2383 2409 if p != nullrev:
2384 2410 plinkrev = self.linkrev(p)
2385 2411 heads[p] = plinkrev
2386 2412 if plinkrev >= minlink:
2387 2413 futurelargelinkrevs.add(plinkrev)
2388 2414
2389 2415 return strippoint, brokenrevs
2390 2416
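The walk is compact enough to run on a toy DAG. A standalone transcription of the algorithm above, where lists indexed by rev stand in for the revlog, -1 is nullrev, and linkrevs are assumed unique per rev (as they are in practice):

```python
def toy_getstrippoint(parentrevs, headrevs, linkrevs, minlink):
    nullrev = -1
    brokenrevs = set()
    strippoint = len(linkrevs)
    heads = {}
    futurelargelinkrevs = set()
    for head in headrevs:
        heads[head] = linkrevs[head]
        if linkrevs[head] >= minlink:
            futurelargelinkrevs.add(linkrevs[head])
    # Walk revs downward; once no pending head can have a linkrev
    # >= minlink, everything below strippoint is safe to keep.
    while futurelargelinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)
        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            futurelargelinkrevs.remove(linkrev)
        for p in parentrevs[strippoint]:
            if p != nullrev:
                heads[p] = linkrevs[p]
                if linkrevs[p] >= minlink:
                    futurelargelinkrevs.add(linkrevs[p])
    return strippoint, brokenrevs

# Linear history, linkrev == rev: stripping linkrev 3 strips revs 3 and 4.
parents = [(-1, -1), (0, -1), (1, -1), (2, -1), (3, -1)]
assert toy_getstrippoint(parents, [4], [0, 1, 2, 3, 4], 3) == (3, set())
```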
2391 2417 def strip(self, minlink, transaction):
2392 2418 """truncate the revlog on the first revision with a linkrev >= minlink
2393 2419
2394 2420 This function is called when we're stripping revision minlink and
2395 2421 its descendants from the repository.
2396 2422
2397 2423 We have to remove all revisions with linkrev >= minlink, because
2398 2424 the equivalent changelog revisions will be renumbered after the
2399 2425 strip.
2400 2426
2401 2427 So we truncate the revlog on the first of these revisions, and
2402 2428 trust that the caller has saved the revisions that shouldn't be
2403 2429 removed and that it'll re-add them after this truncation.
2404 2430 """
2405 2431 if len(self) == 0:
2406 2432 return
2407 2433
2408 2434 rev, _ = self.getstrippoint(minlink)
2409 2435 if rev == len(self):
2410 2436 return
2411 2437
2412 2438 # first truncate the files on disk
2413 2439 end = self.start(rev)
2414 2440 if not self._inline:
2415 2441 transaction.add(self.datafile, end)
2416 2442 end = rev * self._io.size
2417 2443 else:
2418 2444 end += rev * self._io.size
2419 2445
2420 2446 transaction.add(self.indexfile, end)
2421 2447
2422 2448 # then reset internal state in memory to forget those revisions
2423 2449 self._cache = None
2424 2450 self._chaininfocache = {}
2425 2451 self._chunkclear()
2426 2452 for x in xrange(rev, len(self)):
2427 2453 del self.nodemap[self.node(x)]
2428 2454
2429 2455 del self.index[rev:-1]
2430 2456 self._nodepos = None
2431 2457
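The truncation arithmetic differs for inline and split revlogs. A small sketch of just that computation (the helper is hypothetical; ``entrysize`` is the fixed index entry size, ``datastart`` is ``self.start(rev)``):

```python
def toy_truncation_offsets(rev, datastart, entrysize, inline):
    """Return (datafile_offset, indexfile_offset) at which strip()
    truncates; the data offset is None for inline revlogs."""
    if not inline:
        # Split revlog: cut the data file where rev's chunk starts, and
        # the index after rev whole entries.
        return datastart, rev * entrysize
    # Inline revlog: index entries and data chunks share one file, so
    # the index offset covers both the entries and the preceding data.
    return None, datastart + rev * entrysize
```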
2432 2458 def checksize(self):
2433 2459 expected = 0
2434 2460 if len(self):
2435 2461 expected = max(0, self.end(len(self) - 1))
2436 2462
2437 2463 try:
2438 2464 with self._datafp() as f:
2439 2465 f.seek(0, 2)
2440 2466 actual = f.tell()
2441 2467 dd = actual - expected
2442 2468 except IOError as inst:
2443 2469 if inst.errno != errno.ENOENT:
2444 2470 raise
2445 2471 dd = 0
2446 2472
2447 2473 try:
2448 2474 f = self.opener(self.indexfile)
2449 2475 f.seek(0, 2)
2450 2476 actual = f.tell()
2451 2477 f.close()
2452 2478 s = self._io.size
2453 2479 i = max(0, actual // s)
2454 2480 di = actual - (i * s)
2455 2481 if self._inline:
2456 2482 databytes = 0
2457 2483 for r in self:
2458 2484 databytes += max(0, self.length(r))
2459 2485 dd = 0
2460 2486 di = actual - len(self) * s - databytes
2461 2487 except IOError as inst:
2462 2488 if inst.errno != errno.ENOENT:
2463 2489 raise
2464 2490 di = 0
2465 2491
2466 2492 return (dd, di)
2467 2493
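A hedged usage sketch of ``checksize()``; ``rl`` is assumed to be an already-open revlog instance, and the interpretation of the two values follows from the code above:

```python
dd, di = rl.checksize()  # rl: an existing revlog instance (assumed)
if dd:
    print('data file differs from index expectation by %d bytes' % dd)
if di:
    print('index file has %d unexpected trailing bytes' % di)
if not dd and not di:
    print('revlog sizes are consistent')
```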
2468 2494 def files(self):
2469 2495 res = [self.indexfile]
2470 2496 if not self._inline:
2471 2497 res.append(self.datafile)
2472 2498 return res
2473 2499
2474 2500 DELTAREUSEALWAYS = 'always'
2475 2501 DELTAREUSESAMEREVS = 'samerevs'
2476 2502 DELTAREUSENEVER = 'never'
2477 2503
2478 2504 DELTAREUSEFULLADD = 'fulladd'
2479 2505
2480 2506 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2481 2507
2482 2508 def clone(self, tr, destrevlog, addrevisioncb=None,
2483 2509 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2484 2510 """Copy this revlog to another, possibly with format changes.
2485 2511
2486 2512 The destination revlog will contain the same revisions and nodes.
2487 2513 However, it may not be bit-for-bit identical due to e.g. delta encoding
2488 2514 differences.
2489 2515
2490 2516 The ``deltareuse`` argument controls how deltas from the existing revlog
2491 2517 are preserved in the destination revlog. The argument can have the
2492 2518 following values:
2493 2519
2494 2520 DELTAREUSEALWAYS
2495 2521 Deltas will always be reused (if possible), even if the destination
2496 2522 revlog would not select the same revisions for the delta. This is the
2497 2523 fastest mode of operation.
2498 2524 DELTAREUSESAMEREVS
2499 2525 Deltas will be reused if the destination revlog would pick the same
2500 2526 revisions for the delta. This mode strikes a balance between speed
2501 2527 and optimization.
2502 2528 DELTAREUSENEVER
2503 2529 Deltas will never be reused. This is the slowest mode of execution.
2504 2530 This mode can be used to recompute deltas (e.g. if the diff/delta
2505 2531 algorithm changes).
2506 2532
2507 2533 Delta computation can be slow, so the choice of delta reuse policy can
2508 2534 significantly affect run time.
2509 2535
2510 2536 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2511 2537 two extremes. Deltas will be reused when appropriate, but they will be
2512 2538 recomputed when a better base revision is available. This means if you
2513 2539 are converting a non-generaldelta revlog to a generaldelta revlog,
2514 2540 deltas will be recomputed if the delta's parent isn't a parent of the
2515 2541 revision.
2516 2542
2517 2543 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2518 2544 controls whether to compute deltas against both parents for merges.
2519 2545 If it is None, the destination revlog's current setting is kept.
2520 2546 """
2521 2547 if deltareuse not in self.DELTAREUSEALL:
2522 2548 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2523 2549
2524 2550 if len(destrevlog):
2525 2551 raise ValueError(_('destination revlog is not empty'))
2526 2552
2527 2553 if getattr(self, 'filteredrevs', None):
2528 2554 raise ValueError(_('source revlog has filtered revisions'))
2529 2555 if getattr(destrevlog, 'filteredrevs', None):
2530 2556 raise ValueError(_('destination revlog has filtered revisions'))
2531 2557
2532 2558 # lazydeltabase controls whether to reuse a cached delta, if possible.
2533 2559 oldlazydeltabase = destrevlog._lazydeltabase
2534 2560 oldamd = destrevlog._aggressivemergedeltas
2535 2561
2536 2562 try:
2537 2563 if deltareuse == self.DELTAREUSEALWAYS:
2538 2564 destrevlog._lazydeltabase = True
2539 2565 elif deltareuse == self.DELTAREUSESAMEREVS:
2540 2566 destrevlog._lazydeltabase = False
2541 2567
2542 2568 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2543 2569
2544 2570 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2545 2571 self.DELTAREUSESAMEREVS)
2546 2572
2547 2573 deltacomputer = _deltacomputer(destrevlog)
2548 2574 index = self.index
2549 2575 for rev in self:
2550 2576 entry = index[rev]
2551 2577
2552 2578 # Some classes override linkrev to take filtered revs into
2553 2579 # account. Use raw entry from index.
2554 2580 flags = entry[0] & 0xffff
2555 2581 linkrev = entry[4]
2556 2582 p1 = index[entry[5]][7]
2557 2583 p2 = index[entry[6]][7]
2558 2584 node = entry[7]
2559 2585
2560 2586 # (Possibly) reuse the delta from the revlog if allowed and
2561 2587 # the revlog chunk is a delta.
2562 2588 cachedelta = None
2563 2589 rawtext = None
2564 2590 if populatecachedelta:
2565 2591 dp = self.deltaparent(rev)
2566 2592 if dp != nullrev:
2567 2593 cachedelta = (dp, bytes(self._chunk(rev)))
2568 2594
2569 2595 if not cachedelta:
2570 2596 rawtext = self.revision(rev, raw=True)
2571 2597
2572 2598
2573 2599 if deltareuse == self.DELTAREUSEFULLADD:
2574 2600 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2575 2601 cachedelta=cachedelta,
2576 2602 node=node, flags=flags,
2577 2603 deltacomputer=deltacomputer)
2578 2604 else:
2579 2605 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2580 2606 checkambig=False)
2581 2607 dfh = None
2582 2608 if not destrevlog._inline:
2583 2609 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2584 2610 try:
2585 2611 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2586 2612 p2, flags, cachedelta, ifh, dfh,
2587 2613 deltacomputer=deltacomputer)
2588 2614 finally:
2589 2615 if dfh:
2590 2616 dfh.close()
2591 2617 ifh.close()
2592 2618
2593 2619 if addrevisioncb:
2594 2620 addrevisioncb(self, rev, node)
2595 2621 finally:
2596 2622 destrevlog._lazydeltabase = oldlazydeltabase
2597 2623 destrevlog._aggressivemergedeltas = oldamd
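A hedged usage sketch of ``clone()``; ``srcrevlog``, ``destrevlog``, ``destrevlog2`` and ``tr`` are assumed to be existing revlogs (the destinations empty) and an open transaction. Note the fourth policy visible in the code, ``DELTAREUSEFULLADD``, which routes every revision through the full ``addrevision()`` path instead of ``_addrevision()``:

```python
# Recompute every delta while copying, e.g. after changing delta
# settings; DELTAREUSENEVER forces finddeltainfo() per revision.
srcrevlog.clone(tr, destrevlog,
                deltareuse=srcrevlog.DELTAREUSENEVER,
                aggressivemergedeltas=None)  # None keeps dest's setting

# Or re-add revisions through the full (non-raw) code path:
srcrevlog.clone(tr, destrevlog2,
                deltareuse=srcrevlog.DELTAREUSEFULLADD)
```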