index: don't include nullid in len()...
Author: Martin von Zweigbergk
Changeset: r39124:781b2720 (default branch)
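The change itself is small but touches many call sites: index_length() no longer counts the trailing nullid sentinel entry, so the length of the index now equals the number of real revisions, and callers that still need the sentinel-inclusive count add 1 back (while callers that previously subtracted 1 no longer do). A minimal before/after sketch, lifted from the hunks below; the _old/_new suffixes and the entrycount helper are illustrative only and do not appear in the source, and both bodies assume the indexObject struct defined later in this diff:

    /* Before: the sentinel nullid entry at the end was counted. */
    static Py_ssize_t index_length_old(const indexObject *self)
    {
            if (self->added == NULL)
                    return self->length;
            return self->length + PyList_GET_SIZE(self->added);
    }

    /* After: the sentinel is excluded, so the result is the revision count. */
    static Py_ssize_t index_length_new(const indexObject *self)
    {
            if (self->added == NULL)
                    return self->length - 1;
            return self->length + PyList_GET_SIZE(self->added) - 1;
    }

    /* Illustrative helper (not part of the changeset): callers such as
     * index_get() and index_node() that still want the sentinel-inclusive
     * entry count simply add 1, as in
     * "Py_ssize_t length = index_length(self) + 1;". */
    static Py_ssize_t index_entrycount_example(const indexObject *self)
    {
            return index_length_new(self) + 1;
    }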
@@ -1,797 +1,797 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "bitmanipulation.h"
16 16 #include "charencode.h"
17 17 #include "util.h"
18 18
19 19 #ifdef IS_PY3K
20 20 /* The mapping of Python types is meant to be temporary to get Python
21 21 * 3 to compile. We should remove this once Python 3 support is fully
22 22 * supported and proper types are used in the extensions themselves. */
23 23 #define PyInt_Check PyLong_Check
24 24 #define PyInt_FromLong PyLong_FromLong
25 25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 26 #define PyInt_AsLong PyLong_AsLong
27 27 #endif
28 28
29 29 static const char *const versionerrortext = "Python minor version mismatch";
30 30
31 31 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
32 32 {
33 33 Py_ssize_t expected_size;
34 34
35 35 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
36 36 return NULL;
37 37
38 38 return _dict_new_presized(expected_size);
39 39 }
40 40
41 41 /*
42 42 * This code assumes that a manifest is stitched together with newline
43 43 * ('\n') characters.
44 44 */
45 45 static PyObject *parse_manifest(PyObject *self, PyObject *args)
46 46 {
47 47 PyObject *mfdict, *fdict;
48 48 char *str, *start, *end;
49 49 int len;
50 50
51 51 if (!PyArg_ParseTuple(
52 52 args, PY23("O!O!s#:parse_manifest", "O!O!y#:parse_manifest"),
53 53 &PyDict_Type, &mfdict, &PyDict_Type, &fdict, &str, &len))
54 54 goto quit;
55 55
56 56 start = str;
57 57 end = str + len;
58 58 while (start < end) {
59 59 PyObject *file = NULL, *node = NULL;
60 60 PyObject *flags = NULL;
61 61 char *zero = NULL, *newline = NULL;
62 62 ptrdiff_t nlen;
63 63
64 64 zero = memchr(start, '\0', end - start);
65 65 if (!zero) {
66 66 PyErr_SetString(PyExc_ValueError,
67 67 "manifest entry has no separator");
68 68 goto quit;
69 69 }
70 70
71 71 newline = memchr(zero + 1, '\n', end - (zero + 1));
72 72 if (!newline) {
73 73 PyErr_SetString(PyExc_ValueError,
74 74 "manifest contains trailing garbage");
75 75 goto quit;
76 76 }
77 77
78 78 file = PyBytes_FromStringAndSize(start, zero - start);
79 79
80 80 if (!file)
81 81 goto bail;
82 82
83 83 nlen = newline - zero - 1;
84 84
85 85 node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
86 86 if (!node)
87 87 goto bail;
88 88
89 89 if (nlen > 40) {
90 90 flags = PyBytes_FromStringAndSize(zero + 41, nlen - 40);
91 91 if (!flags)
92 92 goto bail;
93 93
94 94 if (PyDict_SetItem(fdict, file, flags) == -1)
95 95 goto bail;
96 96 }
97 97
98 98 if (PyDict_SetItem(mfdict, file, node) == -1)
99 99 goto bail;
100 100
101 101 start = newline + 1;
102 102
103 103 Py_XDECREF(flags);
104 104 Py_XDECREF(node);
105 105 Py_XDECREF(file);
106 106 continue;
107 107 bail:
108 108 Py_XDECREF(flags);
109 109 Py_XDECREF(node);
110 110 Py_XDECREF(file);
111 111 goto quit;
112 112 }
113 113
114 114 Py_INCREF(Py_None);
115 115 return Py_None;
116 116 quit:
117 117 return NULL;
118 118 }
119 119
120 120 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
121 121 int size, int mtime)
122 122 {
123 123 dirstateTupleObject *t =
124 124 PyObject_New(dirstateTupleObject, &dirstateTupleType);
125 125 if (!t)
126 126 return NULL;
127 127 t->state = state;
128 128 t->mode = mode;
129 129 t->size = size;
130 130 t->mtime = mtime;
131 131 return t;
132 132 }
133 133
134 134 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
135 135 PyObject *kwds)
136 136 {
137 137 /* We do all the initialization here and not a tp_init function because
138 138 * dirstate_tuple is immutable. */
139 139 dirstateTupleObject *t;
140 140 char state;
141 141 int size, mode, mtime;
142 142 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
143 143 return NULL;
144 144
145 145 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
146 146 if (!t)
147 147 return NULL;
148 148 t->state = state;
149 149 t->mode = mode;
150 150 t->size = size;
151 151 t->mtime = mtime;
152 152
153 153 return (PyObject *)t;
154 154 }
155 155
156 156 static void dirstate_tuple_dealloc(PyObject *o)
157 157 {
158 158 PyObject_Del(o);
159 159 }
160 160
161 161 static Py_ssize_t dirstate_tuple_length(PyObject *o)
162 162 {
163 163 return 4;
164 164 }
165 165
166 166 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
167 167 {
168 168 dirstateTupleObject *t = (dirstateTupleObject *)o;
169 169 switch (i) {
170 170 case 0:
171 171 return PyBytes_FromStringAndSize(&t->state, 1);
172 172 case 1:
173 173 return PyInt_FromLong(t->mode);
174 174 case 2:
175 175 return PyInt_FromLong(t->size);
176 176 case 3:
177 177 return PyInt_FromLong(t->mtime);
178 178 default:
179 179 PyErr_SetString(PyExc_IndexError, "index out of range");
180 180 return NULL;
181 181 }
182 182 }
183 183
184 184 static PySequenceMethods dirstate_tuple_sq = {
185 185 dirstate_tuple_length, /* sq_length */
186 186 0, /* sq_concat */
187 187 0, /* sq_repeat */
188 188 dirstate_tuple_item, /* sq_item */
189 189 0, /* sq_ass_item */
190 190 0, /* sq_contains */
191 191 0, /* sq_inplace_concat */
192 192 0 /* sq_inplace_repeat */
193 193 };
194 194
195 195 PyTypeObject dirstateTupleType = {
196 196 PyVarObject_HEAD_INIT(NULL, 0) /* header */
197 197 "dirstate_tuple", /* tp_name */
198 198 sizeof(dirstateTupleObject), /* tp_basicsize */
199 199 0, /* tp_itemsize */
200 200 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
201 201 0, /* tp_print */
202 202 0, /* tp_getattr */
203 203 0, /* tp_setattr */
204 204 0, /* tp_compare */
205 205 0, /* tp_repr */
206 206 0, /* tp_as_number */
207 207 &dirstate_tuple_sq, /* tp_as_sequence */
208 208 0, /* tp_as_mapping */
209 209 0, /* tp_hash */
210 210 0, /* tp_call */
211 211 0, /* tp_str */
212 212 0, /* tp_getattro */
213 213 0, /* tp_setattro */
214 214 0, /* tp_as_buffer */
215 215 Py_TPFLAGS_DEFAULT, /* tp_flags */
216 216 "dirstate tuple", /* tp_doc */
217 217 0, /* tp_traverse */
218 218 0, /* tp_clear */
219 219 0, /* tp_richcompare */
220 220 0, /* tp_weaklistoffset */
221 221 0, /* tp_iter */
222 222 0, /* tp_iternext */
223 223 0, /* tp_methods */
224 224 0, /* tp_members */
225 225 0, /* tp_getset */
226 226 0, /* tp_base */
227 227 0, /* tp_dict */
228 228 0, /* tp_descr_get */
229 229 0, /* tp_descr_set */
230 230 0, /* tp_dictoffset */
231 231 0, /* tp_init */
232 232 0, /* tp_alloc */
233 233 dirstate_tuple_new, /* tp_new */
234 234 };
235 235
236 236 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
237 237 {
238 238 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
239 239 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
240 240 char state, *cur, *str, *cpos;
241 241 int mode, size, mtime;
242 242 unsigned int flen, len, pos = 40;
243 243 int readlen;
244 244
245 245 if (!PyArg_ParseTuple(
246 246 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
247 247 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen))
248 248 goto quit;
249 249
250 250 len = readlen;
251 251
252 252 /* read parents */
253 253 if (len < 40) {
254 254 PyErr_SetString(PyExc_ValueError,
255 255 "too little data for parents");
256 256 goto quit;
257 257 }
258 258
259 259 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, 20, str + 20, 20);
260 260 if (!parents)
261 261 goto quit;
262 262
263 263 /* read filenames */
264 264 while (pos >= 40 && pos < len) {
265 265 if (pos + 17 > len) {
266 266 PyErr_SetString(PyExc_ValueError,
267 267 "overflow in dirstate");
268 268 goto quit;
269 269 }
270 270 cur = str + pos;
271 271 /* unpack header */
272 272 state = *cur;
273 273 mode = getbe32(cur + 1);
274 274 size = getbe32(cur + 5);
275 275 mtime = getbe32(cur + 9);
276 276 flen = getbe32(cur + 13);
277 277 pos += 17;
278 278 cur += 17;
279 279 if (flen > len - pos) {
280 280 PyErr_SetString(PyExc_ValueError,
281 281 "overflow in dirstate");
282 282 goto quit;
283 283 }
284 284
285 285 entry =
286 286 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
287 287 cpos = memchr(cur, 0, flen);
288 288 if (cpos) {
289 289 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
290 290 cname = PyBytes_FromStringAndSize(
291 291 cpos + 1, flen - (cpos - cur) - 1);
292 292 if (!fname || !cname ||
293 293 PyDict_SetItem(cmap, fname, cname) == -1 ||
294 294 PyDict_SetItem(dmap, fname, entry) == -1)
295 295 goto quit;
296 296 Py_DECREF(cname);
297 297 } else {
298 298 fname = PyBytes_FromStringAndSize(cur, flen);
299 299 if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
300 300 goto quit;
301 301 }
302 302 Py_DECREF(fname);
303 303 Py_DECREF(entry);
304 304 fname = cname = entry = NULL;
305 305 pos += flen;
306 306 }
307 307
308 308 ret = parents;
309 309 Py_INCREF(ret);
310 310 quit:
311 311 Py_XDECREF(fname);
312 312 Py_XDECREF(cname);
313 313 Py_XDECREF(entry);
314 314 Py_XDECREF(parents);
315 315 return ret;
316 316 }
317 317
318 318 /*
319 319 * Build a set of non-normal and other parent entries from the dirstate dmap
320 320 */
321 321 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
322 322 {
323 323 PyObject *dmap, *fname, *v;
324 324 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
325 325 Py_ssize_t pos;
326 326
327 327 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
328 328 goto bail;
329 329
330 330 nonnset = PySet_New(NULL);
331 331 if (nonnset == NULL)
332 332 goto bail;
333 333
334 334 otherpset = PySet_New(NULL);
335 335 if (otherpset == NULL)
336 336 goto bail;
337 337
338 338 pos = 0;
339 339 while (PyDict_Next(dmap, &pos, &fname, &v)) {
340 340 dirstateTupleObject *t;
341 341 if (!dirstate_tuple_check(v)) {
342 342 PyErr_SetString(PyExc_TypeError,
343 343 "expected a dirstate tuple");
344 344 goto bail;
345 345 }
346 346 t = (dirstateTupleObject *)v;
347 347
348 348 if (t->state == 'n' && t->size == -2) {
349 349 if (PySet_Add(otherpset, fname) == -1) {
350 350 goto bail;
351 351 }
352 352 }
353 353
354 354 if (t->state == 'n' && t->mtime != -1)
355 355 continue;
356 356 if (PySet_Add(nonnset, fname) == -1)
357 357 goto bail;
358 358 }
359 359
360 360 result = Py_BuildValue("(OO)", nonnset, otherpset);
361 361 if (result == NULL)
362 362 goto bail;
363 363 Py_DECREF(nonnset);
364 364 Py_DECREF(otherpset);
365 365 return result;
366 366 bail:
367 367 Py_XDECREF(nonnset);
368 368 Py_XDECREF(otherpset);
369 369 Py_XDECREF(result);
370 370 return NULL;
371 371 }
372 372
373 373 /*
374 374 * Efficiently pack a dirstate object into its on-disk format.
375 375 */
376 376 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
377 377 {
378 378 PyObject *packobj = NULL;
379 379 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
380 380 Py_ssize_t nbytes, pos, l;
381 381 PyObject *k, *v = NULL, *pn;
382 382 char *p, *s;
383 383 int now;
384 384
385 385 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
386 386 &PyDict_Type, &copymap, &pl, &now))
387 387 return NULL;
388 388
389 389 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
390 390 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
391 391 return NULL;
392 392 }
393 393
394 394 /* Figure out how much we need to allocate. */
395 395 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
396 396 PyObject *c;
397 397 if (!PyBytes_Check(k)) {
398 398 PyErr_SetString(PyExc_TypeError, "expected string key");
399 399 goto bail;
400 400 }
401 401 nbytes += PyBytes_GET_SIZE(k) + 17;
402 402 c = PyDict_GetItem(copymap, k);
403 403 if (c) {
404 404 if (!PyBytes_Check(c)) {
405 405 PyErr_SetString(PyExc_TypeError,
406 406 "expected string key");
407 407 goto bail;
408 408 }
409 409 nbytes += PyBytes_GET_SIZE(c) + 1;
410 410 }
411 411 }
412 412
413 413 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
414 414 if (packobj == NULL)
415 415 goto bail;
416 416
417 417 p = PyBytes_AS_STRING(packobj);
418 418
419 419 pn = PySequence_ITEM(pl, 0);
420 420 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
421 421 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
422 422 goto bail;
423 423 }
424 424 memcpy(p, s, l);
425 425 p += 20;
426 426 pn = PySequence_ITEM(pl, 1);
427 427 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
428 428 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
429 429 goto bail;
430 430 }
431 431 memcpy(p, s, l);
432 432 p += 20;
433 433
434 434 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
435 435 dirstateTupleObject *tuple;
436 436 char state;
437 437 int mode, size, mtime;
438 438 Py_ssize_t len, l;
439 439 PyObject *o;
440 440 char *t;
441 441
442 442 if (!dirstate_tuple_check(v)) {
443 443 PyErr_SetString(PyExc_TypeError,
444 444 "expected a dirstate tuple");
445 445 goto bail;
446 446 }
447 447 tuple = (dirstateTupleObject *)v;
448 448
449 449 state = tuple->state;
450 450 mode = tuple->mode;
451 451 size = tuple->size;
452 452 mtime = tuple->mtime;
453 453 if (state == 'n' && mtime == now) {
454 454 /* See pure/parsers.py:pack_dirstate for why we do
455 455 * this. */
456 456 mtime = -1;
457 457 mtime_unset = (PyObject *)make_dirstate_tuple(
458 458 state, mode, size, mtime);
459 459 if (!mtime_unset)
460 460 goto bail;
461 461 if (PyDict_SetItem(map, k, mtime_unset) == -1)
462 462 goto bail;
463 463 Py_DECREF(mtime_unset);
464 464 mtime_unset = NULL;
465 465 }
466 466 *p++ = state;
467 467 putbe32((uint32_t)mode, p);
468 468 putbe32((uint32_t)size, p + 4);
469 469 putbe32((uint32_t)mtime, p + 8);
470 470 t = p + 12;
471 471 p += 16;
472 472 len = PyBytes_GET_SIZE(k);
473 473 memcpy(p, PyBytes_AS_STRING(k), len);
474 474 p += len;
475 475 o = PyDict_GetItem(copymap, k);
476 476 if (o) {
477 477 *p++ = '\0';
478 478 l = PyBytes_GET_SIZE(o);
479 479 memcpy(p, PyBytes_AS_STRING(o), l);
480 480 p += l;
481 481 len += l + 1;
482 482 }
483 483 putbe32((uint32_t)len, t);
484 484 }
485 485
486 486 pos = p - PyBytes_AS_STRING(packobj);
487 487 if (pos != nbytes) {
488 488 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
489 489 (long)pos, (long)nbytes);
490 490 goto bail;
491 491 }
492 492
493 493 return packobj;
494 494 bail:
495 495 Py_XDECREF(mtime_unset);
496 496 Py_XDECREF(packobj);
497 497 Py_XDECREF(v);
498 498 return NULL;
499 499 }
500 500
501 501 #define BUMPED_FIX 1
502 502 #define USING_SHA_256 2
503 503 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
504 504
505 505 static PyObject *readshas(const char *source, unsigned char num,
506 506 Py_ssize_t hashwidth)
507 507 {
508 508 int i;
509 509 PyObject *list = PyTuple_New(num);
510 510 if (list == NULL) {
511 511 return NULL;
512 512 }
513 513 for (i = 0; i < num; i++) {
514 514 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
515 515 if (hash == NULL) {
516 516 Py_DECREF(list);
517 517 return NULL;
518 518 }
519 519 PyTuple_SET_ITEM(list, i, hash);
520 520 source += hashwidth;
521 521 }
522 522 return list;
523 523 }
524 524
525 525 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
526 526 uint32_t *msize)
527 527 {
528 528 const char *data = databegin;
529 529 const char *meta;
530 530
531 531 double mtime;
532 532 int16_t tz;
533 533 uint16_t flags;
534 534 unsigned char nsuccs, nparents, nmetadata;
535 535 Py_ssize_t hashwidth = 20;
536 536
537 537 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
538 538 PyObject *metadata = NULL, *ret = NULL;
539 539 int i;
540 540
541 541 if (data + FM1_HEADER_SIZE > dataend) {
542 542 goto overflow;
543 543 }
544 544
545 545 *msize = getbe32(data);
546 546 data += 4;
547 547 mtime = getbefloat64(data);
548 548 data += 8;
549 549 tz = getbeint16(data);
550 550 data += 2;
551 551 flags = getbeuint16(data);
552 552 data += 2;
553 553
554 554 if (flags & USING_SHA_256) {
555 555 hashwidth = 32;
556 556 }
557 557
558 558 nsuccs = (unsigned char)(*data++);
559 559 nparents = (unsigned char)(*data++);
560 560 nmetadata = (unsigned char)(*data++);
561 561
562 562 if (databegin + *msize > dataend) {
563 563 goto overflow;
564 564 }
565 565 dataend = databegin + *msize; /* narrow down to marker size */
566 566
567 567 if (data + hashwidth > dataend) {
568 568 goto overflow;
569 569 }
570 570 prec = PyBytes_FromStringAndSize(data, hashwidth);
571 571 data += hashwidth;
572 572 if (prec == NULL) {
573 573 goto bail;
574 574 }
575 575
576 576 if (data + nsuccs * hashwidth > dataend) {
577 577 goto overflow;
578 578 }
579 579 succs = readshas(data, nsuccs, hashwidth);
580 580 if (succs == NULL) {
581 581 goto bail;
582 582 }
583 583 data += nsuccs * hashwidth;
584 584
585 585 if (nparents == 1 || nparents == 2) {
586 586 if (data + nparents * hashwidth > dataend) {
587 587 goto overflow;
588 588 }
589 589 parents = readshas(data, nparents, hashwidth);
590 590 if (parents == NULL) {
591 591 goto bail;
592 592 }
593 593 data += nparents * hashwidth;
594 594 } else {
595 595 parents = Py_None;
596 596 Py_INCREF(parents);
597 597 }
598 598
599 599 if (data + 2 * nmetadata > dataend) {
600 600 goto overflow;
601 601 }
602 602 meta = data + (2 * nmetadata);
603 603 metadata = PyTuple_New(nmetadata);
604 604 if (metadata == NULL) {
605 605 goto bail;
606 606 }
607 607 for (i = 0; i < nmetadata; i++) {
608 608 PyObject *tmp, *left = NULL, *right = NULL;
609 609 Py_ssize_t leftsize = (unsigned char)(*data++);
610 610 Py_ssize_t rightsize = (unsigned char)(*data++);
611 611 if (meta + leftsize + rightsize > dataend) {
612 612 goto overflow;
613 613 }
614 614 left = PyBytes_FromStringAndSize(meta, leftsize);
615 615 meta += leftsize;
616 616 right = PyBytes_FromStringAndSize(meta, rightsize);
617 617 meta += rightsize;
618 618 tmp = PyTuple_New(2);
619 619 if (!left || !right || !tmp) {
620 620 Py_XDECREF(left);
621 621 Py_XDECREF(right);
622 622 Py_XDECREF(tmp);
623 623 goto bail;
624 624 }
625 625 PyTuple_SET_ITEM(tmp, 0, left);
626 626 PyTuple_SET_ITEM(tmp, 1, right);
627 627 PyTuple_SET_ITEM(metadata, i, tmp);
628 628 }
629 629 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
630 630 (int)tz * 60, parents);
631 631 goto bail; /* return successfully */
632 632
633 633 overflow:
634 634 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
635 635 bail:
636 636 Py_XDECREF(prec);
637 637 Py_XDECREF(succs);
638 638 Py_XDECREF(metadata);
639 639 Py_XDECREF(parents);
640 640 return ret;
641 641 }
642 642
643 643 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
644 644 {
645 645 const char *data, *dataend;
646 646 int datalen;
647 647 Py_ssize_t offset, stop;
648 648 PyObject *markers = NULL;
649 649
650 650 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
651 651 &offset, &stop)) {
652 652 return NULL;
653 653 }
654 654 dataend = data + datalen;
655 655 data += offset;
656 656 markers = PyList_New(0);
657 657 if (!markers) {
658 658 return NULL;
659 659 }
660 660 while (offset < stop) {
661 661 uint32_t msize;
662 662 int error;
663 663 PyObject *record = fm1readmarker(data, dataend, &msize);
664 664 if (!record) {
665 665 goto bail;
666 666 }
667 667 error = PyList_Append(markers, record);
668 668 Py_DECREF(record);
669 669 if (error) {
670 670 goto bail;
671 671 }
672 672 data += msize;
673 673 offset += msize;
674 674 }
675 675 return markers;
676 676 bail:
677 677 Py_DECREF(markers);
678 678 return NULL;
679 679 }
680 680
681 681 static char parsers_doc[] = "Efficient content parsing.";
682 682
683 683 PyObject *encodedir(PyObject *self, PyObject *args);
684 684 PyObject *pathencode(PyObject *self, PyObject *args);
685 685 PyObject *lowerencode(PyObject *self, PyObject *args);
686 686 PyObject *parse_index2(PyObject *self, PyObject *args);
687 687
688 688 static PyMethodDef methods[] = {
689 689 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
690 690 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
691 691 "create a set containing non-normal and other parent entries of given "
692 692 "dirstate\n"},
693 693 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
694 694 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
695 695 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
696 696 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
697 697 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
698 698 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
699 699 {"dict_new_presized", dict_new_presized, METH_VARARGS,
700 700 "construct a dict with an expected size\n"},
701 701 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
702 702 "make file foldmap\n"},
703 703 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
704 704 "escape a UTF-8 byte string to JSON (fast path)\n"},
705 705 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
706 706 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
707 707 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
708 708 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
709 709 "parse v1 obsolete markers\n"},
710 710 {NULL, NULL}};
711 711
712 712 void dirs_module_init(PyObject *mod);
713 713 void manifest_module_init(PyObject *mod);
714 714 void revlog_module_init(PyObject *mod);
715 715
716 static const int version = 6;
716 static const int version = 7;
717 717
718 718 static void module_init(PyObject *mod)
719 719 {
720 720 PyModule_AddIntConstant(mod, "version", version);
721 721
722 722 /* This module constant has two purposes. First, it lets us unit test
723 723 * the ImportError raised without hard-coding any error text. This
724 724 * means we can change the text in the future without breaking tests,
725 725 * even across changesets without a recompile. Second, its presence
726 726 * can be used to determine whether the version-checking logic is
727 727 * present, which also helps in testing across changesets without a
728 728 * recompile. Note that this means the pure-Python version of parsers
729 729 * should not have this module constant. */
730 730 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
731 731
732 732 dirs_module_init(mod);
733 733 manifest_module_init(mod);
734 734 revlog_module_init(mod);
735 735
736 736 if (PyType_Ready(&dirstateTupleType) < 0)
737 737 return;
738 738 Py_INCREF(&dirstateTupleType);
739 739 PyModule_AddObject(mod, "dirstatetuple",
740 740 (PyObject *)&dirstateTupleType);
741 741 }
742 742
743 743 static int check_python_version(void)
744 744 {
745 745 PyObject *sys = PyImport_ImportModule("sys"), *ver;
746 746 long hexversion;
747 747 if (!sys)
748 748 return -1;
749 749 ver = PyObject_GetAttrString(sys, "hexversion");
750 750 Py_DECREF(sys);
751 751 if (!ver)
752 752 return -1;
753 753 hexversion = PyInt_AsLong(ver);
754 754 Py_DECREF(ver);
755 755 /* sys.hexversion is a 32-bit number by default, so the -1 case
756 756 * should only occur in unusual circumstances (e.g. if sys.hexversion
757 757 * is manually set to an invalid value). */
758 758 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
759 759 PyErr_Format(PyExc_ImportError,
760 760 "%s: The Mercurial extension "
761 761 "modules were compiled with Python " PY_VERSION
762 762 ", but "
763 763 "Mercurial is currently using Python with "
764 764 "sys.hexversion=%ld: "
765 765 "Python %s\n at: %s",
766 766 versionerrortext, hexversion, Py_GetVersion(),
767 767 Py_GetProgramFullPath());
768 768 return -1;
769 769 }
770 770 return 0;
771 771 }
772 772
773 773 #ifdef IS_PY3K
774 774 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
775 775 parsers_doc, -1, methods};
776 776
777 777 PyMODINIT_FUNC PyInit_parsers(void)
778 778 {
779 779 PyObject *mod;
780 780
781 781 if (check_python_version() == -1)
782 782 return NULL;
783 783 mod = PyModule_Create(&parsers_module);
784 784 module_init(mod);
785 785 return mod;
786 786 }
787 787 #else
788 788 PyMODINIT_FUNC initparsers(void)
789 789 {
790 790 PyObject *mod;
791 791
792 792 if (check_python_version() == -1)
793 793 return;
794 794 mod = Py_InitModule3("parsers", methods, parsers_doc);
795 795 module_init(mod);
796 796 }
797 797 #endif
@@ -1,2167 +1,2167 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AS_LONG PyLong_AS_LONG
28 28 #define PyInt_AsLong PyLong_AsLong
29 29 #endif
30 30
31 31 /*
32 32 * A base-16 trie for fast node->rev mapping.
33 33 *
34 34 * Positive value is index of the next node in the trie
35 35 * Negative value is a leaf: -(rev + 2)
36 36 * Zero is empty
37 37 */
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetree;
41 41
42 42 /*
43 43 * This class has two behaviors.
44 44 *
45 45 * When used in a list-like way (with integer keys), we decode an
46 46 * entry in a RevlogNG index file on demand. Our last entry is a
47 47 * sentinel, always a nullid. We have limited support for
48 48 * integer-keyed insert and delete, only at elements right before the
49 49 * sentinel.
50 50 *
51 51 * With string keys, we lazily perform a reverse mapping from node to
52 52 * rev, using a base-16 trie.
53 53 */
54 54 typedef struct {
55 55 PyObject_HEAD
56 56 /* Type-specific fields go here. */
57 57 PyObject *data; /* raw bytes of index */
58 58 Py_buffer buf; /* buffer of data */
59 59 PyObject **cache; /* cached tuples */
60 60 const char **offsets; /* populated on demand */
61 61 Py_ssize_t raw_length; /* original number of elements */
62 62 Py_ssize_t length; /* current number of elements */
63 63 PyObject *added; /* populated on demand */
64 64 PyObject *headrevs; /* cache, invalidated on changes */
65 65 	PyObject *filteredrevs; /* filtered revs set */
66 66 nodetree *nt; /* base-16 trie */
67 67 unsigned ntlength; /* # nodes in use */
68 68 unsigned ntcapacity; /* # nodes allocated */
69 69 int ntdepth; /* maximum depth of tree */
70 70 int ntsplits; /* # splits performed */
71 71 int ntrev; /* last rev scanned */
72 72 int ntlookups; /* # lookups */
73 73 int ntmisses; /* # lookups that miss the cache */
74 74 int inlined;
75 75 } indexObject;
76 76
77 77 static Py_ssize_t index_length(const indexObject *self)
78 78 {
79 79 if (self->added == NULL)
80 return self->length;
81 return self->length + PyList_GET_SIZE(self->added);
80 return self->length - 1;
81 return self->length + PyList_GET_SIZE(self->added) - 1;
82 82 }
83 83
84 84 static PyObject *nullentry;
85 85 static const char nullid[20];
86 86
87 87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
88 88
89 89 #if LONG_MAX == 0x7fffffffL
90 90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
91 91 #else
92 92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
93 93 #endif
94 94
95 95 /* A RevlogNG v1 index entry is 64 bytes long. */
96 96 static const long v1_hdrsize = 64;
97 97
98 98 /*
99 99 * Return a pointer to the beginning of a RevlogNG record.
100 100 */
101 101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
102 102 {
103 103 if (self->inlined && pos > 0) {
104 104 if (self->offsets == NULL) {
105 105 self->offsets = PyMem_Malloc(self->raw_length *
106 106 sizeof(*self->offsets));
107 107 if (self->offsets == NULL)
108 108 return (const char *)PyErr_NoMemory();
109 109 inline_scan(self, self->offsets);
110 110 }
111 111 return self->offsets[pos];
112 112 }
113 113
114 114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
115 115 }
116 116
117 117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
118 118 int *ps, int maxrev)
119 119 {
120 120 if (rev >= self->length - 1) {
121 121 PyObject *tuple = PyList_GET_ITEM(self->added,
122 122 rev - self->length + 1);
123 123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
124 124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
125 125 } else {
126 126 const char *data = index_deref(self, rev);
127 127 ps[0] = getbe32(data + 24);
128 128 ps[1] = getbe32(data + 28);
129 129 }
130 130 /* If index file is corrupted, ps[] may point to invalid revisions. So
131 131 * there is a risk of buffer overflow to trust them unconditionally. */
132 132 if (ps[0] > maxrev || ps[1] > maxrev) {
133 133 PyErr_SetString(PyExc_ValueError, "parent out of range");
134 134 return -1;
135 135 }
136 136 return 0;
137 137 }
138 138
139 139
140 140 /*
141 141 * RevlogNG format (all in big endian, data may be inlined):
142 142 * 6 bytes: offset
143 143 * 2 bytes: flags
144 144 * 4 bytes: compressed length
145 145 * 4 bytes: uncompressed length
146 146 * 4 bytes: base revision
147 147 * 4 bytes: link revision
148 148 * 4 bytes: parent 1 revision
149 149 * 4 bytes: parent 2 revision
150 150 * 32 bytes: nodeid (only 20 bytes used)
151 151 */
152 152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
153 153 {
154 154 uint64_t offset_flags;
155 155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
156 156 const char *c_node_id;
157 157 const char *data;
158 Py_ssize_t length = index_length(self);
158 Py_ssize_t length = index_length(self) + 1;
159 159 PyObject *entry;
160 160
161 161 if (pos == -1 || pos == length - 1) {
162 162 Py_INCREF(nullentry);
163 163 return nullentry;
164 164 }
165 165
166 166 if (pos < 0 || pos >= length) {
167 167 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
168 168 return NULL;
169 169 }
170 170
171 171 if (pos >= self->length - 1) {
172 172 PyObject *obj;
173 173 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
174 174 Py_INCREF(obj);
175 175 return obj;
176 176 }
177 177
178 178 if (self->cache) {
179 179 if (self->cache[pos]) {
180 180 Py_INCREF(self->cache[pos]);
181 181 return self->cache[pos];
182 182 }
183 183 } else {
184 184 self->cache = calloc(self->raw_length, sizeof(PyObject *));
185 185 if (self->cache == NULL)
186 186 return PyErr_NoMemory();
187 187 }
188 188
189 189 data = index_deref(self, pos);
190 190 if (data == NULL)
191 191 return NULL;
192 192
193 193 offset_flags = getbe32(data + 4);
194 194 if (pos == 0) /* mask out version number for the first entry */
195 195 offset_flags &= 0xFFFF;
196 196 else {
197 197 uint32_t offset_high = getbe32(data);
198 198 offset_flags |= ((uint64_t)offset_high) << 32;
199 199 }
200 200
201 201 comp_len = getbe32(data + 8);
202 202 uncomp_len = getbe32(data + 12);
203 203 base_rev = getbe32(data + 16);
204 204 link_rev = getbe32(data + 20);
205 205 parent_1 = getbe32(data + 24);
206 206 parent_2 = getbe32(data + 28);
207 207 c_node_id = data + 32;
208 208
209 209 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
210 210 uncomp_len, base_rev, link_rev,
211 211 parent_1, parent_2, c_node_id, 20);
212 212
213 213 if (entry) {
214 214 PyObject_GC_UnTrack(entry);
215 215 Py_INCREF(entry);
216 216 }
217 217
218 218 self->cache[pos] = entry;
219 219
220 220 return entry;
221 221 }
222 222
223 223 /*
224 224 * Return the 20-byte SHA of the node corresponding to the given rev.
225 225 */
226 226 static const char *index_node(indexObject *self, Py_ssize_t pos)
227 227 {
228 Py_ssize_t length = index_length(self);
228 Py_ssize_t length = index_length(self) + 1;
229 229 const char *data;
230 230
231 231 if (pos == length - 1 || pos == -1)
232 232 return nullid;
233 233
234 234 if (pos >= length)
235 235 return NULL;
236 236
237 237 if (pos >= self->length - 1) {
238 238 PyObject *tuple, *str;
239 239 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
240 240 str = PyTuple_GetItem(tuple, 7);
241 241 return str ? PyBytes_AS_STRING(str) : NULL;
242 242 }
243 243
244 244 data = index_deref(self, pos);
245 245 return data ? data + 32 : NULL;
246 246 }
247 247
248 248 /*
249 249 * Return the 20-byte SHA of the node corresponding to the given rev. The
250 250 * rev is assumed to be existing. If not, an exception is set.
251 251 */
252 252 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
253 253 {
254 254 const char *node = index_node(self, pos);
255 255 if (node == NULL) {
256 256 PyErr_Format(PyExc_IndexError, "could not access rev %d",
257 257 (int)pos);
258 258 }
259 259 return node;
260 260 }
261 261
262 262 static int nt_insert(indexObject *self, const char *node, int rev);
263 263
264 264 static int node_check(PyObject *obj, char **node)
265 265 {
266 266 Py_ssize_t nodelen;
267 267 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
268 268 return -1;
269 269 if (nodelen == 20)
270 270 return 0;
271 271 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
272 272 return -1;
273 273 }
274 274
275 275 static PyObject *index_append(indexObject *self, PyObject *obj)
276 276 {
277 277 char *node;
278 278 Py_ssize_t len;
279 279
280 280 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
281 281 PyErr_SetString(PyExc_TypeError, "8-tuple required");
282 282 return NULL;
283 283 }
284 284
285 285 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
286 286 return NULL;
287 287
288 len = index_length(self);
288 len = index_length(self) + 1;
289 289
290 290 if (self->added == NULL) {
291 291 self->added = PyList_New(0);
292 292 if (self->added == NULL)
293 293 return NULL;
294 294 }
295 295
296 296 if (PyList_Append(self->added, obj) == -1)
297 297 return NULL;
298 298
299 299 if (self->nt)
300 300 nt_insert(self, node, len - 1);
301 301
302 302 Py_CLEAR(self->headrevs);
303 303 Py_RETURN_NONE;
304 304 }
305 305
306 306 static void _index_clearcaches(indexObject *self)
307 307 {
308 308 if (self->cache) {
309 309 Py_ssize_t i;
310 310
311 311 for (i = 0; i < self->raw_length; i++)
312 312 Py_CLEAR(self->cache[i]);
313 313 free(self->cache);
314 314 self->cache = NULL;
315 315 }
316 316 if (self->offsets) {
317 317 PyMem_Free(self->offsets);
318 318 self->offsets = NULL;
319 319 }
320 320 free(self->nt);
321 321 self->nt = NULL;
322 322 Py_CLEAR(self->headrevs);
323 323 }
324 324
325 325 static PyObject *index_clearcaches(indexObject *self)
326 326 {
327 327 _index_clearcaches(self);
328 328 self->ntlength = self->ntcapacity = 0;
329 329 self->ntdepth = self->ntsplits = 0;
330 330 self->ntrev = -1;
331 331 self->ntlookups = self->ntmisses = 0;
332 332 Py_RETURN_NONE;
333 333 }
334 334
335 335 static PyObject *index_stats(indexObject *self)
336 336 {
337 337 PyObject *obj = PyDict_New();
338 338 PyObject *t = NULL;
339 339
340 340 if (obj == NULL)
341 341 return NULL;
342 342
343 343 #define istat(__n, __d) \
344 344 do { \
345 345 t = PyInt_FromSsize_t(self->__n); \
346 346 if (!t) \
347 347 goto bail; \
348 348 if (PyDict_SetItemString(obj, __d, t) == -1) \
349 349 goto bail; \
350 350 Py_DECREF(t); \
351 351 } while (0)
352 352
353 353 if (self->added) {
354 354 Py_ssize_t len = PyList_GET_SIZE(self->added);
355 355 t = PyInt_FromSsize_t(len);
356 356 if (!t)
357 357 goto bail;
358 358 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
359 359 goto bail;
360 360 Py_DECREF(t);
361 361 }
362 362
363 363 if (self->raw_length != self->length - 1)
364 364 istat(raw_length, "revs on disk");
365 365 istat(length, "revs in memory");
366 366 istat(ntcapacity, "node trie capacity");
367 367 istat(ntdepth, "node trie depth");
368 368 istat(ntlength, "node trie count");
369 369 istat(ntlookups, "node trie lookups");
370 370 istat(ntmisses, "node trie misses");
371 371 istat(ntrev, "node trie last rev scanned");
372 372 istat(ntsplits, "node trie splits");
373 373
374 374 #undef istat
375 375
376 376 return obj;
377 377
378 378 bail:
379 379 Py_XDECREF(obj);
380 380 Py_XDECREF(t);
381 381 return NULL;
382 382 }
383 383
384 384 /*
385 385 * When we cache a list, we want to be sure the caller can't mutate
386 386 * the cached copy.
387 387 */
388 388 static PyObject *list_copy(PyObject *list)
389 389 {
390 390 Py_ssize_t len = PyList_GET_SIZE(list);
391 391 PyObject *newlist = PyList_New(len);
392 392 Py_ssize_t i;
393 393
394 394 if (newlist == NULL)
395 395 return NULL;
396 396
397 397 for (i = 0; i < len; i++) {
398 398 PyObject *obj = PyList_GET_ITEM(list, i);
399 399 Py_INCREF(obj);
400 400 PyList_SET_ITEM(newlist, i, obj);
401 401 }
402 402
403 403 return newlist;
404 404 }
405 405
406 406 static int check_filter(PyObject *filter, Py_ssize_t arg)
407 407 {
408 408 if (filter) {
409 409 PyObject *arglist, *result;
410 410 int isfiltered;
411 411
412 412 arglist = Py_BuildValue("(n)", arg);
413 413 if (!arglist) {
414 414 return -1;
415 415 }
416 416
417 417 result = PyEval_CallObject(filter, arglist);
418 418 Py_DECREF(arglist);
419 419 if (!result) {
420 420 return -1;
421 421 }
422 422
423 423 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
424 424 * same as this function, so we can just return it directly.*/
425 425 isfiltered = PyObject_IsTrue(result);
426 426 Py_DECREF(result);
427 427 return isfiltered;
428 428 } else {
429 429 return 0;
430 430 }
431 431 }
432 432
433 433 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
434 434 Py_ssize_t marker, char *phases)
435 435 {
436 436 PyObject *iter = NULL;
437 437 PyObject *iter_item = NULL;
438 Py_ssize_t min_idx = index_length(self) + 1;
438 Py_ssize_t min_idx = index_length(self) + 2;
439 439 long iter_item_long;
440 440
441 441 if (PyList_GET_SIZE(list) != 0) {
442 442 iter = PyObject_GetIter(list);
443 443 if (iter == NULL)
444 444 return -2;
445 445 while ((iter_item = PyIter_Next(iter))) {
446 446 iter_item_long = PyInt_AS_LONG(iter_item);
447 447 Py_DECREF(iter_item);
448 448 if (iter_item_long < min_idx)
449 449 min_idx = iter_item_long;
450 450 phases[iter_item_long] = marker;
451 451 }
452 452 Py_DECREF(iter);
453 453 }
454 454
455 455 return min_idx;
456 456 }
457 457
458 458 static inline void set_phase_from_parents(char *phases, int parent_1,
459 459 int parent_2, Py_ssize_t i)
460 460 {
461 461 if (parent_1 >= 0 && phases[parent_1] > phases[i])
462 462 phases[i] = phases[parent_1];
463 463 if (parent_2 >= 0 && phases[parent_2] > phases[i])
464 464 phases[i] = phases[parent_2];
465 465 }
466 466
467 467 static PyObject *reachableroots2(indexObject *self, PyObject *args)
468 468 {
469 469
470 470 /* Input */
471 471 long minroot;
472 472 PyObject *includepatharg = NULL;
473 473 int includepath = 0;
474 474 /* heads and roots are lists */
475 475 PyObject *heads = NULL;
476 476 PyObject *roots = NULL;
477 477 PyObject *reachable = NULL;
478 478
479 479 PyObject *val;
480 Py_ssize_t len = index_length(self) - 1;
480 Py_ssize_t len = index_length(self);
481 481 long revnum;
482 482 Py_ssize_t k;
483 483 Py_ssize_t i;
484 484 Py_ssize_t l;
485 485 int r;
486 486 int parents[2];
487 487
488 488 /* Internal data structure:
489 489 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
490 490 * revstates: array of length len+1 (all revs + nullrev) */
491 491 int *tovisit = NULL;
492 492 long lentovisit = 0;
493 493 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
494 494 char *revstates = NULL;
495 495
496 496 /* Get arguments */
497 497 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
498 498 &PyList_Type, &roots,
499 499 &PyBool_Type, &includepatharg))
500 500 goto bail;
501 501
502 502 if (includepatharg == Py_True)
503 503 includepath = 1;
504 504
505 505 /* Initialize return set */
506 506 reachable = PyList_New(0);
507 507 if (reachable == NULL)
508 508 goto bail;
509 509
510 510 /* Initialize internal datastructures */
511 511 tovisit = (int *)malloc((len + 1) * sizeof(int));
512 512 if (tovisit == NULL) {
513 513 PyErr_NoMemory();
514 514 goto bail;
515 515 }
516 516
517 517 revstates = (char *)calloc(len + 1, 1);
518 518 if (revstates == NULL) {
519 519 PyErr_NoMemory();
520 520 goto bail;
521 521 }
522 522
523 523 l = PyList_GET_SIZE(roots);
524 524 for (i = 0; i < l; i++) {
525 525 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
526 526 if (revnum == -1 && PyErr_Occurred())
527 527 goto bail;
528 528 /* If root is out of range, e.g. wdir(), it must be unreachable
529 529 * from heads. So we can just ignore it. */
530 530 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
531 531 continue;
532 532 revstates[revnum + 1] |= RS_ROOT;
533 533 }
534 534
535 535 /* Populate tovisit with all the heads */
536 536 l = PyList_GET_SIZE(heads);
537 537 for (i = 0; i < l; i++) {
538 538 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
539 539 if (revnum == -1 && PyErr_Occurred())
540 540 goto bail;
541 541 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
542 542 PyErr_SetString(PyExc_IndexError, "head out of range");
543 543 goto bail;
544 544 }
545 545 if (!(revstates[revnum + 1] & RS_SEEN)) {
546 546 tovisit[lentovisit++] = (int)revnum;
547 547 revstates[revnum + 1] |= RS_SEEN;
548 548 }
549 549 }
550 550
551 551 /* Visit the tovisit list and find the reachable roots */
552 552 k = 0;
553 553 while (k < lentovisit) {
554 554 /* Add the node to reachable if it is a root*/
555 555 revnum = tovisit[k++];
556 556 if (revstates[revnum + 1] & RS_ROOT) {
557 557 revstates[revnum + 1] |= RS_REACHABLE;
558 558 val = PyInt_FromLong(revnum);
559 559 if (val == NULL)
560 560 goto bail;
561 561 r = PyList_Append(reachable, val);
562 562 Py_DECREF(val);
563 563 if (r < 0)
564 564 goto bail;
565 565 if (includepath == 0)
566 566 continue;
567 567 }
568 568
569 569 /* Add its parents to the list of nodes to visit */
570 570 if (revnum == -1)
571 571 continue;
572 572 r = index_get_parents(self, revnum, parents, (int)len - 1);
573 573 if (r < 0)
574 574 goto bail;
575 575 for (i = 0; i < 2; i++) {
576 576 if (!(revstates[parents[i] + 1] & RS_SEEN)
577 577 && parents[i] >= minroot) {
578 578 tovisit[lentovisit++] = parents[i];
579 579 revstates[parents[i] + 1] |= RS_SEEN;
580 580 }
581 581 }
582 582 }
583 583
584 584 /* Find all the nodes in between the roots we found and the heads
585 585 * and add them to the reachable set */
586 586 if (includepath == 1) {
587 587 long minidx = minroot;
588 588 if (minidx < 0)
589 589 minidx = 0;
590 590 for (i = minidx; i < len; i++) {
591 591 if (!(revstates[i + 1] & RS_SEEN))
592 592 continue;
593 593 r = index_get_parents(self, i, parents, (int)len - 1);
594 594 /* Corrupted index file, error is set from
595 595 * index_get_parents */
596 596 if (r < 0)
597 597 goto bail;
598 598 if (((revstates[parents[0] + 1] |
599 599 revstates[parents[1] + 1]) & RS_REACHABLE)
600 600 && !(revstates[i + 1] & RS_REACHABLE)) {
601 601 revstates[i + 1] |= RS_REACHABLE;
602 602 val = PyInt_FromLong(i);
603 603 if (val == NULL)
604 604 goto bail;
605 605 r = PyList_Append(reachable, val);
606 606 Py_DECREF(val);
607 607 if (r < 0)
608 608 goto bail;
609 609 }
610 610 }
611 611 }
612 612
613 613 free(revstates);
614 614 free(tovisit);
615 615 return reachable;
616 616 bail:
617 617 Py_XDECREF(reachable);
618 618 free(revstates);
619 619 free(tovisit);
620 620 return NULL;
621 621 }
622 622
623 623 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
624 624 {
625 625 PyObject *roots = Py_None;
626 626 PyObject *ret = NULL;
627 627 PyObject *phasessize = NULL;
628 628 PyObject *phaseroots = NULL;
629 629 PyObject *phaseset = NULL;
630 630 PyObject *phasessetlist = NULL;
631 631 PyObject *rev = NULL;
632 Py_ssize_t len = index_length(self) - 1;
632 Py_ssize_t len = index_length(self);
633 633 Py_ssize_t numphase = 0;
634 634 Py_ssize_t minrevallphases = 0;
635 635 Py_ssize_t minrevphase = 0;
636 636 Py_ssize_t i = 0;
637 637 char *phases = NULL;
638 638 long phase;
639 639
640 640 if (!PyArg_ParseTuple(args, "O", &roots))
641 641 goto done;
642 642 if (roots == NULL || !PyList_Check(roots)) {
643 643 PyErr_SetString(PyExc_TypeError, "roots must be a list");
644 644 goto done;
645 645 }
646 646
647 647 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
648 648 if (phases == NULL) {
649 649 PyErr_NoMemory();
650 650 goto done;
651 651 }
652 652 /* Put the phase information of all the roots in phases */
653 653 numphase = PyList_GET_SIZE(roots)+1;
654 654 minrevallphases = len + 1;
655 655 phasessetlist = PyList_New(numphase);
656 656 if (phasessetlist == NULL)
657 657 goto done;
658 658
659 659 PyList_SET_ITEM(phasessetlist, 0, Py_None);
660 660 Py_INCREF(Py_None);
661 661
662 662 for (i = 0; i < numphase-1; i++) {
663 663 phaseroots = PyList_GET_ITEM(roots, i);
664 664 phaseset = PySet_New(NULL);
665 665 if (phaseset == NULL)
666 666 goto release;
667 667 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
668 668 if (!PyList_Check(phaseroots)) {
669 669 PyErr_SetString(PyExc_TypeError,
670 670 "roots item must be a list");
671 671 goto release;
672 672 }
673 673 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
674 674 if (minrevphase == -2) /* Error from add_roots_get_min */
675 675 goto release;
676 676 minrevallphases = MIN(minrevallphases, minrevphase);
677 677 }
678 678 /* Propagate the phase information from the roots to the revs */
679 679 if (minrevallphases != -1) {
680 680 int parents[2];
681 681 for (i = minrevallphases; i < len; i++) {
682 682 if (index_get_parents(self, i, parents,
683 683 (int)len - 1) < 0)
684 684 goto release;
685 685 set_phase_from_parents(phases, parents[0], parents[1], i);
686 686 }
687 687 }
688 688 /* Transform phase list to a python list */
689 689 phasessize = PyInt_FromLong(len);
690 690 if (phasessize == NULL)
691 691 goto release;
692 692 for (i = 0; i < len; i++) {
693 693 phase = phases[i];
694 694 /* We only store the sets of phase for non public phase, the public phase
695 695 * is computed as a difference */
696 696 if (phase != 0) {
697 697 phaseset = PyList_GET_ITEM(phasessetlist, phase);
698 698 rev = PyInt_FromLong(i);
699 699 if (rev == NULL)
700 700 goto release;
701 701 PySet_Add(phaseset, rev);
702 702 Py_XDECREF(rev);
703 703 }
704 704 }
705 705 ret = PyTuple_Pack(2, phasessize, phasessetlist);
706 706
707 707 release:
708 708 Py_XDECREF(phasessize);
709 709 Py_XDECREF(phasessetlist);
710 710 done:
711 711 free(phases);
712 712 return ret;
713 713 }
714 714
715 715 static PyObject *index_headrevs(indexObject *self, PyObject *args)
716 716 {
717 717 Py_ssize_t i, j, len;
718 718 char *nothead = NULL;
719 719 PyObject *heads = NULL;
720 720 PyObject *filter = NULL;
721 721 PyObject *filteredrevs = Py_None;
722 722
723 723 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
724 724 return NULL;
725 725 }
726 726
727 727 if (self->headrevs && filteredrevs == self->filteredrevs)
728 728 return list_copy(self->headrevs);
729 729
730 730 Py_DECREF(self->filteredrevs);
731 731 self->filteredrevs = filteredrevs;
732 732 Py_INCREF(filteredrevs);
733 733
734 734 if (filteredrevs != Py_None) {
735 735 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
736 736 if (!filter) {
737 737 PyErr_SetString(PyExc_TypeError,
738 738 "filteredrevs has no attribute __contains__");
739 739 goto bail;
740 740 }
741 741 }
742 742
743 len = index_length(self) - 1;
743 len = index_length(self);
744 744 heads = PyList_New(0);
745 745 if (heads == NULL)
746 746 goto bail;
747 747 if (len == 0) {
748 748 PyObject *nullid = PyInt_FromLong(-1);
749 749 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
750 750 Py_XDECREF(nullid);
751 751 goto bail;
752 752 }
753 753 goto done;
754 754 }
755 755
756 756 nothead = calloc(len, 1);
757 757 if (nothead == NULL) {
758 758 PyErr_NoMemory();
759 759 goto bail;
760 760 }
761 761
762 762 for (i = len - 1; i >= 0; i--) {
763 763 int isfiltered;
764 764 int parents[2];
765 765
766 766 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
767 767 * node already, and therefore this node is not filtered. So we can skip
768 768 * the expensive check_filter step.
769 769 */
770 770 if (nothead[i] != 1) {
771 771 isfiltered = check_filter(filter, i);
772 772 if (isfiltered == -1) {
773 773 PyErr_SetString(PyExc_TypeError,
774 774 "unable to check filter");
775 775 goto bail;
776 776 }
777 777
778 778 if (isfiltered) {
779 779 nothead[i] = 1;
780 780 continue;
781 781 }
782 782 }
783 783
784 784 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
785 785 goto bail;
786 786 for (j = 0; j < 2; j++) {
787 787 if (parents[j] >= 0)
788 788 nothead[parents[j]] = 1;
789 789 }
790 790 }
791 791
792 792 for (i = 0; i < len; i++) {
793 793 PyObject *head;
794 794
795 795 if (nothead[i])
796 796 continue;
797 797 head = PyInt_FromSsize_t(i);
798 798 if (head == NULL || PyList_Append(heads, head) == -1) {
799 799 Py_XDECREF(head);
800 800 goto bail;
801 801 }
802 802 }
803 803
804 804 done:
805 805 self->headrevs = heads;
806 806 Py_XDECREF(filter);
807 807 free(nothead);
808 808 return list_copy(self->headrevs);
809 809 bail:
810 810 Py_XDECREF(filter);
811 811 Py_XDECREF(heads);
812 812 free(nothead);
813 813 return NULL;
814 814 }
815 815
816 816 /**
817 817 * Obtain the base revision index entry.
818 818 *
819 819 * Callers must ensure that rev >= 0 or illegal memory access may occur.
820 820 */
821 821 static inline int index_baserev(indexObject *self, int rev)
822 822 {
823 823 const char *data;
824 824
825 825 if (rev >= self->length - 1) {
826 826 PyObject *tuple = PyList_GET_ITEM(self->added,
827 827 rev - self->length + 1);
828 828 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
829 829 }
830 830 else {
831 831 data = index_deref(self, rev);
832 832 if (data == NULL) {
833 833 return -2;
834 834 }
835 835
836 836 return getbe32(data + 16);
837 837 }
838 838 }
839 839
840 840 static PyObject *index_deltachain(indexObject *self, PyObject *args)
841 841 {
842 842 int rev, generaldelta;
843 843 PyObject *stoparg;
844 844 int stoprev, iterrev, baserev = -1;
845 845 int stopped;
846 846 PyObject *chain = NULL, *result = NULL;
847 const Py_ssize_t length = index_length(self);
847 const Py_ssize_t length = index_length(self) + 1;
848 848
849 849 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
850 850 return NULL;
851 851 }
852 852
853 853 if (PyInt_Check(stoparg)) {
854 854 stoprev = (int)PyInt_AsLong(stoparg);
855 855 if (stoprev == -1 && PyErr_Occurred()) {
856 856 return NULL;
857 857 }
858 858 }
859 859 else if (stoparg == Py_None) {
860 860 stoprev = -2;
861 861 }
862 862 else {
863 863 PyErr_SetString(PyExc_ValueError,
864 864 "stoprev must be integer or None");
865 865 return NULL;
866 866 }
867 867
868 868 if (rev < 0 || rev >= length - 1) {
869 869 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
870 870 return NULL;
871 871 }
872 872
873 873 chain = PyList_New(0);
874 874 if (chain == NULL) {
875 875 return NULL;
876 876 }
877 877
878 878 baserev = index_baserev(self, rev);
879 879
880 880 /* This should never happen. */
881 881 if (baserev <= -2) {
882 882 /* Error should be set by index_deref() */
883 883 assert(PyErr_Occurred());
884 884 goto bail;
885 885 }
886 886
887 887 iterrev = rev;
888 888
889 889 while (iterrev != baserev && iterrev != stoprev) {
890 890 PyObject *value = PyInt_FromLong(iterrev);
891 891 if (value == NULL) {
892 892 goto bail;
893 893 }
894 894 if (PyList_Append(chain, value)) {
895 895 Py_DECREF(value);
896 896 goto bail;
897 897 }
898 898 Py_DECREF(value);
899 899
900 900 if (generaldelta) {
901 901 iterrev = baserev;
902 902 }
903 903 else {
904 904 iterrev--;
905 905 }
906 906
907 907 if (iterrev < 0) {
908 908 break;
909 909 }
910 910
911 911 if (iterrev >= length - 1) {
912 912 PyErr_SetString(PyExc_IndexError, "revision outside index");
913 913 return NULL;
914 914 }
915 915
916 916 baserev = index_baserev(self, iterrev);
917 917
918 918 /* This should never happen. */
919 919 if (baserev <= -2) {
920 920 /* Error should be set by index_deref() */
921 921 assert(PyErr_Occurred());
922 922 goto bail;
923 923 }
924 924 }
925 925
926 926 if (iterrev == stoprev) {
927 927 stopped = 1;
928 928 }
929 929 else {
930 930 PyObject *value = PyInt_FromLong(iterrev);
931 931 if (value == NULL) {
932 932 goto bail;
933 933 }
934 934 if (PyList_Append(chain, value)) {
935 935 Py_DECREF(value);
936 936 goto bail;
937 937 }
938 938 Py_DECREF(value);
939 939
940 940 stopped = 0;
941 941 }
942 942
943 943 if (PyList_Reverse(chain)) {
944 944 goto bail;
945 945 }
946 946
947 947 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
948 948 Py_DECREF(chain);
949 949 return result;
950 950
951 951 bail:
952 952 Py_DECREF(chain);
953 953 return NULL;
954 954 }
955 955
956 956 static inline int nt_level(const char *node, Py_ssize_t level)
957 957 {
958 958 int v = node[level>>1];
959 959 if (!(level & 1))
960 960 v >>= 4;
961 961 return v & 0xf;
962 962 }
963 963
964 964 /*
965 965 * Return values:
966 966 *
967 967 * -4: match is ambiguous (multiple candidates)
968 968 * -2: not found
969 969 * rest: valid rev
970 970 */
971 971 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
972 972 int hex)
973 973 {
974 974 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
975 975 int level, maxlevel, off;
976 976
977 977 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
978 978 return -1;
979 979
980 980 if (hex)
981 981 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
982 982 else
983 983 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
984 984
985 985 for (level = off = 0; level < maxlevel; level++) {
986 986 int k = getnybble(node, level);
987 987 nodetree *n = &self->nt[off];
988 988 int v = n->children[k];
989 989
990 990 if (v < 0) {
991 991 const char *n;
992 992 Py_ssize_t i;
993 993
994 994 v = -(v + 2);
995 995 n = index_node(self, v);
996 996 if (n == NULL)
997 997 return -2;
998 998 for (i = level; i < maxlevel; i++)
999 999 if (getnybble(node, i) != nt_level(n, i))
1000 1000 return -2;
1001 1001 return v;
1002 1002 }
1003 1003 if (v == 0)
1004 1004 return -2;
1005 1005 off = v;
1006 1006 }
1007 1007 /* multiple matches against an ambiguous prefix */
1008 1008 return -4;
1009 1009 }
1010 1010
1011 1011 static int nt_new(indexObject *self)
1012 1012 {
1013 1013 if (self->ntlength == self->ntcapacity) {
1014 1014 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1015 1015 PyErr_SetString(PyExc_MemoryError,
1016 1016 "overflow in nt_new");
1017 1017 return -1;
1018 1018 }
1019 1019 self->ntcapacity *= 2;
1020 1020 self->nt = realloc(self->nt,
1021 1021 self->ntcapacity * sizeof(nodetree));
1022 1022 if (self->nt == NULL) {
1023 1023 PyErr_SetString(PyExc_MemoryError, "out of memory");
1024 1024 return -1;
1025 1025 }
1026 1026 memset(&self->nt[self->ntlength], 0,
1027 1027 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1028 1028 }
1029 1029 return self->ntlength++;
1030 1030 }
1031 1031
1032 1032 static int nt_insert(indexObject *self, const char *node, int rev)
1033 1033 {
1034 1034 int level = 0;
1035 1035 int off = 0;
1036 1036
1037 1037 while (level < 40) {
1038 1038 int k = nt_level(node, level);
1039 1039 nodetree *n;
1040 1040 int v;
1041 1041
1042 1042 n = &self->nt[off];
1043 1043 v = n->children[k];
1044 1044
1045 1045 if (v == 0) {
1046 1046 n->children[k] = -rev - 2;
1047 1047 return 0;
1048 1048 }
1049 1049 if (v < 0) {
1050 1050 const char *oldnode = index_node_existing(self, -(v + 2));
1051 1051 int noff;
1052 1052
1053 1053 if (oldnode == NULL)
1054 1054 return -1;
1055 1055 if (!memcmp(oldnode, node, 20)) {
1056 1056 n->children[k] = -rev - 2;
1057 1057 return 0;
1058 1058 }
1059 1059 noff = nt_new(self);
1060 1060 if (noff == -1)
1061 1061 return -1;
1062 1062 /* self->nt may have been changed by realloc */
1063 1063 self->nt[off].children[k] = noff;
1064 1064 off = noff;
1065 1065 n = &self->nt[off];
1066 1066 n->children[nt_level(oldnode, ++level)] = v;
1067 1067 if (level > self->ntdepth)
1068 1068 self->ntdepth = level;
1069 1069 self->ntsplits += 1;
1070 1070 } else {
1071 1071 level += 1;
1072 1072 off = v;
1073 1073 }
1074 1074 }
1075 1075
1076 1076 return -1;
1077 1077 }
1078 1078
1079 1079 static int nt_delete_node(indexObject *self, const char *node)
1080 1080 {
1081 1081 /* rev==-2 happens to get encoded as 0, which is interpreted as not set */
1082 1082 return nt_insert(self, node, -2);
1083 1083 }
1084 1084
1085 1085 static int nt_init(indexObject *self)
1086 1086 {
1087 1087 if (self->nt == NULL) {
1088 1088 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1089 1089 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1090 1090 return -1;
1091 1091 }
1092 1092 self->ntcapacity = self->raw_length < 4
1093 1093 ? 4 : (int)self->raw_length / 2;
1094 1094
1095 1095 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1096 1096 if (self->nt == NULL) {
1097 1097 PyErr_NoMemory();
1098 1098 return -1;
1099 1099 }
1100 1100 self->ntlength = 1;
1101 self->ntrev = (int)index_length(self) - 1;
1101 self->ntrev = (int)index_length(self);
1102 1102 self->ntlookups = 1;
1103 1103 self->ntmisses = 0;
1104 1104 if (nt_insert(self, nullid, -1) == -1) {
1105 1105 free(self->nt);
1106 1106 self->nt = NULL;
1107 1107 return -1;
1108 1108 }
1109 1109 }
1110 1110 return 0;
1111 1111 }
1112 1112
1113 1113 /*
1114 1114 * Return values:
1115 1115 *
1116 1116 * -3: error (exception set)
1117 1117 * -2: not found (no exception set)
1118 1118 * rest: valid rev
1119 1119 */
1120 1120 static int index_find_node(indexObject *self,
1121 1121 const char *node, Py_ssize_t nodelen)
1122 1122 {
1123 1123 int rev;
1124 1124
1125 1125 if (nt_init(self) == -1)
1126 1126 return -3;
1127 1127
1128 1128 self->ntlookups++;
1129 1129 rev = nt_find(self, node, nodelen, 0);
1130 1130 if (rev >= -1)
1131 1131 return rev;
1132 1132
1133 1133 /*
1134 1134 * For the first handful of lookups, we scan the entire index,
1135 1135 * and cache only the matching nodes. This optimizes for cases
1136 1136 * like "hg tip", where only a few nodes are accessed.
1137 1137 *
1138 1138 * After that, we cache every node we visit, using a single
1139 1139 * scan amortized over multiple lookups. This gives the best
1140 1140 * bulk performance, e.g. for "hg log".
1141 1141 */
1142 1142 if (self->ntmisses++ < 4) {
1143 1143 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1144 1144 const char *n = index_node_existing(self, rev);
1145 1145 if (n == NULL)
1146 1146 return -3;
1147 1147 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1148 1148 if (nt_insert(self, n, rev) == -1)
1149 1149 return -3;
1150 1150 break;
1151 1151 }
1152 1152 }
1153 1153 } else {
1154 1154 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1155 1155 const char *n = index_node_existing(self, rev);
1156 1156 if (n == NULL)
1157 1157 return -3;
1158 1158 if (nt_insert(self, n, rev) == -1) {
1159 1159 self->ntrev = rev + 1;
1160 1160 return -3;
1161 1161 }
1162 1162 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1163 1163 break;
1164 1164 }
1165 1165 }
1166 1166 self->ntrev = rev;
1167 1167 }
1168 1168
1169 1169 if (rev >= 0)
1170 1170 return rev;
1171 1171 return -2;
1172 1172 }
1173 1173
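
The two-phase lookup cache described in the comment inside index_find_node can be modelled in plain Python roughly as follows. The class and names below are invented for illustration; this sketches the policy, not the C data structures:

    # Toy model: nodes[rev] is a 20-byte id; _cache maps id -> rev.
    class LookupCache(object):
        def __init__(self, nodes):
            self._nodes = nodes
            self._cache = {}
            self._misses = 0
            self._uncached = len(nodes)     # revs below this may still be uncached

        def find(self, node):
            if node in self._cache:
                return self._cache[node]
            self._misses += 1
            bulk = self._misses > 4         # after a few misses, cache every node visited
            for rev in range(self._uncached - 1, -1, -1):
                n = self._nodes[rev]
                if bulk:
                    self._cache[n] = rev
                if n == node:
                    self._cache[n] = rev
                    if bulk:
                        self._uncached = rev
                    return rev
            if bulk:
                self._uncached = 0
            return -2                       # not found
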
1174 1174 static void raise_revlog_error(void)
1175 1175 {
1176 1176 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1177 1177
1178 1178 mod = PyImport_ImportModule("mercurial.error");
1179 1179 if (mod == NULL) {
1180 1180 goto cleanup;
1181 1181 }
1182 1182
1183 1183 dict = PyModule_GetDict(mod);
1184 1184 if (dict == NULL) {
1185 1185 goto cleanup;
1186 1186 }
1187 1187 Py_INCREF(dict);
1188 1188
1189 1189 errclass = PyDict_GetItemString(dict, "RevlogError");
1190 1190 if (errclass == NULL) {
1191 1191 PyErr_SetString(PyExc_SystemError,
1192 1192 "could not find RevlogError");
1193 1193 goto cleanup;
1194 1194 }
1195 1195
1196 1196 /* value of exception is ignored by callers */
1197 1197 PyErr_SetString(errclass, "RevlogError");
1198 1198
1199 1199 cleanup:
1200 1200 Py_XDECREF(dict);
1201 1201 Py_XDECREF(mod);
1202 1202 }
1203 1203
1204 1204 static PyObject *index_getitem(indexObject *self, PyObject *value)
1205 1205 {
1206 1206 char *node;
1207 1207 int rev;
1208 1208
1209 1209 if (PyInt_Check(value))
1210 1210 return index_get(self, PyInt_AS_LONG(value));
1211 1211
1212 1212 if (node_check(value, &node) == -1)
1213 1213 return NULL;
1214 1214 rev = index_find_node(self, node, 20);
1215 1215 if (rev >= -1)
1216 1216 return PyInt_FromLong(rev);
1217 1217 if (rev == -2)
1218 1218 raise_revlog_error();
1219 1219 return NULL;
1220 1220 }
1221 1221
1222 1222 /*
1223 1223 * Fully populate the radix tree.
1224 1224 */
1225 1225 static int nt_populate(indexObject *self) {
1226 1226 int rev;
1227 1227 if (self->ntrev > 0) {
1228 1228 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1229 1229 const char *n = index_node_existing(self, rev);
1230 1230 if (n == NULL)
1231 1231 return -1;
1232 1232 if (nt_insert(self, n, rev) == -1)
1233 1233 return -1;
1234 1234 }
1235 1235 self->ntrev = -1;
1236 1236 }
1237 1237 return 0;
1238 1238 }
1239 1239
1240 1240 static int nt_partialmatch(indexObject *self, const char *node,
1241 1241 Py_ssize_t nodelen)
1242 1242 {
1243 1243 if (nt_init(self) == -1)
1244 1244 return -3;
1245 1245 if (nt_populate(self) == -1)
1246 1246 return -3;
1247 1247
1248 1248 return nt_find(self, node, nodelen, 1);
1249 1249 }
1250 1250
1251 1251 /*
1252 1252 * Find the length of the shortest unique prefix of node.
1253 1253 *
1254 1254 * Return values:
1255 1255 *
1256 1256 * -3: error (exception set)
1257 1257 * -2: not found (no exception set)
1258 1258 * rest: length of shortest prefix
1259 1259 */
1260 1260 static int nt_shortest(indexObject *self, const char *node)
1261 1261 {
1262 1262 int level, off;
1263 1263
1264 1264 if (nt_init(self) == -1)
1265 1265 return -3;
1266 1266 if (nt_populate(self) == -1)
1267 1267 return -3;
1268 1268
1269 1269 for (level = off = 0; level < 40; level++) {
1270 1270 int k, v;
1271 1271 nodetree *n = &self->nt[off];
1272 1272 k = nt_level(node, level);
1273 1273 v = n->children[k];
1274 1274 if (v < 0) {
1275 1275 const char *n;
1276 1276 v = -(v + 2);
1277 1277 n = index_node_existing(self, v);
1278 1278 if (n == NULL)
1279 1279 return -3;
1280 1280 if (memcmp(node, n, 20) != 0)
1281 1281 /*
1282 1282 * Found a unique prefix, but it wasn't for the
1283 1283 * requested node (i.e. the requested node does
1284 1284 * not exist).
1285 1285 */
1286 1286 return -2;
1287 1287 return level + 1;
1288 1288 }
1289 1289 if (v == 0)
1290 1290 return -2;
1291 1291 off = v;
1292 1292 }
1293 1293 /*
1294 1294 * The node was still not unique after 40 hex digits, so this won't
1295 1295 * happen. Also, if we get here, then there's a programming error in
1296 1296 * this file that made us insert a node longer than 40 hex digits.
1297 1297 */
1298 1298 PyErr_SetString(PyExc_Exception, "broken node tree");
1299 1299 return -3;
1300 1300 }
1301 1301
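
The question nt_shortest() answers, the length of the shortest prefix of node that no other stored id shares, can be phrased over hex strings in a few lines of Python (a conceptual sketch; the C code walks the nibble tree instead of comparing strings):

    def shortest(hexnodes, node):
        others = [n for n in hexnodes if n != node]
        for length in range(1, len(node) + 1):
            if not any(o.startswith(node[:length]) for o in others):
                return length
        return len(node)

    assert shortest(["a1b2", "a1c3", "d4e5"], "a1b2") == 3   # "a1b" is already unique
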
1302 1302 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1303 1303 {
1304 1304 const char *fullnode;
1305 1305 int nodelen;
1306 1306 char *node;
1307 1307 int rev, i;
1308 1308
1309 1309 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1310 1310 return NULL;
1311 1311
1312 1312 if (nodelen < 1) {
1313 1313 PyErr_SetString(PyExc_ValueError, "key too short");
1314 1314 return NULL;
1315 1315 }
1316 1316
1317 1317 if (nodelen > 40) {
1318 1318 PyErr_SetString(PyExc_ValueError, "key too long");
1319 1319 return NULL;
1320 1320 }
1321 1321
1322 1322 for (i = 0; i < nodelen; i++)
1323 1323 hexdigit(node, i);
1324 1324 if (PyErr_Occurred()) {
1325 1325 /* input contains non-hex characters */
1326 1326 PyErr_Clear();
1327 1327 Py_RETURN_NONE;
1328 1328 }
1329 1329
1330 1330 rev = nt_partialmatch(self, node, nodelen);
1331 1331
1332 1332 switch (rev) {
1333 1333 case -4:
1334 1334 raise_revlog_error();
1335 1335 case -3:
1336 1336 return NULL;
1337 1337 case -2:
1338 1338 Py_RETURN_NONE;
1339 1339 case -1:
1340 1340 return PyBytes_FromStringAndSize(nullid, 20);
1341 1341 }
1342 1342
1343 1343 fullnode = index_node_existing(self, rev);
1344 1344 if (fullnode == NULL) {
1345 1345 return NULL;
1346 1346 }
1347 1347 return PyBytes_FromStringAndSize(fullnode, 20);
1348 1348 }
1349 1349
1350 1350 static PyObject *index_shortest(indexObject *self, PyObject *args)
1351 1351 {
1352 1352 PyObject *val;
1353 1353 char *node;
1354 1354 int length;
1355 1355
1356 1356 if (!PyArg_ParseTuple(args, "O", &val))
1357 1357 return NULL;
1358 1358 if (node_check(val, &node) == -1)
1359 1359 return NULL;
1360 1360
1361 1361 self->ntlookups++;
1362 1362 length = nt_shortest(self, node);
1363 1363 if (length == -3)
1364 1364 return NULL;
1365 1365 if (length == -2) {
1366 1366 raise_revlog_error();
1367 1367 return NULL;
1368 1368 }
1369 1369 return PyInt_FromLong(length);
1370 1370 }
1371 1371
1372 1372 static PyObject *index_m_get(indexObject *self, PyObject *args)
1373 1373 {
1374 1374 PyObject *val;
1375 1375 char *node;
1376 1376 int rev;
1377 1377
1378 1378 if (!PyArg_ParseTuple(args, "O", &val))
1379 1379 return NULL;
1380 1380 if (node_check(val, &node) == -1)
1381 1381 return NULL;
1382 1382 rev = index_find_node(self, node, 20);
1383 1383 if (rev == -3)
1384 1384 return NULL;
1385 1385 if (rev == -2)
1386 1386 Py_RETURN_NONE;
1387 1387 return PyInt_FromLong(rev);
1388 1388 }
1389 1389
1390 1390 static int index_contains(indexObject *self, PyObject *value)
1391 1391 {
1392 1392 char *node;
1393 1393
1394 1394 if (PyInt_Check(value)) {
1395 1395 long rev = PyInt_AS_LONG(value);
1396 return rev >= -1 && rev < index_length(self);
1396 return rev >= -1 && rev < index_length(self) + 1;
1397 1397 }
1398 1398
1399 1399 if (node_check(value, &node) == -1)
1400 1400 return -1;
1401 1401
1402 1402 switch (index_find_node(self, node, 20)) {
1403 1403 case -3:
1404 1404 return -1;
1405 1405 case -2:
1406 1406 return 0;
1407 1407 default:
1408 1408 return 1;
1409 1409 }
1410 1410 }
1411 1411
1412 1412 typedef uint64_t bitmask;
1413 1413
1414 1414 /*
1415 1415 * Given a disjoint set of revs, return all candidates for the
1416 1416 * greatest common ancestor. In revset notation, this is the set
1417 1417 * "heads(::a and ::b and ...)"
1418 1418 */
1419 1419 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1420 1420 int revcount)
1421 1421 {
1422 1422 const bitmask allseen = (1ull << revcount) - 1;
1423 1423 const bitmask poison = 1ull << revcount;
1424 1424 PyObject *gca = PyList_New(0);
1425 1425 int i, v, interesting;
1426 1426 int maxrev = -1;
1427 1427 bitmask sp;
1428 1428 bitmask *seen;
1429 1429
1430 1430 if (gca == NULL)
1431 1431 return PyErr_NoMemory();
1432 1432
1433 1433 for (i = 0; i < revcount; i++) {
1434 1434 if (revs[i] > maxrev)
1435 1435 maxrev = revs[i];
1436 1436 }
1437 1437
1438 1438 seen = calloc(sizeof(*seen), maxrev + 1);
1439 1439 if (seen == NULL) {
1440 1440 Py_DECREF(gca);
1441 1441 return PyErr_NoMemory();
1442 1442 }
1443 1443
1444 1444 for (i = 0; i < revcount; i++)
1445 1445 seen[revs[i]] = 1ull << i;
1446 1446
1447 1447 interesting = revcount;
1448 1448
1449 1449 for (v = maxrev; v >= 0 && interesting; v--) {
1450 1450 bitmask sv = seen[v];
1451 1451 int parents[2];
1452 1452
1453 1453 if (!sv)
1454 1454 continue;
1455 1455
1456 1456 if (sv < poison) {
1457 1457 interesting -= 1;
1458 1458 if (sv == allseen) {
1459 1459 PyObject *obj = PyInt_FromLong(v);
1460 1460 if (obj == NULL)
1461 1461 goto bail;
1462 1462 if (PyList_Append(gca, obj) == -1) {
1463 1463 Py_DECREF(obj);
1464 1464 goto bail;
1465 1465 }
1466 1466 sv |= poison;
1467 1467 for (i = 0; i < revcount; i++) {
1468 1468 if (revs[i] == v)
1469 1469 goto done;
1470 1470 }
1471 1471 }
1472 1472 }
1473 1473 if (index_get_parents(self, v, parents, maxrev) < 0)
1474 1474 goto bail;
1475 1475
1476 1476 for (i = 0; i < 2; i++) {
1477 1477 int p = parents[i];
1478 1478 if (p == -1)
1479 1479 continue;
1480 1480 sp = seen[p];
1481 1481 if (sv < poison) {
1482 1482 if (sp == 0) {
1483 1483 seen[p] = sv;
1484 1484 interesting++;
1485 1485 }
1486 1486 else if (sp != sv)
1487 1487 seen[p] |= sv;
1488 1488 } else {
1489 1489 if (sp && sp < poison)
1490 1490 interesting--;
1491 1491 seen[p] = sv;
1492 1492 }
1493 1493 }
1494 1494 }
1495 1495
1496 1496 done:
1497 1497 free(seen);
1498 1498 return gca;
1499 1499 bail:
1500 1500 free(seen);
1501 1501 Py_XDECREF(gca);
1502 1502 return NULL;
1503 1503 }
1504 1504
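
Conceptually, find_gca_candidates() computes heads(::a and ::b and ...): the revisions reachable from every input that are not themselves ancestors of another such revision. Below is a plain-Python rendering with explicit sets; the C code reaches the same answer with one bitmask per revision and a single high-to-low sweep, and 'parents' is an assumed callback returning a revision's parents:

    def ancestors(rev, parents):
        seen, stack = {rev}, [rev]
        while stack:
            for p in parents(stack.pop()):
                if p != -1 and p not in seen:
                    seen.add(p)
                    stack.append(p)
        return seen

    def gca_candidates(revs, parents):
        common = set.intersection(*(ancestors(r, parents) for r in revs))
        nonheads = {p for c in common for p in parents(c)}   # ancestors of other common revs
        return common - nonheads                             # keep only the heads
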
1505 1505 /*
1506 1506 * Given a disjoint set of revs, return the subset with the longest
1507 1507 * path to the root.
1508 1508 */
1509 1509 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1510 1510 {
1511 1511 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1512 1512 static const Py_ssize_t capacity = 24;
1513 1513 int *depth, *interesting = NULL;
1514 1514 int i, j, v, ninteresting;
1515 1515 PyObject *dict = NULL, *keys = NULL;
1516 1516 long *seen = NULL;
1517 1517 int maxrev = -1;
1518 1518 long final;
1519 1519
1520 1520 if (revcount > capacity) {
1521 1521 PyErr_Format(PyExc_OverflowError,
1522 1522 "bitset size (%ld) > capacity (%ld)",
1523 1523 (long)revcount, (long)capacity);
1524 1524 return NULL;
1525 1525 }
1526 1526
1527 1527 for (i = 0; i < revcount; i++) {
1528 1528 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1529 1529 if (n > maxrev)
1530 1530 maxrev = n;
1531 1531 }
1532 1532
1533 1533 depth = calloc(sizeof(*depth), maxrev + 1);
1534 1534 if (depth == NULL)
1535 1535 return PyErr_NoMemory();
1536 1536
1537 1537 seen = calloc(sizeof(*seen), maxrev + 1);
1538 1538 if (seen == NULL) {
1539 1539 PyErr_NoMemory();
1540 1540 goto bail;
1541 1541 }
1542 1542
1543 1543 interesting = calloc(sizeof(*interesting), 1 << revcount);
1544 1544 if (interesting == NULL) {
1545 1545 PyErr_NoMemory();
1546 1546 goto bail;
1547 1547 }
1548 1548
1549 1549 if (PyList_Sort(revs) == -1)
1550 1550 goto bail;
1551 1551
1552 1552 for (i = 0; i < revcount; i++) {
1553 1553 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1554 1554 long b = 1l << i;
1555 1555 depth[n] = 1;
1556 1556 seen[n] = b;
1557 1557 interesting[b] = 1;
1558 1558 }
1559 1559
1560 1560 /* invariant: ninteresting is the number of non-zero entries in
1561 1561 * interesting. */
1562 1562 ninteresting = (int)revcount;
1563 1563
1564 1564 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1565 1565 int dv = depth[v];
1566 1566 int parents[2];
1567 1567 long sv;
1568 1568
1569 1569 if (dv == 0)
1570 1570 continue;
1571 1571
1572 1572 sv = seen[v];
1573 1573 if (index_get_parents(self, v, parents, maxrev) < 0)
1574 1574 goto bail;
1575 1575
1576 1576 for (i = 0; i < 2; i++) {
1577 1577 int p = parents[i];
1578 1578 long sp;
1579 1579 int dp;
1580 1580
1581 1581 if (p == -1)
1582 1582 continue;
1583 1583
1584 1584 dp = depth[p];
1585 1585 sp = seen[p];
1586 1586 if (dp <= dv) {
1587 1587 depth[p] = dv + 1;
1588 1588 if (sp != sv) {
1589 1589 interesting[sv] += 1;
1590 1590 seen[p] = sv;
1591 1591 if (sp) {
1592 1592 interesting[sp] -= 1;
1593 1593 if (interesting[sp] == 0)
1594 1594 ninteresting -= 1;
1595 1595 }
1596 1596 }
1597 1597 }
1598 1598 else if (dv == dp - 1) {
1599 1599 long nsp = sp | sv;
1600 1600 if (nsp == sp)
1601 1601 continue;
1602 1602 seen[p] = nsp;
1603 1603 interesting[sp] -= 1;
1604 1604 if (interesting[sp] == 0)
1605 1605 ninteresting -= 1;
1606 1606 if (interesting[nsp] == 0)
1607 1607 ninteresting += 1;
1608 1608 interesting[nsp] += 1;
1609 1609 }
1610 1610 }
1611 1611 interesting[sv] -= 1;
1612 1612 if (interesting[sv] == 0)
1613 1613 ninteresting -= 1;
1614 1614 }
1615 1615
1616 1616 final = 0;
1617 1617 j = ninteresting;
1618 1618 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1619 1619 if (interesting[i] == 0)
1620 1620 continue;
1621 1621 final |= i;
1622 1622 j -= 1;
1623 1623 }
1624 1624 if (final == 0) {
1625 1625 keys = PyList_New(0);
1626 1626 goto bail;
1627 1627 }
1628 1628
1629 1629 dict = PyDict_New();
1630 1630 if (dict == NULL)
1631 1631 goto bail;
1632 1632
1633 1633 for (i = 0; i < revcount; i++) {
1634 1634 PyObject *key;
1635 1635
1636 1636 if ((final & (1 << i)) == 0)
1637 1637 continue;
1638 1638
1639 1639 key = PyList_GET_ITEM(revs, i);
1640 1640 Py_INCREF(key);
1641 1641 Py_INCREF(Py_None);
1642 1642 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1643 1643 Py_DECREF(key);
1644 1644 Py_DECREF(Py_None);
1645 1645 goto bail;
1646 1646 }
1647 1647 }
1648 1648
1649 1649 keys = PyDict_Keys(dict);
1650 1650
1651 1651 bail:
1652 1652 free(depth);
1653 1653 free(seen);
1654 1654 free(interesting);
1655 1655 Py_XDECREF(dict);
1656 1656
1657 1657 return keys;
1658 1658 }
1659 1659
1660 1660 /*
1661 1661 * Given a (possibly overlapping) set of revs, return all the
1662 1662 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
1663 1663 */
1664 1664 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1665 1665 {
1666 1666 PyObject *ret = NULL;
1667 1667 Py_ssize_t argcount, i, len;
1668 1668 bitmask repeat = 0;
1669 1669 int revcount = 0;
1670 1670 int *revs;
1671 1671
1672 1672 argcount = PySequence_Length(args);
1673 1673 revs = PyMem_Malloc(argcount * sizeof(*revs));
1674 1674 if (argcount > 0 && revs == NULL)
1675 1675 return PyErr_NoMemory();
1676 len = index_length(self) - 1;
1676 len = index_length(self);
1677 1677
1678 1678 for (i = 0; i < argcount; i++) {
1679 1679 static const int capacity = 24;
1680 1680 PyObject *obj = PySequence_GetItem(args, i);
1681 1681 bitmask x;
1682 1682 long val;
1683 1683
1684 1684 if (!PyInt_Check(obj)) {
1685 1685 PyErr_SetString(PyExc_TypeError,
1686 1686 "arguments must all be ints");
1687 1687 Py_DECREF(obj);
1688 1688 goto bail;
1689 1689 }
1690 1690 val = PyInt_AsLong(obj);
1691 1691 Py_DECREF(obj);
1692 1692 if (val == -1) {
1693 1693 ret = PyList_New(0);
1694 1694 goto done;
1695 1695 }
1696 1696 if (val < 0 || val >= len) {
1697 1697 PyErr_SetString(PyExc_IndexError,
1698 1698 "index out of range");
1699 1699 goto bail;
1700 1700 }
1701 1701 /* this cheesy bloom filter lets us avoid some more
1702 1702 * expensive duplicate checks in the common set-is-disjoint
1703 1703 * case */
1704 1704 x = 1ull << (val & 0x3f);
1705 1705 if (repeat & x) {
1706 1706 int k;
1707 1707 for (k = 0; k < revcount; k++) {
1708 1708 if (val == revs[k])
1709 1709 goto duplicate;
1710 1710 }
1711 1711 }
1712 1712 else repeat |= x;
1713 1713 if (revcount >= capacity) {
1714 1714 PyErr_Format(PyExc_OverflowError,
1715 1715 "bitset size (%d) > capacity (%d)",
1716 1716 revcount, capacity);
1717 1717 goto bail;
1718 1718 }
1719 1719 revs[revcount++] = (int)val;
1720 1720 duplicate:;
1721 1721 }
1722 1722
1723 1723 if (revcount == 0) {
1724 1724 ret = PyList_New(0);
1725 1725 goto done;
1726 1726 }
1727 1727 if (revcount == 1) {
1728 1728 PyObject *obj;
1729 1729 ret = PyList_New(1);
1730 1730 if (ret == NULL)
1731 1731 goto bail;
1732 1732 obj = PyInt_FromLong(revs[0]);
1733 1733 if (obj == NULL)
1734 1734 goto bail;
1735 1735 PyList_SET_ITEM(ret, 0, obj);
1736 1736 goto done;
1737 1737 }
1738 1738
1739 1739 ret = find_gca_candidates(self, revs, revcount);
1740 1740 if (ret == NULL)
1741 1741 goto bail;
1742 1742
1743 1743 done:
1744 1744 PyMem_Free(revs);
1745 1745 return ret;
1746 1746
1747 1747 bail:
1748 1748 PyMem_Free(revs);
1749 1749 Py_XDECREF(ret);
1750 1750 return NULL;
1751 1751 }
1752 1752
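
The "cheesy bloom filter" above keys one bit of a 64-bit mask on the low six bits of each value, so the exact O(n) duplicate scan only runs when that bit has already been seen. A small Python sketch of the same trick:

    def dedup(vals):
        repeat, out = 0, []
        for v in vals:
            bit = 1 << (v & 0x3F)
            if (repeat & bit) and v in out:   # possible duplicate: confirm exactly
                continue
            repeat |= bit
            out.append(v)
        return out

    assert dedup([5, 7, 69, 5]) == [5, 7, 69]   # 69 collides with 5's bit but is kept
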
1753 1753 /*
1754 1754 * Given a (possibly overlapping) set of revs, return the greatest
1755 1755 * common ancestors: those with the longest path to the root.
1756 1756 */
1757 1757 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1758 1758 {
1759 1759 PyObject *ret;
1760 1760 PyObject *gca = index_commonancestorsheads(self, args);
1761 1761 if (gca == NULL)
1762 1762 return NULL;
1763 1763
1764 1764 if (PyList_GET_SIZE(gca) <= 1) {
1765 1765 return gca;
1766 1766 }
1767 1767
1768 1768 ret = find_deepest(self, gca);
1769 1769 Py_DECREF(gca);
1770 1770 return ret;
1771 1771 }
1772 1772
1773 1773 /*
1774 1774 * Invalidate any trie entries introduced by added revs.
1775 1775 */
1776 1776 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1777 1777 {
1778 1778 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1779 1779
1780 1780 for (i = start; i < len; i++) {
1781 1781 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1782 1782 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1783 1783
1784 1784 nt_delete_node(self, PyBytes_AS_STRING(node));
1785 1785 }
1786 1786
1787 1787 if (start == 0)
1788 1788 Py_CLEAR(self->added);
1789 1789 }
1790 1790
1791 1791 /*
1792 1792 * Delete a numeric range of revs, which must be at the end of the
1793 1793 * index, but exclude the sentinel nullid entry.
1794 1794 */
1795 1795 static int index_slice_del(indexObject *self, PyObject *item)
1796 1796 {
1797 1797 Py_ssize_t start, stop, step, slicelength;
1798 Py_ssize_t length = index_length(self);
1798 Py_ssize_t length = index_length(self) + 1;
1799 1799 int ret = 0;
1800 1800
1801 1801 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1802 1802 #ifdef IS_PY3K
1803 1803 if (PySlice_GetIndicesEx(item, length,
1804 1804 #else
1805 1805 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1806 1806 #endif
1807 1807 &start, &stop, &step, &slicelength) < 0)
1808 1808 return -1;
1809 1809
1810 1810 if (slicelength <= 0)
1811 1811 return 0;
1812 1812
1813 1813 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1814 1814 stop = start;
1815 1815
1816 1816 if (step < 0) {
1817 1817 stop = start + 1;
1818 1818 start = stop + step*(slicelength - 1) - 1;
1819 1819 step = -step;
1820 1820 }
1821 1821
1822 1822 if (step != 1) {
1823 1823 PyErr_SetString(PyExc_ValueError,
1824 1824 "revlog index delete requires step size of 1");
1825 1825 return -1;
1826 1826 }
1827 1827
1828 1828 if (stop != length - 1) {
1829 1829 PyErr_SetString(PyExc_IndexError,
1830 1830 "revlog index deletion indices are invalid");
1831 1831 return -1;
1832 1832 }
1833 1833
1834 1834 if (start < self->length - 1) {
1835 1835 if (self->nt) {
1836 1836 Py_ssize_t i;
1837 1837
1838 1838 for (i = start + 1; i < self->length - 1; i++) {
1839 1839 const char *node = index_node_existing(self, i);
1840 1840 if (node == NULL)
1841 1841 return -1;
1842 1842
1843 1843 nt_delete_node(self, node);
1844 1844 }
1845 1845 if (self->added)
1846 1846 nt_invalidate_added(self, 0);
1847 1847 if (self->ntrev > start)
1848 1848 self->ntrev = (int)start;
1849 1849 }
1850 1850 self->length = start + 1;
1851 1851 if (start < self->raw_length) {
1852 1852 if (self->cache) {
1853 1853 Py_ssize_t i;
1854 1854 for (i = start; i < self->raw_length; i++)
1855 1855 Py_CLEAR(self->cache[i]);
1856 1856 }
1857 1857 self->raw_length = start;
1858 1858 }
1859 1859 goto done;
1860 1860 }
1861 1861
1862 1862 if (self->nt) {
1863 1863 nt_invalidate_added(self, start - self->length + 1);
1864 1864 if (self->ntrev > start)
1865 1865 self->ntrev = (int)start;
1866 1866 }
1867 1867 if (self->added)
1868 1868 ret = PyList_SetSlice(self->added, start - self->length + 1,
1869 1869 PyList_GET_SIZE(self->added), NULL);
1870 1870 done:
1871 1871 Py_CLEAR(self->headrevs);
1872 1872 return ret;
1873 1873 }
1874 1874
1875 1875 /*
1876 1876 * Supported ops:
1877 1877 *
1878 1878 * slice deletion
1879 1879 * string assignment (extend node->rev mapping)
1880 1880 * string deletion (shrink node->rev mapping)
1881 1881 */
1882 1882 static int index_assign_subscript(indexObject *self, PyObject *item,
1883 1883 PyObject *value)
1884 1884 {
1885 1885 char *node;
1886 1886 long rev;
1887 1887
1888 1888 if (PySlice_Check(item) && value == NULL)
1889 1889 return index_slice_del(self, item);
1890 1890
1891 1891 if (node_check(item, &node) == -1)
1892 1892 return -1;
1893 1893
1894 1894 if (value == NULL)
1895 1895 return self->nt ? nt_delete_node(self, node) : 0;
1896 1896 rev = PyInt_AsLong(value);
1897 1897 if (rev > INT_MAX || rev < 0) {
1898 1898 if (!PyErr_Occurred())
1899 1899 PyErr_SetString(PyExc_ValueError, "rev out of range");
1900 1900 return -1;
1901 1901 }
1902 1902
1903 1903 if (nt_init(self) == -1)
1904 1904 return -1;
1905 1905 return nt_insert(self, node, (int)rev);
1906 1906 }
1907 1907
1908 1908 /*
1909 1909 * Find all RevlogNG entries in an index that has inline data. Update
1910 1910 * the optional "offsets" table with those entries.
1911 1911 */
1912 1912 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1913 1913 {
1914 1914 const char *data = (const char *)self->buf.buf;
1915 1915 Py_ssize_t pos = 0;
1916 1916 Py_ssize_t end = self->buf.len;
1917 1917 long incr = v1_hdrsize;
1918 1918 Py_ssize_t len = 0;
1919 1919
1920 1920 while (pos + v1_hdrsize <= end && pos >= 0) {
1921 1921 uint32_t comp_len;
1922 1922 /* 3rd element of header is length of compressed inline data */
1923 1923 comp_len = getbe32(data + pos + 8);
1924 1924 incr = v1_hdrsize + comp_len;
1925 1925 if (offsets)
1926 1926 offsets[len] = data + pos;
1927 1927 len++;
1928 1928 pos += incr;
1929 1929 }
1930 1930
1931 1931 if (pos != end) {
1932 1932 if (!PyErr_Occurred())
1933 1933 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1934 1934 return -1;
1935 1935 }
1936 1936
1937 1937 return len;
1938 1938 }
1939 1939
1940 1940 static int index_init(indexObject *self, PyObject *args)
1941 1941 {
1942 1942 PyObject *data_obj, *inlined_obj;
1943 1943 Py_ssize_t size;
1944 1944
1945 1945 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1946 1946 self->raw_length = 0;
1947 1947 self->added = NULL;
1948 1948 self->cache = NULL;
1949 1949 self->data = NULL;
1950 1950 memset(&self->buf, 0, sizeof(self->buf));
1951 1951 self->headrevs = NULL;
1952 1952 self->filteredrevs = Py_None;
1953 1953 Py_INCREF(Py_None);
1954 1954 self->nt = NULL;
1955 1955 self->offsets = NULL;
1956 1956
1957 1957 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1958 1958 return -1;
1959 1959 if (!PyObject_CheckBuffer(data_obj)) {
1960 1960 PyErr_SetString(PyExc_TypeError,
1961 1961 "data does not support buffer interface");
1962 1962 return -1;
1963 1963 }
1964 1964
1965 1965 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1966 1966 return -1;
1967 1967 size = self->buf.len;
1968 1968
1969 1969 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1970 1970 self->data = data_obj;
1971 1971
1972 1972 self->ntlength = self->ntcapacity = 0;
1973 1973 self->ntdepth = self->ntsplits = 0;
1974 1974 self->ntlookups = self->ntmisses = 0;
1975 1975 self->ntrev = -1;
1976 1976 Py_INCREF(self->data);
1977 1977
1978 1978 if (self->inlined) {
1979 1979 Py_ssize_t len = inline_scan(self, NULL);
1980 1980 if (len == -1)
1981 1981 goto bail;
1982 1982 self->raw_length = len;
1983 1983 self->length = len + 1;
1984 1984 } else {
1985 1985 if (size % v1_hdrsize) {
1986 1986 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1987 1987 goto bail;
1988 1988 }
1989 1989 self->raw_length = size / v1_hdrsize;
1990 1990 self->length = self->raw_length + 1;
1991 1991 }
1992 1992
1993 1993 return 0;
1994 1994 bail:
1995 1995 return -1;
1996 1996 }
1997 1997
1998 1998 static PyObject *index_nodemap(indexObject *self)
1999 1999 {
2000 2000 Py_INCREF(self);
2001 2001 return (PyObject *)self;
2002 2002 }
2003 2003
2004 2004 static void index_dealloc(indexObject *self)
2005 2005 {
2006 2006 _index_clearcaches(self);
2007 2007 Py_XDECREF(self->filteredrevs);
2008 2008 if (self->buf.buf) {
2009 2009 PyBuffer_Release(&self->buf);
2010 2010 memset(&self->buf, 0, sizeof(self->buf));
2011 2011 }
2012 2012 Py_XDECREF(self->data);
2013 2013 Py_XDECREF(self->added);
2014 2014 PyObject_Del(self);
2015 2015 }
2016 2016
2017 2017 static PySequenceMethods index_sequence_methods = {
2018 2018 (lenfunc)index_length, /* sq_length */
2019 2019 0, /* sq_concat */
2020 2020 0, /* sq_repeat */
2021 2021 (ssizeargfunc)index_get, /* sq_item */
2022 2022 0, /* sq_slice */
2023 2023 0, /* sq_ass_item */
2024 2024 0, /* sq_ass_slice */
2025 2025 (objobjproc)index_contains, /* sq_contains */
2026 2026 };
2027 2027
2028 2028 static PyMappingMethods index_mapping_methods = {
2029 2029 (lenfunc)index_length, /* mp_length */
2030 2030 (binaryfunc)index_getitem, /* mp_subscript */
2031 2031 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2032 2032 };
2033 2033
2034 2034 static PyMethodDef index_methods[] = {
2035 2035 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2036 2036 "return the gca set of the given revs"},
2037 2037 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2038 2038 METH_VARARGS,
2039 2039 "return the heads of the common ancestors of the given revs"},
2040 2040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2041 2041 "clear the index caches"},
2042 2042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2043 2043 "get an index entry"},
2044 2044 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2045 2045 METH_VARARGS, "compute phases"},
2046 2046 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2047 2047 "reachableroots"},
2048 2048 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2049 2049 "get head revisions"}, /* Can do filtering since 3.2 */
2050 2050 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2051 2051 "get filtered head revisions"}, /* Can always do filtering */
2052 2052 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2053 2053 "determine revisions with deltas to reconstruct fulltext"},
2054 2054 {"append", (PyCFunction)index_append, METH_O,
2055 2055 "append an index entry"},
2056 2056 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2057 2057 "match a potentially ambiguous node ID"},
2058 2058 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2059 2059 "find length of shortest hex nodeid of a binary ID"},
2060 2060 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2061 2061 "stats for the index"},
2062 2062 {NULL} /* Sentinel */
2063 2063 };
2064 2064
2065 2065 static PyGetSetDef index_getset[] = {
2066 2066 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2067 2067 {NULL} /* Sentinel */
2068 2068 };
2069 2069
2070 2070 static PyTypeObject indexType = {
2071 2071 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2072 2072 "parsers.index", /* tp_name */
2073 2073 sizeof(indexObject), /* tp_basicsize */
2074 2074 0, /* tp_itemsize */
2075 2075 (destructor)index_dealloc, /* tp_dealloc */
2076 2076 0, /* tp_print */
2077 2077 0, /* tp_getattr */
2078 2078 0, /* tp_setattr */
2079 2079 0, /* tp_compare */
2080 2080 0, /* tp_repr */
2081 2081 0, /* tp_as_number */
2082 2082 &index_sequence_methods, /* tp_as_sequence */
2083 2083 &index_mapping_methods, /* tp_as_mapping */
2084 2084 0, /* tp_hash */
2085 2085 0, /* tp_call */
2086 2086 0, /* tp_str */
2087 2087 0, /* tp_getattro */
2088 2088 0, /* tp_setattro */
2089 2089 0, /* tp_as_buffer */
2090 2090 Py_TPFLAGS_DEFAULT, /* tp_flags */
2091 2091 "revlog index", /* tp_doc */
2092 2092 0, /* tp_traverse */
2093 2093 0, /* tp_clear */
2094 2094 0, /* tp_richcompare */
2095 2095 0, /* tp_weaklistoffset */
2096 2096 0, /* tp_iter */
2097 2097 0, /* tp_iternext */
2098 2098 index_methods, /* tp_methods */
2099 2099 0, /* tp_members */
2100 2100 index_getset, /* tp_getset */
2101 2101 0, /* tp_base */
2102 2102 0, /* tp_dict */
2103 2103 0, /* tp_descr_get */
2104 2104 0, /* tp_descr_set */
2105 2105 0, /* tp_dictoffset */
2106 2106 (initproc)index_init, /* tp_init */
2107 2107 0, /* tp_alloc */
2108 2108 };
2109 2109
2110 2110 /*
2111 2111 * returns a tuple of the form (index, cache) with elements as
2112 2112 * follows:
2113 2113 *
2114 2114 * index: an index object that lazily parses RevlogNG records
2115 2115 * cache: if data is inlined, a tuple (0, index_file_content), else None
2116 2116 * index_file_content could be a string, or a buffer
2117 2117 *
2118 2118 * added complications are for backwards compatibility
2119 2119 */
2120 2120 PyObject *parse_index2(PyObject *self, PyObject *args)
2121 2121 {
2122 2122 PyObject *tuple = NULL, *cache = NULL;
2123 2123 indexObject *idx;
2124 2124 int ret;
2125 2125
2126 2126 idx = PyObject_New(indexObject, &indexType);
2127 2127 if (idx == NULL)
2128 2128 goto bail;
2129 2129
2130 2130 ret = index_init(idx, args);
2131 2131 if (ret == -1)
2132 2132 goto bail;
2133 2133
2134 2134 if (idx->inlined) {
2135 2135 cache = Py_BuildValue("iO", 0, idx->data);
2136 2136 if (cache == NULL)
2137 2137 goto bail;
2138 2138 } else {
2139 2139 cache = Py_None;
2140 2140 Py_INCREF(cache);
2141 2141 }
2142 2142
2143 2143 tuple = Py_BuildValue("NN", idx, cache);
2144 2144 if (!tuple)
2145 2145 goto bail;
2146 2146 return tuple;
2147 2147
2148 2148 bail:
2149 2149 Py_XDECREF(idx);
2150 2150 Py_XDECREF(cache);
2151 2151 Py_XDECREF(tuple);
2152 2152 return NULL;
2153 2153 }
2154 2154
2155 2155 void revlog_module_init(PyObject *mod)
2156 2156 {
2157 2157 indexType.tp_new = PyType_GenericNew;
2158 2158 if (PyType_Ready(&indexType) < 0)
2159 2159 return;
2160 2160 Py_INCREF(&indexType);
2161 2161 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2162 2162
2163 2163 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2164 2164 -1, -1, -1, -1, nullid, 20);
2165 2165 if (nullentry)
2166 2166 PyObject_GC_UnTrack(nullentry);
2167 2167 }
@@ -1,109 +1,109 b''
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 # Rules for how modules can be loaded. Values are:
14 14 #
15 15 # c - require C extensions
16 16 # allow - allow pure Python implementation when C loading fails
17 17 # cffi - required cffi versions (implemented within pure module)
18 18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 19 # py - only load pure Python modules
20 20 #
21 21 # By default, fall back to the pure modules so the in-place build can
22 22 # run without recompiling the C extensions. This will be overridden by
23 23 # __modulepolicy__ generated by setup.py.
24 24 policy = b'allow'
25 25 _packageprefs = {
26 26 # policy: (versioned package, pure package)
27 27 b'c': (r'cext', None),
28 28 b'allow': (r'cext', r'pure'),
29 29 b'cffi': (r'cffi', None),
30 30 b'cffi-allow': (r'cffi', r'pure'),
31 31 b'py': (None, r'pure'),
32 32 }
33 33
34 34 try:
35 35 from . import __modulepolicy__
36 36 policy = __modulepolicy__.modulepolicy
37 37 except ImportError:
38 38 pass
39 39
40 40 # PyPy doesn't load C extensions.
41 41 #
42 42 # The canonical way to do this is to test platform.python_implementation().
43 43 # But we don't want to pay the cost of importing platform just for this check.
44 44 if r'__pypy__' in sys.builtin_module_names:
45 45 policy = b'cffi'
46 46
47 47 # Environment variable can always force settings.
48 48 if sys.version_info[0] >= 3:
49 49 if r'HGMODULEPOLICY' in os.environ:
50 50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
51 51 else:
52 52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
53 53
54 54 def _importfrom(pkgname, modname):
55 55 # from .<pkgname> import <modname> (where . is looked through this module)
56 56 fakelocals = {}
57 57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
58 58 try:
59 59 fakelocals[modname] = mod = getattr(pkg, modname)
60 60 except AttributeError:
61 61 raise ImportError(r'cannot import name %s' % modname)
62 62 # force import; fakelocals[modname] may be replaced with the real module
63 63 getattr(mod, r'__doc__', None)
64 64 return fakelocals[modname]
65 65
66 66 # keep in sync with "version" in C modules
67 67 _cextversions = {
68 68 (r'cext', r'base85'): 1,
69 69 (r'cext', r'bdiff'): 3,
70 70 (r'cext', r'mpatch'): 1,
71 71 (r'cext', r'osutil'): 4,
72 (r'cext', r'parsers'): 6,
72 (r'cext', r'parsers'): 7,
73 73 }
74 74
75 75 # map import request to other package or module
76 76 _modredirects = {
77 77 (r'cext', r'charencode'): (r'cext', r'parsers'),
78 78 (r'cffi', r'base85'): (r'pure', r'base85'),
79 79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
80 80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
81 81 }
82 82
83 83 def _checkmod(pkgname, modname, mod):
84 84 expected = _cextversions.get((pkgname, modname))
85 85 actual = getattr(mod, r'version', None)
86 86 if actual != expected:
87 87 raise ImportError(r'cannot import module %s.%s '
88 88 r'(expected version: %d, actual: %r)'
89 89 % (pkgname, modname, expected, actual))
90 90
91 91 def importmod(modname):
92 92 """Import module according to policy and check API version"""
93 93 try:
94 94 verpkg, purepkg = _packageprefs[policy]
95 95 except KeyError:
96 96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
97 97 assert verpkg or purepkg
98 98 if verpkg:
99 99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
100 100 try:
101 101 mod = _importfrom(pn, mn)
102 102 if pn == verpkg:
103 103 _checkmod(pn, mn, mod)
104 104 return mod
105 105 except ImportError:
106 106 if not purepkg:
107 107 raise
108 108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
109 109 return _importfrom(pn, mn)
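
A short usage sketch of the policy machinery above. It assumes a Mercurial source tree is importable, and the environment variable has to be set before mercurial.policy is first imported:

    import os
    os.environ["HGMODULEPOLICY"] = "py"        # force the pure-Python modules
    from mercurial import policy
    parsers = policy.importmod("parsers")      # resolves to mercurial.pure.parsers
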
@@ -1,176 +1,176 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid
14 14 from .. import pycompat
15 15 stringio = pycompat.bytesio
16 16
17 17
18 18 _pack = struct.pack
19 19 _unpack = struct.unpack
20 20 _compress = zlib.compress
21 21 _decompress = zlib.decompress
22 22
23 23 # Some code below makes tuples directly because it's more convenient. However,
24 24 # code outside this module should always use dirstatetuple.
25 25 def dirstatetuple(*x):
26 26 # x is a tuple
27 27 return x
28 28
29 29 indexformatng = ">Qiiiiii20s12x"
30 30 indexfirst = struct.calcsize('Q')
31 31 sizeint = struct.calcsize('i')
32 32 indexsize = struct.calcsize(indexformatng)
33 33
34 34 def gettype(q):
35 35 return int(q & 0xFFFF)
36 36
37 37 def offset_type(offset, type):
38 38 return int(int(offset) << 16 | type)
39 39
40 40 class BaseIndexObject(object):
41 41 def __len__(self):
42 return self._lgt + len(self._extra) + 1
42 return self._lgt + len(self._extra)
43 43
44 44 def append(self, tup):
45 45 self._extra.append(tup)
46 46
47 47 def _fix_index(self, i):
48 48 if not isinstance(i, int):
49 49 raise TypeError("expecting int indexes")
50 if i < 0 or i >= len(self):
50 if i < 0 or i >= len(self) + 1:
51 51 raise IndexError
52 52 return i
53 53
54 54 def __getitem__(self, i):
55 if i == -1 or i == len(self) - 1:
55 if i == -1 or i == len(self):
56 56 return (0, 0, 0, -1, -1, -1, -1, nullid)
57 57 i = self._fix_index(i)
58 58 if i >= self._lgt:
59 59 return self._extra[i - self._lgt]
60 60 index = self._calculate_index(i)
61 61 r = struct.unpack(indexformatng, self._data[index:index + indexsize])
62 62 if i == 0:
63 63 e = list(r)
64 64 type = gettype(e[0])
65 65 e[0] = offset_type(0, type)
66 66 return tuple(e)
67 67 return r
68 68
69 69 class IndexObject(BaseIndexObject):
70 70 def __init__(self, data):
71 71 assert len(data) % indexsize == 0
72 72 self._data = data
73 73 self._lgt = len(data) // indexsize
74 74 self._extra = []
75 75
76 76 def _calculate_index(self, i):
77 77 return i * indexsize
78 78
79 79 def __delitem__(self, i):
80 80 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
81 81 raise ValueError("deleting slices only supports a:-1 with step 1")
82 82 i = self._fix_index(i.start)
83 83 if i < self._lgt:
84 84 self._data = self._data[:i * indexsize]
85 85 self._lgt = i
86 86 self._extra = []
87 87 else:
88 88 self._extra = self._extra[:i - self._lgt]
89 89
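
With the __len__ change above, the pure-Python index reports only real revisions; the nullid sentinel is no longer counted but remains addressable at -1 and at position len(index). A quick check with an empty index (assumes a Mercurial checkout on sys.path):

    from mercurial.pure.parsers import IndexObject
    from mercurial.node import nullid

    idx = IndexObject(b"")              # no revisions yet
    assert len(idx) == 0                # nullid is no longer counted
    assert idx[-1][7] == nullid         # ...but the sentinel entry is still there
    assert idx[len(idx)][7] == nullid   # one past the end also maps to the null entry
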
90 90 class InlinedIndexObject(BaseIndexObject):
91 91 def __init__(self, data, inline=0):
92 92 self._data = data
93 93 self._lgt = self._inline_scan(None)
94 94 self._inline_scan(self._lgt)
95 95 self._extra = []
96 96
97 97 def _inline_scan(self, lgt):
98 98 off = 0
99 99 if lgt is not None:
100 100 self._offsets = [0] * lgt
101 101 count = 0
102 102 while off <= len(self._data) - indexsize:
103 103 s, = struct.unpack('>i',
104 104 self._data[off + indexfirst:off + sizeint + indexfirst])
105 105 if lgt is not None:
106 106 self._offsets[count] = off
107 107 count += 1
108 108 off += indexsize + s
109 109 if off != len(self._data):
110 110 raise ValueError("corrupted data")
111 111 return count
112 112
113 113 def __delitem__(self, i):
114 114 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
115 115 raise ValueError("deleting slices only supports a:-1 with step 1")
116 116 i = self._fix_index(i.start)
117 117 if i < self._lgt:
118 118 self._offsets = self._offsets[:i]
119 119 self._lgt = i
120 120 self._extra = []
121 121 else:
122 122 self._extra = self._extra[:i - self._lgt]
123 123
124 124 def _calculate_index(self, i):
125 125 return self._offsets[i]
126 126
127 127 def parse_index2(data, inline):
128 128 if not inline:
129 129 return IndexObject(data), None
130 130 return InlinedIndexObject(data, inline), (0, data)
131 131
132 132 def parse_dirstate(dmap, copymap, st):
133 133 parents = [st[:20], st[20: 40]]
134 134 # dereference fields so they will be local in loop
135 135 format = ">cllll"
136 136 e_size = struct.calcsize(format)
137 137 pos1 = 40
138 138 l = len(st)
139 139
140 140 # the inner loop
141 141 while pos1 < l:
142 142 pos2 = pos1 + e_size
143 143 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
144 144 pos1 = pos2 + e[4]
145 145 f = st[pos2:pos1]
146 146 if '\0' in f:
147 147 f, c = f.split('\0')
148 148 copymap[f] = c
149 149 dmap[f] = e[:4]
150 150 return parents
151 151
152 152 def pack_dirstate(dmap, copymap, pl, now):
153 153 now = int(now)
154 154 cs = stringio()
155 155 write = cs.write
156 156 write("".join(pl))
157 157 for f, e in dmap.iteritems():
158 158 if e[0] == 'n' and e[3] == now:
159 159 # The file was last modified "simultaneously" with the current
160 160 # write to dirstate (i.e. within the same second for file-
161 161 # systems with a granularity of 1 sec). This commonly happens
162 162 # for at least a couple of files on 'update'.
163 163 # The user could change the file without changing its size
164 164 # within the same second. Invalidate the file's mtime in
165 165 # dirstate, forcing future 'status' calls to compare the
166 166 # contents of the file if the size is the same. This prevents
167 167 # mistakenly treating such files as clean.
168 168 e = dirstatetuple(e[0], e[1], e[2], -1)
169 169 dmap[f] = e
170 170
171 171 if f in copymap:
172 172 f = "%s\0%s" % (f, copymap[f])
173 173 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
174 174 write(e)
175 175 write(f)
176 176 return cs.getvalue()
@@ -1,268 +1,268 b''
1 1 # repoview.py - Filtered view of a localrepo object
2 2 #
3 3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 4 # Logilab SA <contact@logilab.fr>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import copy
12 12 import weakref
13 13
14 14 from .node import nullrev
15 15 from . import (
16 16 obsolete,
17 17 phases,
18 18 pycompat,
19 19 tags as tagsmod,
20 20 )
21 21
22 22 def hideablerevs(repo):
23 23 """Revision candidates to be hidden
24 24
25 25 This is a standalone function to allow extensions to wrap it.
26 26
27 27 Because we use the set of immutable changesets as a fallback subset in
28 28 branchmap (see mercurial.branchmap.subsettable), you cannot set "public"
29 29 changesets as "hideable". Doing so would break multiple code assertions and
30 30 lead to crashes."""
31 31 return obsolete.getrevs(repo, 'obsolete')
32 32
33 33 def pinnedrevs(repo):
34 34 """revisions blocking hidden changesets from being filtered
35 35 """
36 36
37 37 cl = repo.changelog
38 38 pinned = set()
39 39 pinned.update([par.rev() for par in repo[None].parents()])
40 40 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
41 41
42 42 tags = {}
43 43 tagsmod.readlocaltags(repo.ui, repo, tags, {})
44 44 if tags:
45 45 rev, nodemap = cl.rev, cl.nodemap
46 46 pinned.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
47 47 return pinned
48 48
49 49
50 50 def _revealancestors(pfunc, hidden, revs):
51 51 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
52 52 from 'hidden'
53 53
54 54 - pfunc(r): a function returning the parents of 'r',
55 55 - hidden: the (preliminary) hidden revisions, to be updated
56 56 - revs: iterable of revnum,
57 57
58 58 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
59 59 *not* revealed)
60 60 """
61 61 stack = list(revs)
62 62 while stack:
63 63 for p in pfunc(stack.pop()):
64 64 if p != nullrev and p in hidden:
65 65 hidden.remove(p)
66 66 stack.append(p)
67 67
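
A tiny worked example of _revealancestors on a made-up linear DAG; the dict below is hypothetical and its __getitem__ stands in for changelog.parentrevs:

    toydag = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)}
    hidden = {0, 1, 2}
    _revealancestors(toydag.__getitem__, hidden, {3})
    assert hidden == set()   # every hidden ancestor of rev 3 was revealed
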
68 68 def computehidden(repo, visibilityexceptions=None):
69 69 """compute the set of hidden revision to filter
70 70
71 71 During most operation hidden should be filtered."""
72 72 assert not repo.changelog.filteredrevs
73 73
74 74 hidden = hideablerevs(repo)
75 75 if hidden:
76 76 hidden = set(hidden - pinnedrevs(repo))
77 77 if visibilityexceptions:
78 78 hidden -= visibilityexceptions
79 79 pfunc = repo.changelog.parentrevs
80 80 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
81 81
82 82 visible = mutable - hidden
83 83 _revealancestors(pfunc, hidden, visible)
84 84 return frozenset(hidden)
85 85
86 86 def computeunserved(repo, visibilityexceptions=None):
87 87 """compute the set of revision that should be filtered when used a server
88 88
89 89 Secret and hidden changeset should not pretend to be here."""
90 90 assert not repo.changelog.filteredrevs
91 91 # fast path in simple case to avoid impact of non optimised code
92 92 hiddens = filterrevs(repo, 'visible')
93 93 if phases.hassecret(repo):
94 94 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
95 95 return frozenset(hiddens | frozenset(secrets))
96 96 else:
97 97 return hiddens
98 98
99 99 def computemutable(repo, visibilityexceptions=None):
100 100 assert not repo.changelog.filteredrevs
101 101 # fast check to avoid revset call on huge repo
102 102 if any(repo._phasecache.phaseroots[1:]):
103 103 getphase = repo._phasecache.phase
104 104 maymutable = filterrevs(repo, 'base')
105 105 return frozenset(r for r in maymutable if getphase(repo, r))
106 106 return frozenset()
107 107
108 108 def computeimpactable(repo, visibilityexceptions=None):
109 109 """Everything impactable by mutable revision
110 110
111 111 The immutable filter still has some chance of being invalidated. This will
112 112 happen when:
113 113
114 114 - you garbage collect hidden changesets,
115 115 - public phase is moved backward,
116 116 - something is changed in the filtering (this could be fixed)
117 117
118 118 This filters out any mutable changeset and any public changeset that may be
119 119 impacted by something happening to a mutable revision.
120 120
121 121 This is achieved by filtering out everything with a revision number equal
122 122 to or higher than that of the first mutable changeset."""
123 123 assert not repo.changelog.filteredrevs
124 124 cl = repo.changelog
125 125 firstmutable = len(cl)
126 126 for roots in repo._phasecache.phaseroots[1:]:
127 127 if roots:
128 128 firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
129 129 # protect from nullrev root
130 130 firstmutable = max(0, firstmutable)
131 131 return frozenset(pycompat.xrange(firstmutable, len(cl)))
132 132
133 133 # function to compute filtered set
134 134 #
135 135 # When adding a new filter you MUST update the table at:
136 136 # mercurial.branchmap.subsettable
137 137 # Otherwise your filter will have to recompute all its branches cache
138 138 # from scratch (very slow).
139 139 filtertable = {'visible': computehidden,
140 140 'visible-hidden': computehidden,
141 141 'served': computeunserved,
142 142 'immutable': computemutable,
143 143 'base': computeimpactable}
144 144
145 145 def filterrevs(repo, filtername, visibilityexceptions=None):
146 146 """returns set of filtered revision for this filter name
147 147
148 148 visibilityexceptions is a set of revs which are exceptions to the
149 149 hidden state and must be visible. They are dynamic and hence we should not
150 150 cache their result"""
151 151 if filtername not in repo.filteredrevcache:
152 152 func = filtertable[filtername]
153 153 if visibilityexceptions:
154 154 return func(repo.unfiltered, visibilityexceptions)
155 155 repo.filteredrevcache[filtername] = func(repo.unfiltered())
156 156 return repo.filteredrevcache[filtername]
157 157
158 158 class repoview(object):
159 159 """Provide a read/write view of a repo through a filtered changelog
160 160
161 161 This object is used to access a filtered version of a repository without
162 162 altering the original repository object itself. We can not alter the
163 163 original object for two main reasons:
164 164 - It prevents the use of a repo with multiple filters at the same time. In
165 165 particular when multiple threads are involved.
166 166 - It makes scope of the filtering harder to control.
167 167
168 168 This object behaves very much like the original repository. All attribute
169 169 operations are done on the original repository:
170 170 - An access to `repoview.someattr` actually returns `repo.someattr`,
171 171 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
172 172 - A deletion of `repoview.someattr` actually drops `someattr`
173 173 from `repo.__dict__`.
174 174
175 175 The only exception is the `changelog` property. It is overridden to return
176 176 a (surface) copy of `repo.changelog` with some revisions filtered. The
177 177 `filtername` attribute of the view controls the revisions that need to be
178 178 filtered. (the fact the changelog is copied is an implementation detail).
179 179
180 180 Unlike attributes, this object intercepts all method calls. This means that
181 181 all methods are run on the `repoview` object with the filtered `changelog`
182 182 property. For this purpose the simple `repoview` class must be mixed with
183 183 the actual class of the repository. This ensures that the resulting
184 184 `repoview` object has the very same methods as the repo object. This
185 185 leads to the property below.
186 186
187 187 repoview.method() --> repo.__class__.method(repoview)
188 188
189 189 The inheritance has to be done dynamically because `repo` can be of any
190 190 subclasses of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
191 191 """
192 192
193 193 def __init__(self, repo, filtername, visibilityexceptions=None):
194 194 object.__setattr__(self, r'_unfilteredrepo', repo)
195 195 object.__setattr__(self, r'filtername', filtername)
196 196 object.__setattr__(self, r'_clcachekey', None)
197 197 object.__setattr__(self, r'_clcache', None)
198 198 # revs which are exceptions and must not be hidden
199 199 object.__setattr__(self, r'_visibilityexceptions',
200 200 visibilityexceptions)
201 201
202 202 # not a propertycache on purpose we shall implement a proper cache later
203 203 @property
204 204 def changelog(self):
205 205 """return a filtered version of the changeset
206 206
207 207 this changelog must not be used for writing"""
208 208 # some cache may be implemented later
209 209 unfi = self._unfilteredrepo
210 210 unfichangelog = unfi.changelog
211 211 # bypass call to changelog.method
212 212 unfiindex = unfichangelog.index
213 unfilen = len(unfiindex) - 1
213 unfilen = len(unfiindex)
214 214 unfinode = unfiindex[unfilen - 1][7]
215 215
216 216 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
217 217 cl = self._clcache
218 218 newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
219 219 # if cl.index is not unfiindex, unfi.changelog would be
220 220 # recreated, and our clcache refers to garbage object
221 221 if (cl is not None and
222 222 (cl.index is not unfiindex or newkey != self._clcachekey)):
223 223 cl = None
224 224 # could have been made None by the previous if
225 225 if cl is None:
226 226 cl = copy.copy(unfichangelog)
227 227 cl.filteredrevs = revs
228 228 object.__setattr__(self, r'_clcache', cl)
229 229 object.__setattr__(self, r'_clcachekey', newkey)
230 230 return cl
231 231
232 232 def unfiltered(self):
233 233 """Return an unfiltered version of a repo"""
234 234 return self._unfilteredrepo
235 235
236 236 def filtered(self, name, visibilityexceptions=None):
237 237 """Return a filtered version of a repository"""
238 238 if name == self.filtername and not visibilityexceptions:
239 239 return self
240 240 return self.unfiltered().filtered(name, visibilityexceptions)
241 241
242 242 def __repr__(self):
243 243 return r'<%s:%s %r>' % (self.__class__.__name__,
244 244 pycompat.sysstr(self.filtername),
245 245 self.unfiltered())
246 246
247 247 # everything access are forwarded to the proxied repo
248 248 def __getattr__(self, attr):
249 249 return getattr(self._unfilteredrepo, attr)
250 250
251 251 def __setattr__(self, attr, value):
252 252 return setattr(self._unfilteredrepo, attr, value)
253 253
254 254 def __delattr__(self, attr):
255 255 return delattr(self._unfilteredrepo, attr)
256 256
257 257 # Python <3.4 easily leaks types via __mro__. See
258 258 # https://bugs.python.org/issue17950. We cache dynamically created types
259 259 # so they won't be leaked on every invocation of repo.filtered().
260 260 _filteredrepotypes = weakref.WeakKeyDictionary()
261 261
262 262 def newtype(base):
263 263 """Create a new type with the repoview mixin and the given base class"""
264 264 if base not in _filteredrepotypes:
265 265 class filteredrepo(repoview, base):
266 266 pass
267 267 _filteredrepotypes[base] = filteredrepo
268 268 return _filteredrepotypes[base]
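
A minimal sketch of the dynamic mixin described in the repoview docstring, run as if inside this module; the base class below is hypothetical. newtype() builds and caches one subclass per concrete repository class, so the repoview overrides come first in the MRO:

    class fakerepo(object):                # stand-in for a localrepo subclass
        pass

    proxycls = newtype(fakerepo)           # class filteredrepo(repoview, fakerepo)
    assert issubclass(proxycls, repoview) and issubclass(proxycls, fakerepo)
    assert newtype(fakerepo) is proxycls   # cached in _filteredrepotypes
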
@@ -1,2971 +1,2969 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 95 CensoredNodeError = error.CensoredNodeError
96 96 ProgrammingError = error.ProgrammingError
97 97
98 98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 99 _flagprocessors = {
100 100 REVIDX_ISCENSORED: None,
101 101 }
102 102
103 103 _mdre = re.compile('\1\n')
104 104 def parsemeta(text):
105 105 """return (metadatadict, metadatasize)"""
106 106 # text can be buffer, so we can't use .startswith or .index
107 107 if text[:2] != '\1\n':
108 108 return None, None
109 109 s = _mdre.search(text, 2).start()
110 110 mtext = text[2:s]
111 111 meta = {}
112 112 for l in mtext.splitlines():
113 113 k, v = l.split(": ", 1)
114 114 meta[k] = v
115 115 return meta, (s + 2)
116 116
117 117 def packmeta(meta, text):
118 118 keys = sorted(meta)
119 119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 120 return "\1\n%s\1\n%s" % (metatext, text)
121 121
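A quick round-trip of the "\1\n" metadata framing implemented by the two helpers above (an illustrative sketch with made-up values, not part of the module):

    meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
    blob = packmeta(meta, 'file contents\n')
    parsed, offset = parsemeta(blob)
    assert parsed == meta                        # keys/values come back unchanged
    assert blob[offset:] == 'file contents\n'    # offset points just past the metadata block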
122 122 def _censoredtext(text):
123 123 m, offs = parsemeta(text)
124 124 return m and "censored" in m
125 125
126 126 def addflagprocessor(flag, processor):
127 127 """Register a flag processor on a revision data flag.
128 128
129 129 Invariant:
130 130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 132 - Only one flag processor can be registered on a specific flag.
133 133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 134 following signatures:
135 135 - (read) f(self, rawtext) -> text, bool
136 136 - (write) f(self, text) -> rawtext, bool
137 137 - (raw) f(self, rawtext) -> bool
138 138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 139 directly visible to the user.
140 140 The boolean returned by these transforms is used to determine whether
141 141 the returned text can be used for hash integrity checking. For example,
142 142 if "write" returns False, then "text" is used to generate hash. If
143 143 "write" returns True, that basically means "rawtext" returned by "write"
144 144 should be used to generate hash. Usually, "write" and "read" return
145 145 different booleans. And "raw" returns a same boolean as "write".
146 146
147 147 Note: The 'raw' transform is used for changegroup generation and in some
148 148 debug commands. In this case the transform only indicates whether the
149 149 contents can be used for hash integrity checks.
150 150 """
151 151 if not flag & REVIDX_KNOWN_FLAGS:
152 152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 153 raise ProgrammingError(msg)
154 154 if flag not in REVIDX_FLAGS_ORDER:
155 155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 156 raise ProgrammingError(msg)
157 157 if flag in _flagprocessors:
158 158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 159 raise error.Abort(msg)
160 160 _flagprocessors[flag] = processor
161 161
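For reference, a processor satisfying the contract described in the docstring could look roughly like the sketch below. The names _readext/_writeext/_rawext are hypothetical, the transforms are deliberately no-ops, and REVIDX_EXTSTORED is used purely as an example flag:

    def _readext(self, rawtext):
        # stored form -> user-visible text; True means the result may be
        # used for hash verification
        return rawtext, True

    def _writeext(self, text):
        # user-visible text -> stored form
        return text, True

    def _rawext(self, rawtext):
        # whether the stored rawtext can be hashed as-is
        return True

    addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))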
162 162 def getoffset(q):
163 163 return int(q >> 16)
164 164
165 165 def gettype(q):
166 166 return int(q & 0xFFFF)
167 167
168 168 def offset_type(offset, type):
169 169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 170 raise ValueError('unknown revlog index flags')
171 171 return int(int(offset) << 16 | type)
172 172
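Together these helpers pack a 48-bit file offset and 16 bits of flags into a single integer, so the round trip below holds (a small sketch):

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED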
173 173 _nullhash = hashlib.sha1(nullid)
174 174
175 175 def hash(text, p1, p2):
176 176 """generate a hash from the given text and its parent hashes
177 177
178 178 This hash combines both the current file contents and its history
179 179 in a manner that makes it easy to distinguish nodes with the same
180 180 content in the revision graph.
181 181 """
182 182 # As of now, if one of the parent nodes is null, it is p2
183 183 if p2 == nullid:
184 184 # deep copy of a hash is faster than creating one
185 185 s = _nullhash.copy()
186 186 s.update(p1)
187 187 else:
188 188 # none of the parent nodes are nullid
189 189 if p1 < p2:
190 190 a = p1
191 191 b = p2
192 192 else:
193 193 a = p2
194 194 b = p1
195 195 s = hashlib.sha1(a)
196 196 s.update(b)
197 197 s.update(text)
198 198 return s.digest()
199 199
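Setting aside the _nullhash fast path, the node is simply sha1(min(p1, p2) + max(p1, p2) + text); an equivalent standalone sketch:

    import hashlib

    def nodeid(text, p1, p2):
        a, b = sorted([p1, p2])   # nullid sorts first, matching the fast path above
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()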
200 200 class _testrevlog(object):
201 201 """minimalist fake revlog to use in doctests"""
202 202
203 203 def __init__(self, data, density=0.5, mingap=0):
204 204 """data is an list of revision payload boundaries"""
205 205 self._data = data
206 206 self._srdensitythreshold = density
207 207 self._srmingapsize = mingap
208 208
209 209 def start(self, rev):
210 210 if rev == 0:
211 211 return 0
212 212 return self._data[rev - 1]
213 213
214 214 def end(self, rev):
215 215 return self._data[rev]
216 216
217 217 def length(self, rev):
218 218 return self.end(rev) - self.start(rev)
219 219
220 220 def __len__(self):
221 221 return len(self._data)
222 222
223 223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 224 """returns revs[startidx:endidx] without empty trailing revs
225 225
226 226 Doctest Setup
227 227 >>> revlog = _testrevlog([
228 228 ... 5, #0
229 229 ... 10, #1
230 230 ... 12, #2
231 231 ... 12, #3 (empty)
232 232 ... 17, #4
233 233 ... 21, #5
234 234 ... 21, #6 (empty)
235 235 ... ])
236 236
237 237 Contiguous cases:
238 238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 239 [0, 1, 2, 3, 4, 5]
240 240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 241 [0, 1, 2, 3, 4]
242 242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 243 [0, 1, 2]
244 244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 245 [2]
246 246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 247 [3, 4, 5]
248 248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 249 [3, 4]
250 250
251 251 Discontiguous cases:
252 252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 253 [1, 3, 5]
254 254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 255 [1]
256 256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 257 [3, 5]
258 258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 259 [3, 5]
260 260 """
261 261 length = revlog.length
262 262
263 263 if endidx is None:
264 264 endidx = len(revs)
265 265
266 266 # Trim empty revs at the end, but never the very first revision of a chain
267 267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
268 268 endidx -= 1
269 269
270 270 return revs[startidx:endidx]
271 271
272 272 def _segmentspan(revlog, revs):
273 273 """Get the byte span of a segment of revisions
274 274
275 275 revs is a sorted array of revision numbers
276 276
277 277 >>> revlog = _testrevlog([
278 278 ... 5, #0
279 279 ... 10, #1
280 280 ... 12, #2
281 281 ... 12, #3 (empty)
282 282 ... 17, #4
283 283 ... ])
284 284
285 285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
286 286 17
287 287 >>> _segmentspan(revlog, [0, 4])
288 288 17
289 289 >>> _segmentspan(revlog, [3, 4])
290 290 5
291 291 >>> _segmentspan(revlog, [1, 2, 3,])
292 292 7
293 293 >>> _segmentspan(revlog, [1, 3])
294 294 7
295 295 """
296 296 if not revs:
297 297 return 0
298 298 return revlog.end(revs[-1]) - revlog.start(revs[0])
299 299
300 300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
301 301 """slice revs to reduce the amount of unrelated data to be read from disk.
302 302
303 303 ``revs`` is sliced into groups that should be read in one time.
304 304 Assume that revs are sorted.
305 305
306 306 The initial chunk is sliced until the overall density (payload/chunks-span
307 307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
308 308 `revlog._srmingapsize` is skipped.
309 309
310 310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
311 311 For consistency with other slicing choices, this limit won't go lower than
312 312 `revlog._srmingapsize`.
313 313 
314 314 If individual revision chunks are larger than this limit, they will still
315 315 be yielded individually.
316 316
317 317 >>> revlog = _testrevlog([
318 318 ... 5, #00 (5)
319 319 ... 10, #01 (5)
320 320 ... 12, #02 (2)
321 321 ... 12, #03 (empty)
322 322 ... 27, #04 (15)
323 323 ... 31, #05 (4)
324 324 ... 31, #06 (empty)
325 325 ... 42, #07 (11)
326 326 ... 47, #08 (5)
327 327 ... 47, #09 (empty)
328 328 ... 48, #10 (1)
329 329 ... 51, #11 (3)
330 330 ... 74, #12 (23)
331 331 ... 85, #13 (11)
332 332 ... 86, #14 (1)
333 333 ... 91, #15 (5)
334 334 ... ])
335 335
336 336 >>> list(_slicechunk(revlog, list(range(16))))
337 337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 338 >>> list(_slicechunk(revlog, [0, 15]))
339 339 [[0], [15]]
340 340 >>> list(_slicechunk(revlog, [0, 11, 15]))
341 341 [[0], [11], [15]]
342 342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
343 343 [[0], [11, 13, 15]]
344 344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 345 [[1, 2], [5, 8, 10, 11], [14]]
346 346
347 347 Slicing with a maximum chunk size
348 348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 349 [[0], [11], [13], [15]]
350 350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 351 [[0], [11], [13, 15]]
352 352 """
353 353 if targetsize is not None:
354 354 targetsize = max(targetsize, revlog._srmingapsize)
355 355 # targetsize should not be specified when evaluating delta candidates:
356 356 # * targetsize is used to ensure we stay within specification when reading,
357 357 # * deltainfo is used to pick a good delta chain when writing.
358 358 if not (deltainfo is None or targetsize is None):
359 359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 360 raise error.ProgrammingError(msg)
361 361 for chunk in _slicechunktodensity(revlog, revs,
362 362 deltainfo,
363 363 revlog._srdensitythreshold,
364 364 revlog._srmingapsize):
365 365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 366 yield subchunk
367 367
368 368 def _slicechunktosize(revlog, revs, targetsize=None):
369 369 """slice revs to match the target size
370 370
371 371 This is intended to be used on chunks that density slicing selected but
372 372 that are still too large compared to the read guarantee of the revlog.
373 373 This might happen when the "minimal gap size" interrupted the slicing, or
374 374 when chains are built in a way that creates large blocks next to each other.
375 375
376 376 >>> revlog = _testrevlog([
377 377 ... 3, #0 (3)
378 378 ... 5, #1 (2)
379 379 ... 6, #2 (1)
380 380 ... 8, #3 (2)
381 381 ... 8, #4 (empty)
382 382 ... 11, #5 (3)
383 383 ... 12, #6 (1)
384 384 ... 13, #7 (1)
385 385 ... 14, #8 (1)
386 386 ... ])
387 387
388 388 Cases where chunk is already small enough
389 389 >>> list(_slicechunktosize(revlog, [0], 3))
390 390 [[0]]
391 391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
392 392 [[6, 7]]
393 393 >>> list(_slicechunktosize(revlog, [0], None))
394 394 [[0]]
395 395 >>> list(_slicechunktosize(revlog, [6, 7], None))
396 396 [[6, 7]]
397 397
398 398 Cases where we need actual slicing
399 399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
400 400 [[0], [1]]
401 401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
402 402 [[1], [3]]
403 403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
404 404 [[1, 2], [3]]
405 405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
406 406 [[3], [5]]
407 407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
408 408 [[3], [5]]
409 409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
410 410 [[5], [6, 7, 8]]
411 411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
412 412 [[0], [1, 2], [3], [5], [6, 7, 8]]
413 413
414 414 Case with too large individual chunk (must return valid chunk)
415 415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
416 416 [[0], [1]]
417 417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
418 418 [[1], [3]]
419 419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
420 420 [[3], [5]]
421 421 """
422 422 assert targetsize is None or 0 <= targetsize
423 423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
424 424 yield revs
425 425 return
426 426
427 427 startrevidx = 0
428 428 startdata = revlog.start(revs[0])
429 429 endrevidx = 0
430 430 iterrevs = enumerate(revs)
431 431 next(iterrevs) # skip first rev.
432 432 for idx, r in iterrevs:
433 433 span = revlog.end(r) - startdata
434 434 if span <= targetsize:
435 435 endrevidx = idx
436 436 else:
437 437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
438 438 if chunk:
439 439 yield chunk
440 440 startrevidx = idx
441 441 startdata = revlog.start(r)
442 442 endrevidx = idx
443 443 yield _trimchunk(revlog, revs, startrevidx)
444 444
445 445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
446 446 mingapsize=0):
447 447 """slice revs to reduce the amount of unrelated data to be read from disk.
448 448
449 449 ``revs`` is sliced into groups that should be read in one time.
450 450 Assume that revs are sorted.
451 451
452 452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
453 453 to the top of the revlog.
454 454
455 455 The initial chunk is sliced until the overall density (payload/chunks-span
456 456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
457 457 skipped.
458 458
459 459 >>> revlog = _testrevlog([
460 460 ... 5, #00 (5)
461 461 ... 10, #01 (5)
462 462 ... 12, #02 (2)
463 463 ... 12, #03 (empty)
464 464 ... 27, #04 (15)
465 465 ... 31, #05 (4)
466 466 ... 31, #06 (empty)
467 467 ... 42, #07 (11)
468 468 ... 47, #08 (5)
469 469 ... 47, #09 (empty)
470 470 ... 48, #10 (1)
471 471 ... 51, #11 (3)
472 472 ... 74, #12 (23)
473 473 ... 85, #13 (11)
474 474 ... 86, #14 (1)
475 475 ... 91, #15 (5)
476 476 ... ])
477 477
478 478 >>> list(_slicechunktodensity(revlog, list(range(16))))
479 479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
480 480 >>> list(_slicechunktodensity(revlog, [0, 15]))
481 481 [[0], [15]]
482 482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
483 483 [[0], [11], [15]]
484 484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
485 485 [[0], [11, 13, 15]]
486 486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
487 487 [[1, 2], [5, 8, 10, 11], [14]]
488 488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
489 489 ... mingapsize=20))
490 490 [[1, 2, 3, 5, 8, 10, 11], [14]]
491 491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
492 492 ... targetdensity=0.95))
493 493 [[1, 2], [5], [8, 10, 11], [14]]
494 494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
495 495 ... targetdensity=0.95, mingapsize=12))
496 496 [[1, 2], [5, 8, 10, 11], [14]]
497 497 """
498 498 start = revlog.start
499 499 length = revlog.length
500 500
501 501 if len(revs) <= 1:
502 502 yield revs
503 503 return
504 504
505 505 nextrev = len(revlog)
506 506 nextoffset = revlog.end(nextrev - 1)
507 507
508 508 if deltainfo is None:
509 509 deltachainspan = _segmentspan(revlog, revs)
510 510 chainpayload = sum(length(r) for r in revs)
511 511 else:
512 512 deltachainspan = deltainfo.distance
513 513 chainpayload = deltainfo.compresseddeltalen
514 514
515 515 if deltachainspan < mingapsize:
516 516 yield revs
517 517 return
518 518
519 519 readdata = deltachainspan
520 520
521 521 if deltachainspan:
522 522 density = chainpayload / float(deltachainspan)
523 523 else:
524 524 density = 1.0
525 525
526 526 if density >= targetdensity:
527 527 yield revs
528 528 return
529 529
530 530 if deltainfo is not None:
531 531 revs = list(revs)
532 532 revs.append(nextrev)
533 533
534 534 # Store the gaps in a heap to have them sorted by decreasing size
535 535 gapsheap = []
536 536 heapq.heapify(gapsheap)
537 537 prevend = None
538 538 for i, rev in enumerate(revs):
539 539 if rev < nextrev:
540 540 revstart = start(rev)
541 541 revlen = length(rev)
542 542 else:
543 543 revstart = nextoffset
544 544 revlen = deltainfo.deltalen
545 545
546 546 # Skip empty revisions to form larger holes
547 547 if revlen == 0:
548 548 continue
549 549
550 550 if prevend is not None:
551 551 gapsize = revstart - prevend
552 552 # only consider holes that are large enough
553 553 if gapsize > mingapsize:
554 554 heapq.heappush(gapsheap, (-gapsize, i))
555 555
556 556 prevend = revstart + revlen
557 557
558 558 # Collect the indices of the largest holes until the density is acceptable
559 559 indicesheap = []
560 560 heapq.heapify(indicesheap)
561 561 while gapsheap and density < targetdensity:
562 562 oppgapsize, gapidx = heapq.heappop(gapsheap)
563 563
564 564 heapq.heappush(indicesheap, gapidx)
565 565
566 566 # the gap sizes are stored as negatives to be sorted decreasingly
567 567 # by the heap
568 568 readdata -= (-oppgapsize)
569 569 if readdata > 0:
570 570 density = chainpayload / float(readdata)
571 571 else:
572 572 density = 1.0
573 573
574 574 # Cut the revs at collected indices
575 575 previdx = 0
576 576 while indicesheap:
577 577 idx = heapq.heappop(indicesheap)
578 578
579 579 chunk = _trimchunk(revlog, revs, previdx, idx)
580 580 if chunk:
581 581 yield chunk
582 582
583 583 previdx = idx
584 584
585 585 chunk = _trimchunk(revlog, revs, previdx)
586 586 if chunk:
587 587 yield chunk
588 588
589 589 @attr.s(slots=True, frozen=True)
590 590 class _deltainfo(object):
591 591 distance = attr.ib()
592 592 deltalen = attr.ib()
593 593 data = attr.ib()
594 594 base = attr.ib()
595 595 chainbase = attr.ib()
596 596 chainlen = attr.ib()
597 597 compresseddeltalen = attr.ib()
598 598
599 599 class _deltacomputer(object):
600 600 def __init__(self, revlog):
601 601 self.revlog = revlog
602 602
603 603 def _getcandidaterevs(self, p1, p2, cachedelta):
604 604 """
605 605 Provides candidate revisions that are interesting to diff against,
606 606 grouped by level of preference (most promising candidates first).
607 607 """
608 608 revlog = self.revlog
609 609 gdelta = revlog._generaldelta
610 610 curr = len(revlog)
611 611 prev = curr - 1
612 612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
613 613
614 614 # should we try to build a delta?
615 615 if prev != nullrev and revlog.storedeltachains:
616 616 tested = set()
617 617 # This condition is true most of the time when processing
618 618 # changegroup data into a generaldelta repo. The only time it
619 619 # isn't true is if this is the first revision in a delta chain
620 620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
621 621 if cachedelta and gdelta and revlog._lazydeltabase:
622 622 # Assume what we received from the server is a good choice
623 623 # build delta will reuse the cache
624 624 yield (cachedelta[0],)
625 625 tested.add(cachedelta[0])
626 626
627 627 if gdelta:
628 628 # exclude already lazy tested base if any
629 629 parents = [p for p in (p1r, p2r)
630 630 if p != nullrev and p not in tested]
631 631
632 632 if not revlog._deltabothparents and len(parents) == 2:
633 633 parents.sort()
634 634 # To minimize the chance of having to build a fulltext,
635 635 # pick first whichever parent is closest to us (max rev)
636 636 yield (parents[1],)
637 637 # then the other one (min rev) if the first did not fit
638 638 yield (parents[0],)
639 639 tested.update(parents)
640 640 elif len(parents) > 0:
641 641 # Test all parents (1 or 2), and keep the best candidate
642 642 yield parents
643 643 tested.update(parents)
644 644
645 645 if prev not in tested:
646 646 # other approaches failed; try against prev to hopefully save us a
647 647 # fulltext.
648 648 yield (prev,)
649 649 tested.add(prev)
650 650
651 651 def buildtext(self, revinfo, fh):
652 652 """Builds a fulltext version of a revision
653 653
654 654 revinfo: _revisioninfo instance that contains all needed info
655 655 fh: file handle to either the .i or the .d revlog file,
656 656 depending on whether it is inlined or not
657 657 """
658 658 btext = revinfo.btext
659 659 if btext[0] is not None:
660 660 return btext[0]
661 661
662 662 revlog = self.revlog
663 663 cachedelta = revinfo.cachedelta
664 664 flags = revinfo.flags
665 665 node = revinfo.node
666 666
667 667 baserev = cachedelta[0]
668 668 delta = cachedelta[1]
669 669 # special case deltas which replace entire base; no need to decode
670 670 # base revision. this neatly avoids censored bases, which throw when
671 671 # they're decoded.
672 672 hlen = struct.calcsize(">lll")
673 673 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
674 674 len(delta) - hlen):
675 675 btext[0] = delta[hlen:]
676 676 else:
677 677 # deltabase is rawtext before changed by flag processors, which is
678 678 # equivalent to non-raw text
679 679 basetext = revlog.revision(baserev, _df=fh, raw=False)
680 680 btext[0] = mdiff.patch(basetext, delta)
681 681
682 682 try:
683 683 res = revlog._processflags(btext[0], flags, 'read', raw=True)
684 684 btext[0], validatehash = res
685 685 if validatehash:
686 686 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
687 687 if flags & REVIDX_ISCENSORED:
688 688 raise RevlogError(_('node %s is not censored') % node)
689 689 except CensoredNodeError:
690 690 # must pass the censored index flag to add censored revisions
691 691 if not flags & REVIDX_ISCENSORED:
692 692 raise
693 693 return btext[0]
694 694
695 695 def _builddeltadiff(self, base, revinfo, fh):
696 696 revlog = self.revlog
697 697 t = self.buildtext(revinfo, fh)
698 698 if revlog.iscensored(base):
699 699 # deltas based on a censored revision must replace the
700 700 # full content in one patch, so delta works everywhere
701 701 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
702 702 delta = header + t
703 703 else:
704 704 ptext = revlog.revision(base, _df=fh, raw=True)
705 705 delta = mdiff.textdiff(ptext, t)
706 706
707 707 return delta
708 708
709 709 def _builddeltainfo(self, revinfo, base, fh):
710 710 # can we use the cached delta?
711 711 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
712 712 delta = revinfo.cachedelta[1]
713 713 else:
714 714 delta = self._builddeltadiff(base, revinfo, fh)
715 715 revlog = self.revlog
716 716 header, data = revlog.compress(delta)
717 717 deltalen = len(header) + len(data)
718 718 chainbase = revlog.chainbase(base)
719 719 offset = revlog.end(len(revlog) - 1)
720 720 dist = deltalen + offset - revlog.start(chainbase)
721 721 if revlog._generaldelta:
722 722 deltabase = base
723 723 else:
724 724 deltabase = chainbase
725 725 chainlen, compresseddeltalen = revlog._chaininfo(base)
726 726 chainlen += 1
727 727 compresseddeltalen += deltalen
728 728 return _deltainfo(dist, deltalen, (header, data), deltabase,
729 729 chainbase, chainlen, compresseddeltalen)
730 730
731 731 def finddeltainfo(self, revinfo, fh):
732 732 """Find an acceptable delta against a candidate revision
733 733
734 734 revinfo: information about the revision (instance of _revisioninfo)
735 735 fh: file handle to either the .i or the .d revlog file,
736 736 depending on whether it is inlined or not
737 737
738 738 Returns the first acceptable candidate revision, as ordered by
739 739 _getcandidaterevs
740 740 """
741 741 cachedelta = revinfo.cachedelta
742 742 p1 = revinfo.p1
743 743 p2 = revinfo.p2
744 744 revlog = self.revlog
745 745
746 746 deltainfo = None
747 747 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
748 748 nominateddeltas = []
749 749 for candidaterev in candidaterevs:
750 750 # no delta for rawtext-changing revs (see "candelta" for why)
751 751 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
752 752 continue
753 753 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
754 754 if revlog._isgooddeltainfo(candidatedelta, revinfo):
755 755 nominateddeltas.append(candidatedelta)
756 756 if nominateddeltas:
757 757 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
758 758 break
759 759
760 760 return deltainfo
761 761
762 762 @attr.s(slots=True, frozen=True)
763 763 class _revisioninfo(object):
764 764 """Information about a revision that allows building its fulltext
765 765 node: expected hash of the revision
766 766 p1, p2: parent revs of the revision
767 767 btext: built text cache consisting of a one-element list
768 768 cachedelta: (baserev, uncompressed_delta) or None
769 769 flags: flags associated to the revision storage
770 770
771 771 One of btext[0] or cachedelta must be set.
772 772 """
773 773 node = attr.ib()
774 774 p1 = attr.ib()
775 775 p2 = attr.ib()
776 776 btext = attr.ib()
777 777 textlen = attr.ib()
778 778 cachedelta = attr.ib()
779 779 flags = attr.ib()
780 780
781 781 # index v0:
782 782 # 4 bytes: offset
783 783 # 4 bytes: compressed length
784 784 # 4 bytes: base rev
785 785 # 4 bytes: link rev
786 786 # 20 bytes: parent 1 nodeid
787 787 # 20 bytes: parent 2 nodeid
788 788 # 20 bytes: nodeid
789 789 indexformatv0 = struct.Struct(">4l20s20s20s")
790 790 indexformatv0_pack = indexformatv0.pack
791 791 indexformatv0_unpack = indexformatv0.unpack
792 792
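Each v0 index entry is thus a fixed 76 bytes (four 4-byte longs plus three 20-byte nodeids); packing one with made-up values (an illustrative sketch):

    entry = indexformatv0_pack(0,             # offset into the data file
                               11,            # compressed length
                               0,             # base rev
                               0,             # link rev
                               nullid,        # parent 1 nodeid
                               nullid,        # parent 2 nodeid
                               b'\x12' * 20)  # nodeid
    assert len(entry) == indexformatv0.size == 76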
793 793 class revlogoldindex(list):
794 def __len__(self):
795 return list.__len__(self) + 1
796 794 def __getitem__(self, i):
797 if i == -1 or i == len(self) - 1:
795 if i == -1 or i == len(self):
798 796 return (0, 0, 0, -1, -1, -1, -1, nullid)
799 797 return list.__getitem__(self, i)
800 798
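With the __len__ override removed, len() of a revlogoldindex no longer counts the virtual null entry; it now equals the number of real revisions, while the sentinel stays reachable through __getitem__ at position -1 or len(index). A sketch of the new behaviour with a made-up entry:

    idx = revlogoldindex([(0, 11, -1, 0, 0, -1, -1, b'\x12' * 20)])
    assert len(idx) == 1                   # only the real revision is counted
    assert idx[0][7] == b'\x12' * 20       # normal lookups are untouched
    assert idx[len(idx)][7] == nullid      # the virtual null entry
    assert idx[-1][7] == nullid            # -1 also maps to the null entry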
801 799 class revlogoldio(object):
802 800 def __init__(self):
803 801 self.size = indexformatv0.size
804 802
805 803 def parseindex(self, data, inline):
806 804 s = self.size
807 805 index = []
808 806 nodemap = {nullid: nullrev}
809 807 n = off = 0
810 808 l = len(data)
811 809 while off + s <= l:
812 810 cur = data[off:off + s]
813 811 off += s
814 812 e = indexformatv0_unpack(cur)
815 813 # transform to revlogv1 format
816 814 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
817 815 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
818 816 index.append(e2)
819 817 nodemap[e[6]] = n
820 818 n += 1
821 819
822 820 return revlogoldindex(index), nodemap, None
823 821
824 822 def packentry(self, entry, node, version, rev):
825 823 if gettype(entry[0]):
826 824 raise RevlogError(_('index entry flags need revlog version 1'))
827 825 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
828 826 node(entry[5]), node(entry[6]), entry[7])
829 827 return indexformatv0_pack(*e2)
830 828
831 829 # index ng:
832 830 # 6 bytes: offset
833 831 # 2 bytes: flags
834 832 # 4 bytes: compressed length
835 833 # 4 bytes: uncompressed length
836 834 # 4 bytes: base rev
837 835 # 4 bytes: link rev
838 836 # 4 bytes: parent 1 rev
839 837 # 4 bytes: parent 2 rev
840 838 # 32 bytes: nodeid
841 839 indexformatng = struct.Struct(">Qiiiiii20s12x")
842 840 indexformatng_pack = indexformatng.pack
843 841 versionformat = struct.Struct(">I")
844 842 versionformat_pack = versionformat.pack
845 843 versionformat_unpack = versionformat.unpack
846 844
847 845 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
848 846 # signed integer)
849 847 _maxentrysize = 0x7fffffff
850 848
851 849 class revlogio(object):
852 850 def __init__(self):
853 851 self.size = indexformatng.size
854 852
855 853 def parseindex(self, data, inline):
856 854 # call the C implementation to parse the index data
857 855 index, cache = parsers.parse_index2(data, inline)
858 856 return index, getattr(index, 'nodemap', None), cache
859 857
860 858 def packentry(self, entry, node, version, rev):
861 859 p = indexformatng_pack(*entry)
862 860 if rev == 0:
863 861 p = versionformat_pack(version) + p[4:]
864 862 return p
865 863
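Each "ng" (v1) entry is a fixed 64 bytes, and for revision 0 packentry splices the revlog version header into the first four bytes; a small sketch with made-up entry values:

    assert indexformatng.size == 64
    io = revlogio()
    e = (0, 11, 11, 0, 0, -1, -1, b'\x12' * 20)             # made-up entry
    p0 = io.packentry(e, None, REVLOG_DEFAULT_VERSION, 0)   # rev 0
    assert p0[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
    assert len(p0) == indexformatng.size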
866 864 class revlog(object):
867 865 """
868 866 the underlying revision storage object
869 867
870 868 A revlog consists of two parts, an index and the revision data.
871 869
872 870 The index is a file with a fixed record size containing
873 871 information on each revision, including its nodeid (hash), the
874 872 nodeids of its parents, the position and offset of its data within
875 873 the data file, and the revision it's based on. Finally, each entry
876 874 contains a linkrev entry that can serve as a pointer to external
877 875 data.
878 876
879 877 The revision data itself is a linear collection of data chunks.
880 878 Each chunk represents a revision and is usually represented as a
881 879 delta against the previous chunk. To bound lookup time, runs of
882 880 deltas are limited to about 2 times the length of the original
883 881 version data. This makes retrieval of a version proportional to
884 882 its size, or O(1) relative to the number of revisions.
885 883
886 884 Both pieces of the revlog are written to in an append-only
887 885 fashion, which means we never need to rewrite a file to insert or
888 886 remove data, and can use some simple techniques to avoid the need
889 887 for locking while reading.
890 888
891 889 If checkambig, indexfile is opened with checkambig=True at
892 890 writing, to avoid file stat ambiguity.
893 891
894 892 If mmaplargeindex is True, and an mmapindexthreshold is set, the
895 893 index will be mmapped rather than read if it is larger than the
896 894 configured threshold.
897 895
898 896 If censorable is True, the revlog can have censored revisions.
899 897 """
900 898 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
901 899 mmaplargeindex=False, censorable=False):
902 900 """
903 901 create a revlog object
904 902
905 903 opener is a function that abstracts the file opening operation
906 904 and can be used to implement COW semantics or the like.
907 905 """
908 906 self.indexfile = indexfile
909 907 self.datafile = datafile or (indexfile[:-2] + ".d")
910 908 self.opener = opener
911 909 # When True, indexfile is opened with checkambig=True at writing, to
912 910 # avoid file stat ambiguity.
913 911 self._checkambig = checkambig
914 912 self._censorable = censorable
915 913 # 3-tuple of (node, rev, text) for a raw revision.
916 914 self._cache = None
917 915 # Maps rev to chain base rev.
918 916 self._chainbasecache = util.lrucachedict(100)
919 917 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
920 918 self._chunkcache = (0, '')
921 919 # How much data to read and cache into the raw revlog data cache.
922 920 self._chunkcachesize = 65536
923 921 self._maxchainlen = None
924 922 self._deltabothparents = True
925 923 self.index = []
926 924 # Mapping of partial identifiers to full nodes.
927 925 self._pcache = {}
928 926 # Mapping of revision integer to full node.
929 927 self._nodecache = {nullid: nullrev}
930 928 self._nodepos = None
931 929 self._compengine = 'zlib'
932 930 self._maxdeltachainspan = -1
933 931 self._withsparseread = False
934 932 self._sparserevlog = False
935 933 self._srdensitythreshold = 0.50
936 934 self._srmingapsize = 262144
937 935
938 936 mmapindexthreshold = None
939 937 v = REVLOG_DEFAULT_VERSION
940 938 opts = getattr(opener, 'options', None)
941 939 if opts is not None:
942 940 if 'revlogv2' in opts:
943 941 # version 2 revlogs always use generaldelta.
944 942 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
945 943 elif 'revlogv1' in opts:
946 944 if 'generaldelta' in opts:
947 945 v |= FLAG_GENERALDELTA
948 946 else:
949 947 v = 0
950 948 if 'chunkcachesize' in opts:
951 949 self._chunkcachesize = opts['chunkcachesize']
952 950 if 'maxchainlen' in opts:
953 951 self._maxchainlen = opts['maxchainlen']
954 952 if 'deltabothparents' in opts:
955 953 self._deltabothparents = opts['deltabothparents']
956 954 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
957 955 if 'compengine' in opts:
958 956 self._compengine = opts['compengine']
959 957 if 'maxdeltachainspan' in opts:
960 958 self._maxdeltachainspan = opts['maxdeltachainspan']
961 959 if mmaplargeindex and 'mmapindexthreshold' in opts:
962 960 mmapindexthreshold = opts['mmapindexthreshold']
963 961 self._sparserevlog = bool(opts.get('sparse-revlog', False))
964 962 withsparseread = bool(opts.get('with-sparse-read', False))
965 963 # sparse-revlog forces sparse-read
966 964 self._withsparseread = self._sparserevlog or withsparseread
967 965 if 'sparse-read-density-threshold' in opts:
968 966 self._srdensitythreshold = opts['sparse-read-density-threshold']
969 967 if 'sparse-read-min-gap-size' in opts:
970 968 self._srmingapsize = opts['sparse-read-min-gap-size']
971 969
972 970 if self._chunkcachesize <= 0:
973 971 raise RevlogError(_('revlog chunk cache size %r is not greater '
974 972 'than 0') % self._chunkcachesize)
975 973 elif self._chunkcachesize & (self._chunkcachesize - 1):
976 974 raise RevlogError(_('revlog chunk cache size %r is not a power '
977 975 'of 2') % self._chunkcachesize)
978 976
979 977 indexdata = ''
980 978 self._initempty = True
981 979 try:
982 980 with self._indexfp() as f:
983 981 if (mmapindexthreshold is not None and
984 982 self.opener.fstat(f).st_size >= mmapindexthreshold):
985 983 indexdata = util.buffer(util.mmapread(f))
986 984 else:
987 985 indexdata = f.read()
988 986 if len(indexdata) > 0:
989 987 v = versionformat_unpack(indexdata[:4])[0]
990 988 self._initempty = False
991 989 except IOError as inst:
992 990 if inst.errno != errno.ENOENT:
993 991 raise
994 992
995 993 self.version = v
996 994 self._inline = v & FLAG_INLINE_DATA
997 995 self._generaldelta = v & FLAG_GENERALDELTA
998 996 flags = v & ~0xFFFF
999 997 fmt = v & 0xFFFF
1000 998 if fmt == REVLOGV0:
1001 999 if flags:
1002 1000 raise RevlogError(_('unknown flags (%#04x) in version %d '
1003 1001 'revlog %s') %
1004 1002 (flags >> 16, fmt, self.indexfile))
1005 1003 elif fmt == REVLOGV1:
1006 1004 if flags & ~REVLOGV1_FLAGS:
1007 1005 raise RevlogError(_('unknown flags (%#04x) in version %d '
1008 1006 'revlog %s') %
1009 1007 (flags >> 16, fmt, self.indexfile))
1010 1008 elif fmt == REVLOGV2:
1011 1009 if flags & ~REVLOGV2_FLAGS:
1012 1010 raise RevlogError(_('unknown flags (%#04x) in version %d '
1013 1011 'revlog %s') %
1014 1012 (flags >> 16, fmt, self.indexfile))
1015 1013 else:
1016 1014 raise RevlogError(_('unknown version (%d) in revlog %s') %
1017 1015 (fmt, self.indexfile))
1018 1016
1019 1017 self.storedeltachains = True
1020 1018
1021 1019 self._io = revlogio()
1022 1020 if self.version == REVLOGV0:
1023 1021 self._io = revlogoldio()
1024 1022 try:
1025 1023 d = self._io.parseindex(indexdata, self._inline)
1026 1024 except (ValueError, IndexError):
1027 1025 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
1028 1026 self.index, nodemap, self._chunkcache = d
1029 1027 if nodemap is not None:
1030 1028 self.nodemap = self._nodecache = nodemap
1031 1029 if not self._chunkcache:
1032 1030 self._chunkclear()
1033 1031 # revnum -> (chain-length, sum-delta-length)
1034 1032 self._chaininfocache = {}
1035 1033 # revlog header -> revlog compressor
1036 1034 self._decompressors = {}
1037 1035
1038 1036 @util.propertycache
1039 1037 def _compressor(self):
1040 1038 return util.compengines[self._compengine].revlogcompressor()
1041 1039
1042 1040 def _indexfp(self, mode='r'):
1043 1041 """file object for the revlog's index file"""
1044 1042 args = {r'mode': mode}
1045 1043 if mode != 'r':
1046 1044 args[r'checkambig'] = self._checkambig
1047 1045 if mode == 'w':
1048 1046 args[r'atomictemp'] = True
1049 1047 return self.opener(self.indexfile, **args)
1050 1048
1051 1049 def _datafp(self, mode='r'):
1052 1050 """file object for the revlog's data file"""
1053 1051 return self.opener(self.datafile, mode=mode)
1054 1052
1055 1053 @contextlib.contextmanager
1056 1054 def _datareadfp(self, existingfp=None):
1057 1055 """file object suitable to read data"""
1058 1056 if existingfp is not None:
1059 1057 yield existingfp
1060 1058 else:
1061 1059 if self._inline:
1062 1060 func = self._indexfp
1063 1061 else:
1064 1062 func = self._datafp
1065 1063 with func() as fp:
1066 1064 yield fp
1067 1065
1068 1066 def tip(self):
1069 return self.node(len(self.index) - 2)
1067 return self.node(len(self.index) - 1)
1070 1068 def __contains__(self, rev):
1071 1069 return 0 <= rev < len(self)
1072 1070 def __len__(self):
1073 return len(self.index) - 1
1071 return len(self.index)
1074 1072 def __iter__(self):
1075 1073 return iter(pycompat.xrange(len(self)))
1076 1074 def revs(self, start=0, stop=None):
1077 1075 """iterate over all rev in this revlog (from start to stop)"""
1078 1076 step = 1
1079 1077 length = len(self)
1080 1078 if stop is not None:
1081 1079 if start > stop:
1082 1080 step = -1
1083 1081 stop += step
1084 1082 if stop > length:
1085 1083 stop = length
1086 1084 else:
1087 1085 stop = length
1088 1086 return pycompat.xrange(start, stop, step)
1089 1087
1090 1088 @util.propertycache
1091 1089 def nodemap(self):
1092 1090 self.rev(self.node(0))
1093 1091 return self._nodecache
1094 1092
1095 1093 def hasnode(self, node):
1096 1094 try:
1097 1095 self.rev(node)
1098 1096 return True
1099 1097 except KeyError:
1100 1098 return False
1101 1099
1102 1100 def candelta(self, baserev, rev):
1103 1101 """whether two revisions (baserev, rev) can be delta-ed or not"""
1104 1102 # Disable delta if either rev requires a content-changing flag
1105 1103 # processor (ex. LFS). This is because such flag processor can alter
1106 1104 # the rawtext content that the delta will be based on, and two clients
1107 1105 # could have a same revlog node with different flags (i.e. different
1108 1106 # rawtext contents) and the delta could be incompatible.
1109 1107 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
1110 1108 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
1111 1109 return False
1112 1110 return True
1113 1111
1114 1112 def clearcaches(self):
1115 1113 self._cache = None
1116 1114 self._chainbasecache.clear()
1117 1115 self._chunkcache = (0, '')
1118 1116 self._pcache = {}
1119 1117
1120 1118 try:
1121 1119 self._nodecache.clearcaches()
1122 1120 except AttributeError:
1123 1121 self._nodecache = {nullid: nullrev}
1124 1122 self._nodepos = None
1125 1123
1126 1124 def rev(self, node):
1127 1125 try:
1128 1126 return self._nodecache[node]
1129 1127 except TypeError:
1130 1128 raise
1131 1129 except RevlogError:
1132 1130 # parsers.c radix tree lookup failed
1133 1131 if node == wdirid or node in wdirfilenodeids:
1134 1132 raise error.WdirUnsupported
1135 1133 raise LookupError(node, self.indexfile, _('no node'))
1136 1134 except KeyError:
1137 1135 # pure python cache lookup failed
1138 1136 n = self._nodecache
1139 1137 i = self.index
1140 1138 p = self._nodepos
1141 1139 if p is None:
1142 p = len(i) - 2
1140 p = len(i) - 1
1143 1141 else:
1144 1142 assert p < len(i)
1145 1143 for r in pycompat.xrange(p, -1, -1):
1146 1144 v = i[r][7]
1147 1145 n[v] = r
1148 1146 if v == node:
1149 1147 self._nodepos = r - 1
1150 1148 return r
1151 1149 if node == wdirid or node in wdirfilenodeids:
1152 1150 raise error.WdirUnsupported
1153 1151 raise LookupError(node, self.indexfile, _('no node'))
1154 1152
1155 1153 # Accessors for index entries.
1156 1154
1157 1155 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1158 1156 # are flags.
1159 1157 def start(self, rev):
1160 1158 return int(self.index[rev][0] >> 16)
1161 1159
1162 1160 def flags(self, rev):
1163 1161 return self.index[rev][0] & 0xFFFF
1164 1162
1165 1163 def length(self, rev):
1166 1164 return self.index[rev][1]
1167 1165
1168 1166 def rawsize(self, rev):
1169 1167 """return the length of the uncompressed text for a given revision"""
1170 1168 l = self.index[rev][2]
1171 1169 if l >= 0:
1172 1170 return l
1173 1171
1174 1172 t = self.revision(rev, raw=True)
1175 1173 return len(t)
1176 1174
1177 1175 def size(self, rev):
1178 1176 """length of non-raw text (processed by a "read" flag processor)"""
1179 1177 # fast path: if no "read" flag processor could change the content,
1180 1178 # size is rawsize. note: ELLIPSIS is known to not change the content.
1181 1179 flags = self.flags(rev)
1182 1180 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1183 1181 return self.rawsize(rev)
1184 1182
1185 1183 return len(self.revision(rev, raw=False))
1186 1184
1187 1185 def chainbase(self, rev):
1188 1186 base = self._chainbasecache.get(rev)
1189 1187 if base is not None:
1190 1188 return base
1191 1189
1192 1190 index = self.index
1193 1191 iterrev = rev
1194 1192 base = index[iterrev][3]
1195 1193 while base != iterrev:
1196 1194 iterrev = base
1197 1195 base = index[iterrev][3]
1198 1196
1199 1197 self._chainbasecache[rev] = base
1200 1198 return base
1201 1199
1202 1200 def linkrev(self, rev):
1203 1201 return self.index[rev][4]
1204 1202
1205 1203 def parentrevs(self, rev):
1206 1204 try:
1207 1205 entry = self.index[rev]
1208 1206 except IndexError:
1209 1207 if rev == wdirrev:
1210 1208 raise error.WdirUnsupported
1211 1209 raise
1212 1210
1213 1211 return entry[5], entry[6]
1214 1212
1215 1213 def node(self, rev):
1216 1214 try:
1217 1215 return self.index[rev][7]
1218 1216 except IndexError:
1219 1217 if rev == wdirrev:
1220 1218 raise error.WdirUnsupported
1221 1219 raise
1222 1220
1223 1221 # Derived from index values.
1224 1222
1225 1223 def end(self, rev):
1226 1224 return self.start(rev) + self.length(rev)
1227 1225
1228 1226 def parents(self, node):
1229 1227 i = self.index
1230 1228 d = i[self.rev(node)]
1231 1229 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1232 1230
1233 1231 def chainlen(self, rev):
1234 1232 return self._chaininfo(rev)[0]
1235 1233
1236 1234 def _chaininfo(self, rev):
1237 1235 chaininfocache = self._chaininfocache
1238 1236 if rev in chaininfocache:
1239 1237 return chaininfocache[rev]
1240 1238 index = self.index
1241 1239 generaldelta = self._generaldelta
1242 1240 iterrev = rev
1243 1241 e = index[iterrev]
1244 1242 clen = 0
1245 1243 compresseddeltalen = 0
1246 1244 while iterrev != e[3]:
1247 1245 clen += 1
1248 1246 compresseddeltalen += e[1]
1249 1247 if generaldelta:
1250 1248 iterrev = e[3]
1251 1249 else:
1252 1250 iterrev -= 1
1253 1251 if iterrev in chaininfocache:
1254 1252 t = chaininfocache[iterrev]
1255 1253 clen += t[0]
1256 1254 compresseddeltalen += t[1]
1257 1255 break
1258 1256 e = index[iterrev]
1259 1257 else:
1260 1258 # Add text length of base since decompressing that also takes
1261 1259 # work. For cache hits the length is already included.
1262 1260 compresseddeltalen += e[1]
1263 1261 r = (clen, compresseddeltalen)
1264 1262 chaininfocache[rev] = r
1265 1263 return r
1266 1264
1267 1265 def _deltachain(self, rev, stoprev=None):
1268 1266 """Obtain the delta chain for a revision.
1269 1267
1270 1268 ``stoprev`` specifies a revision to stop at. If not specified, we
1271 1269 stop at the base of the chain.
1272 1270
1273 1271 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1274 1272 revs in ascending order and ``stopped`` is a bool indicating whether
1275 1273 ``stoprev`` was hit.
1276 1274 """
1277 1275 # Try C implementation.
1278 1276 try:
1279 1277 return self.index.deltachain(rev, stoprev, self._generaldelta)
1280 1278 except AttributeError:
1281 1279 pass
1282 1280
1283 1281 chain = []
1284 1282
1285 1283 # Alias to prevent attribute lookup in tight loop.
1286 1284 index = self.index
1287 1285 generaldelta = self._generaldelta
1288 1286
1289 1287 iterrev = rev
1290 1288 e = index[iterrev]
1291 1289 while iterrev != e[3] and iterrev != stoprev:
1292 1290 chain.append(iterrev)
1293 1291 if generaldelta:
1294 1292 iterrev = e[3]
1295 1293 else:
1296 1294 iterrev -= 1
1297 1295 e = index[iterrev]
1298 1296
1299 1297 if iterrev == stoprev:
1300 1298 stopped = True
1301 1299 else:
1302 1300 chain.append(iterrev)
1303 1301 stopped = False
1304 1302
1305 1303 chain.reverse()
1306 1304 return chain, stopped
1307 1305
1308 1306 def ancestors(self, revs, stoprev=0, inclusive=False):
1309 1307 """Generate the ancestors of 'revs' in reverse topological order.
1310 1308 Does not generate revs lower than stoprev.
1311 1309
1312 1310 See the documentation for ancestor.lazyancestors for more details."""
1313 1311
1314 1312 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
1315 1313 inclusive=inclusive)
1316 1314
1317 1315 def descendants(self, revs):
1318 1316 """Generate the descendants of 'revs' in revision order.
1319 1317
1320 1318 Yield a sequence of revision numbers starting with a child of
1321 1319 some rev in revs, i.e., each revision is *not* considered a
1322 1320 descendant of itself. Results are ordered by revision number (a
1323 1321 topological sort)."""
1324 1322 first = min(revs)
1325 1323 if first == nullrev:
1326 1324 for i in self:
1327 1325 yield i
1328 1326 return
1329 1327
1330 1328 seen = set(revs)
1331 1329 for i in self.revs(start=first + 1):
1332 1330 for x in self.parentrevs(i):
1333 1331 if x != nullrev and x in seen:
1334 1332 seen.add(i)
1335 1333 yield i
1336 1334 break
1337 1335
1338 1336 def findcommonmissing(self, common=None, heads=None):
1339 1337 """Return a tuple of the ancestors of common and the ancestors of heads
1340 1338 that are not ancestors of common. In revset terminology, we return the
1341 1339 tuple:
1342 1340
1343 1341 ::common, (::heads) - (::common)
1344 1342
1345 1343 The list is sorted by revision number, meaning it is
1346 1344 topologically sorted.
1347 1345
1348 1346 'heads' and 'common' are both lists of node IDs. If heads is
1349 1347 not supplied, uses all of the revlog's heads. If common is not
1350 1348 supplied, uses nullid."""
1351 1349 if common is None:
1352 1350 common = [nullid]
1353 1351 if heads is None:
1354 1352 heads = self.heads()
1355 1353
1356 1354 common = [self.rev(n) for n in common]
1357 1355 heads = [self.rev(n) for n in heads]
1358 1356
1359 1357 # we want the ancestors, but inclusive
1360 1358 class lazyset(object):
1361 1359 def __init__(self, lazyvalues):
1362 1360 self.addedvalues = set()
1363 1361 self.lazyvalues = lazyvalues
1364 1362
1365 1363 def __contains__(self, value):
1366 1364 return value in self.addedvalues or value in self.lazyvalues
1367 1365
1368 1366 def __iter__(self):
1369 1367 added = self.addedvalues
1370 1368 for r in added:
1371 1369 yield r
1372 1370 for r in self.lazyvalues:
1373 1371 if not r in added:
1374 1372 yield r
1375 1373
1376 1374 def add(self, value):
1377 1375 self.addedvalues.add(value)
1378 1376
1379 1377 def update(self, values):
1380 1378 self.addedvalues.update(values)
1381 1379
1382 1380 has = lazyset(self.ancestors(common))
1383 1381 has.add(nullrev)
1384 1382 has.update(common)
1385 1383
1386 1384 # take all ancestors from heads that aren't in has
1387 1385 missing = set()
1388 1386 visit = collections.deque(r for r in heads if r not in has)
1389 1387 while visit:
1390 1388 r = visit.popleft()
1391 1389 if r in missing:
1392 1390 continue
1393 1391 else:
1394 1392 missing.add(r)
1395 1393 for p in self.parentrevs(r):
1396 1394 if p not in has:
1397 1395 visit.append(p)
1398 1396 missing = list(missing)
1399 1397 missing.sort()
1400 1398 return has, [self.node(miss) for miss in missing]
1401 1399
1402 1400 def incrementalmissingrevs(self, common=None):
1403 1401 """Return an object that can be used to incrementally compute the
1404 1402 revision numbers of the ancestors of arbitrary sets that are not
1405 1403 ancestors of common. This is an ancestor.incrementalmissingancestors
1406 1404 object.
1407 1405
1408 1406 'common' is a list of revision numbers. If common is not supplied, uses
1409 1407 nullrev.
1410 1408 """
1411 1409 if common is None:
1412 1410 common = [nullrev]
1413 1411
1414 1412 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1415 1413
1416 1414 def findmissingrevs(self, common=None, heads=None):
1417 1415 """Return the revision numbers of the ancestors of heads that
1418 1416 are not ancestors of common.
1419 1417
1420 1418 More specifically, return a list of revision numbers corresponding to
1421 1419 nodes N such that every N satisfies the following constraints:
1422 1420
1423 1421 1. N is an ancestor of some node in 'heads'
1424 1422 2. N is not an ancestor of any node in 'common'
1425 1423
1426 1424 The list is sorted by revision number, meaning it is
1427 1425 topologically sorted.
1428 1426
1429 1427 'heads' and 'common' are both lists of revision numbers. If heads is
1430 1428 not supplied, uses all of the revlog's heads. If common is not
1431 1429 supplied, uses nullid."""
1432 1430 if common is None:
1433 1431 common = [nullrev]
1434 1432 if heads is None:
1435 1433 heads = self.headrevs()
1436 1434
1437 1435 inc = self.incrementalmissingrevs(common=common)
1438 1436 return inc.missingancestors(heads)
1439 1437
1440 1438 def findmissing(self, common=None, heads=None):
1441 1439 """Return the ancestors of heads that are not ancestors of common.
1442 1440
1443 1441 More specifically, return a list of nodes N such that every N
1444 1442 satisfies the following constraints:
1445 1443
1446 1444 1. N is an ancestor of some node in 'heads'
1447 1445 2. N is not an ancestor of any node in 'common'
1448 1446
1449 1447 The list is sorted by revision number, meaning it is
1450 1448 topologically sorted.
1451 1449
1452 1450 'heads' and 'common' are both lists of node IDs. If heads is
1453 1451 not supplied, uses all of the revlog's heads. If common is not
1454 1452 supplied, uses nullid."""
1455 1453 if common is None:
1456 1454 common = [nullid]
1457 1455 if heads is None:
1458 1456 heads = self.heads()
1459 1457
1460 1458 common = [self.rev(n) for n in common]
1461 1459 heads = [self.rev(n) for n in heads]
1462 1460
1463 1461 inc = self.incrementalmissingrevs(common=common)
1464 1462 return [self.node(r) for r in inc.missingancestors(heads)]
1465 1463
1466 1464 def nodesbetween(self, roots=None, heads=None):
1467 1465 """Return a topological path from 'roots' to 'heads'.
1468 1466
1469 1467 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1470 1468 topologically sorted list of all nodes N that satisfy both of
1471 1469 these constraints:
1472 1470
1473 1471 1. N is a descendant of some node in 'roots'
1474 1472 2. N is an ancestor of some node in 'heads'
1475 1473
1476 1474 Every node is considered to be both a descendant and an ancestor
1477 1475 of itself, so every reachable node in 'roots' and 'heads' will be
1478 1476 included in 'nodes'.
1479 1477
1480 1478 'outroots' is the list of reachable nodes in 'roots', i.e., the
1481 1479 subset of 'roots' that is returned in 'nodes'. Likewise,
1482 1480 'outheads' is the subset of 'heads' that is also in 'nodes'.
1483 1481
1484 1482 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1485 1483 unspecified, uses nullid as the only root. If 'heads' is
1486 1484 unspecified, uses list of all of the revlog's heads."""
1487 1485 nonodes = ([], [], [])
1488 1486 if roots is not None:
1489 1487 roots = list(roots)
1490 1488 if not roots:
1491 1489 return nonodes
1492 1490 lowestrev = min([self.rev(n) for n in roots])
1493 1491 else:
1494 1492 roots = [nullid] # Everybody's a descendant of nullid
1495 1493 lowestrev = nullrev
1496 1494 if (lowestrev == nullrev) and (heads is None):
1497 1495 # We want _all_ the nodes!
1498 1496 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1499 1497 if heads is None:
1500 1498 # All nodes are ancestors, so the latest ancestor is the last
1501 1499 # node.
1502 1500 highestrev = len(self) - 1
1503 1501 # Set ancestors to None to signal that every node is an ancestor.
1504 1502 ancestors = None
1505 1503 # Set heads to an empty dictionary for later discovery of heads
1506 1504 heads = {}
1507 1505 else:
1508 1506 heads = list(heads)
1509 1507 if not heads:
1510 1508 return nonodes
1511 1509 ancestors = set()
1512 1510 # Turn heads into a dictionary so we can remove 'fake' heads.
1513 1511 # Also, later we will be using it to filter out the heads we can't
1514 1512 # find from roots.
1515 1513 heads = dict.fromkeys(heads, False)
1516 1514 # Start at the top and keep marking parents until we're done.
1517 1515 nodestotag = set(heads)
1518 1516 # Remember where the top was so we can use it as a limit later.
1519 1517 highestrev = max([self.rev(n) for n in nodestotag])
1520 1518 while nodestotag:
1521 1519 # grab a node to tag
1522 1520 n = nodestotag.pop()
1523 1521 # Never tag nullid
1524 1522 if n == nullid:
1525 1523 continue
1526 1524 # A node's revision number represents its place in a
1527 1525 # topologically sorted list of nodes.
1528 1526 r = self.rev(n)
1529 1527 if r >= lowestrev:
1530 1528 if n not in ancestors:
1531 1529 # If we are possibly a descendant of one of the roots
1532 1530 # and we haven't already been marked as an ancestor
1533 1531 ancestors.add(n) # Mark as ancestor
1534 1532 # Add non-nullid parents to list of nodes to tag.
1535 1533 nodestotag.update([p for p in self.parents(n) if
1536 1534 p != nullid])
1537 1535 elif n in heads: # We've seen it before, is it a fake head?
1538 1536 # So it is, real heads should not be the ancestors of
1539 1537 # any other heads.
1540 1538 heads.pop(n)
1541 1539 if not ancestors:
1542 1540 return nonodes
1543 1541 # Now that we have our set of ancestors, we want to remove any
1544 1542 # roots that are not ancestors.
1545 1543
1546 1544 # If one of the roots was nullid, everything is included anyway.
1547 1545 if lowestrev > nullrev:
1548 1546 # But, since we weren't, let's recompute the lowest rev to not
1549 1547 # include roots that aren't ancestors.
1550 1548
1551 1549 # Filter out roots that aren't ancestors of heads
1552 1550 roots = [root for root in roots if root in ancestors]
1553 1551 # Recompute the lowest revision
1554 1552 if roots:
1555 1553 lowestrev = min([self.rev(root) for root in roots])
1556 1554 else:
1557 1555 # No more roots? Return empty list
1558 1556 return nonodes
1559 1557 else:
1560 1558 # We are descending from nullid, and don't need to care about
1561 1559 # any other roots.
1562 1560 lowestrev = nullrev
1563 1561 roots = [nullid]
1564 1562 # Transform our roots list into a set.
1565 1563 descendants = set(roots)
1566 1564 # Also, keep the original roots so we can filter out roots that aren't
1567 1565 # 'real' roots (i.e. are descended from other roots).
1568 1566 roots = descendants.copy()
1569 1567 # Our topologically sorted list of output nodes.
1570 1568 orderedout = []
1571 1569 # Don't start at nullid since we don't want nullid in our output list,
1572 1570 # and if nullid shows up in descendants, empty parents will look like
1573 1571 # they're descendants.
1574 1572 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1575 1573 n = self.node(r)
1576 1574 isdescendant = False
1577 1575 if lowestrev == nullrev: # Everybody is a descendant of nullid
1578 1576 isdescendant = True
1579 1577 elif n in descendants:
1580 1578 # n is already a descendant
1581 1579 isdescendant = True
1582 1580 # This check only needs to be done here because all the roots
1583 1581 # will start being marked as descendants before the loop.
1584 1582 if n in roots:
1585 1583 # If n was a root, check if it's a 'real' root.
1586 1584 p = tuple(self.parents(n))
1587 1585 # If any of its parents are descendants, it's not a root.
1588 1586 if (p[0] in descendants) or (p[1] in descendants):
1589 1587 roots.remove(n)
1590 1588 else:
1591 1589 p = tuple(self.parents(n))
1592 1590 # A node is a descendant if either of its parents are
1594 1592 # descendants. (We seeded the descendants set with the roots
1594 1592 # up there, remember?)
1595 1593 if (p[0] in descendants) or (p[1] in descendants):
1596 1594 descendants.add(n)
1597 1595 isdescendant = True
1598 1596 if isdescendant and ((ancestors is None) or (n in ancestors)):
1599 1597 # Only include nodes that are both descendants and ancestors.
1600 1598 orderedout.append(n)
1601 1599 if (ancestors is not None) and (n in heads):
1602 1600 # We're trying to figure out which heads are reachable
1603 1601 # from roots.
1604 1602 # Mark this head as having been reached
1605 1603 heads[n] = True
1606 1604 elif ancestors is None:
1607 1605 # Otherwise, we're trying to discover the heads.
1608 1606 # Assume this is a head because if it isn't, the next step
1609 1607 # will eventually remove it.
1610 1608 heads[n] = True
1611 1609 # But, obviously its parents aren't.
1612 1610 for p in self.parents(n):
1613 1611 heads.pop(p, None)
1614 1612 heads = [head for head, flag in heads.iteritems() if flag]
1615 1613 roots = list(roots)
1616 1614 assert orderedout
1617 1615 assert roots
1618 1616 assert heads
1619 1617 return (orderedout, roots, heads)
1620 1618
1621 1619 def headrevs(self):
1622 1620 try:
1623 1621 return self.index.headrevs()
1624 1622 except AttributeError:
1625 1623 return self._headrevs()
1626 1624
1627 1625 def computephases(self, roots):
1628 1626 return self.index.computephasesmapsets(roots)
1629 1627
1630 1628 def _headrevs(self):
1631 1629 count = len(self)
1632 1630 if not count:
1633 1631 return [nullrev]
1634 1632 # we won't iterate over filtered revs, so nobody is a head at start
1635 1633 ishead = [0] * (count + 1)
1636 1634 index = self.index
1637 1635 for r in self:
1638 1636 ishead[r] = 1 # I may be a head
1639 1637 e = index[r]
1640 1638 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1641 1639 return [r for r, val in enumerate(ishead) if val]
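# Worked example (hypothetical 4-rev log): with parent revs
#   0 -> (-1, -1), 1 -> (0, -1), 2 -> (1, -1), 3 -> (1, -1)
# every rev is first marked as a head, then revs 0 and 1 are cleared when
# their children are visited, so the sweep returns [2, 3].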
1642 1640
1643 1641 def heads(self, start=None, stop=None):
1644 1642 """return the list of all nodes that have no children
1645 1643
1646 1644 if start is specified, only heads that are descendants of
1647 1645 start will be returned
1648 1646 if stop is specified, it will consider all the revs from stop
1649 1647 as if they had no children
1650 1648 """
1651 1649 if start is None and stop is None:
1652 1650 if not len(self):
1653 1651 return [nullid]
1654 1652 return [self.node(r) for r in self.headrevs()]
1655 1653
1656 1654 if start is None:
1657 1655 start = nullid
1658 1656 if stop is None:
1659 1657 stop = []
1660 1658 stoprevs = set([self.rev(n) for n in stop])
1661 1659 startrev = self.rev(start)
1662 1660 reachable = {startrev}
1663 1661 heads = {startrev}
1664 1662
1665 1663 parentrevs = self.parentrevs
1666 1664 for r in self.revs(start=startrev + 1):
1667 1665 for p in parentrevs(r):
1668 1666 if p in reachable:
1669 1667 if r not in stoprevs:
1670 1668 reachable.add(r)
1671 1669 heads.add(r)
1672 1670 if p in heads and p not in stoprevs:
1673 1671 heads.remove(p)
1674 1672
1675 1673 return [self.node(r) for r in heads]
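# Example (hypothetical revlog ``rl`` with a linear history 0..4 plus a
# branch rev 5 whose parent is rev 2): rl.heads() returns the nodes of revs
# 4 and 5, while rl.heads(start=rl.node(3)) returns only the node of rev 4,
# because rev 5 does not descend from rev 3.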
1676 1674
1677 1675 def children(self, node):
1678 1676 """find the children of a given node"""
1679 1677 c = []
1680 1678 p = self.rev(node)
1681 1679 for r in self.revs(start=p + 1):
1682 1680 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1683 1681 if prevs:
1684 1682 for pr in prevs:
1685 1683 if pr == p:
1686 1684 c.append(self.node(r))
1687 1685 elif p == nullrev:
1688 1686 c.append(self.node(r))
1689 1687 return c
1690 1688
1691 1689 def commonancestorsheads(self, a, b):
1692 1690 """calculate all the heads of the common ancestors of nodes a and b"""
1693 1691 a, b = self.rev(a), self.rev(b)
1694 1692 ancs = self._commonancestorsheads(a, b)
1695 1693 return pycompat.maplist(self.node, ancs)
1696 1694
1697 1695 def _commonancestorsheads(self, *revs):
1698 1696 """calculate all the heads of the common ancestors of revs"""
1699 1697 try:
1700 1698 ancs = self.index.commonancestorsheads(*revs)
1701 1699 except (AttributeError, OverflowError): # C implementation failed
1702 1700 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1703 1701 return ancs
1704 1702
1705 1703 def isancestor(self, a, b):
1706 1704 """return True if node a is an ancestor of node b
1707 1705
1708 1706 A revision is considered an ancestor of itself."""
1709 1707 a, b = self.rev(a), self.rev(b)
1710 1708 return self.isancestorrev(a, b)
1711 1709
1712 1710 def descendant(self, a, b):
1713 1711 msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
1714 1712 util.nouideprecwarn(msg, '4.7')
1715 1713 return self.isancestorrev(a, b)
1716 1714
1717 1715 def isancestorrev(self, a, b):
1718 1716 """return True if revision a is an ancestor of revision b
1719 1717
1720 1718 A revision is considered an ancestor of itself.
1721 1719
1722 1720 The implementation of this is trivial but the use of
1723 1721 commonancestorsheads is not."""
1724 1722 if a == nullrev:
1725 1723 return True
1726 1724 elif a == b:
1727 1725 return True
1728 1726 elif a > b:
1729 1727 return False
1730 1728 return a in self._commonancestorsheads(a, b)
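# Sketch of the reasoning: when a is an ancestor of b, a itself is the sole
# head of the common ancestors of (a, b); e.g. in a linear history
# _commonancestorsheads(2, 5) yields just rev 2, so isancestorrev(2, 5)
# returns True via the membership test above.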
1731 1729
1732 1730 def ancestor(self, a, b):
1733 1731 """calculate the "best" common ancestor of nodes a and b"""
1734 1732
1735 1733 a, b = self.rev(a), self.rev(b)
1736 1734 try:
1737 1735 ancs = self.index.ancestors(a, b)
1738 1736 except (AttributeError, OverflowError):
1739 1737 ancs = ancestor.ancestors(self.parentrevs, a, b)
1740 1738 if ancs:
1741 1739 # choose a consistent winner when there's a tie
1742 1740 return min(map(self.node, ancs))
1743 1741 return nullid
1744 1742
1745 1743 def _match(self, id):
1746 1744 if isinstance(id, int):
1747 1745 # rev
1748 1746 return self.node(id)
1749 1747 if len(id) == 20:
1750 1748 # possibly a binary node
1751 1749 # odds of a binary node being all hex in ASCII are 1 in 10**25
1752 1750 try:
1753 1751 node = id
1754 1752 self.rev(node) # quick search the index
1755 1753 return node
1756 1754 except LookupError:
1757 1755 pass # may be partial hex id
1758 1756 try:
1759 1757 # str(rev)
1760 1758 rev = int(id)
1761 1759 if "%d" % rev != id:
1762 1760 raise ValueError
1763 1761 if rev < 0:
1764 1762 rev = len(self) + rev
1765 1763 if rev < 0 or rev >= len(self):
1766 1764 raise ValueError
1767 1765 return self.node(rev)
1768 1766 except (ValueError, OverflowError):
1769 1767 pass
1770 1768 if len(id) == 40:
1771 1769 try:
1772 1770 # a full hex nodeid?
1773 1771 node = bin(id)
1774 1772 self.rev(node)
1775 1773 return node
1776 1774 except (TypeError, LookupError):
1777 1775 pass
1778 1776
1779 1777 def _partialmatch(self, id):
1780 1778 # we don't care about wdirfilenodeids as they should always be full hashes
1781 1779 maybewdir = wdirhex.startswith(id)
1782 1780 try:
1783 1781 partial = self.index.partialmatch(id)
1784 1782 if partial and self.hasnode(partial):
1785 1783 if maybewdir:
1786 1784 # single 'ff...' match in radix tree, ambiguous with wdir
1787 1785 raise RevlogError
1788 1786 return partial
1789 1787 if maybewdir:
1790 1788 # no 'ff...' match in radix tree, wdir identified
1791 1789 raise error.WdirUnsupported
1792 1790 return None
1793 1791 except RevlogError:
1794 1792 # parsers.c radix tree lookup gave multiple matches
1795 1793 # fast path: for unfiltered changelog, radix tree is accurate
1796 1794 if not getattr(self, 'filteredrevs', None):
1797 1795 raise AmbiguousPrefixLookupError(id, self.indexfile,
1798 1796 _('ambiguous identifier'))
1799 1797 # fall through to slow path that filters hidden revisions
1800 1798 except (AttributeError, ValueError):
1801 1799 # we are pure python, or key was too short to search radix tree
1802 1800 pass
1803 1801
1804 1802 if id in self._pcache:
1805 1803 return self._pcache[id]
1806 1804
1807 1805 if len(id) <= 40:
1808 1806 try:
1809 1807 # hex(node)[:...]
1810 1808 l = len(id) // 2 # grab an even number of digits
1811 1809 prefix = bin(id[:l * 2])
1812 1810 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1813 1811 nl = [n for n in nl if hex(n).startswith(id) and
1814 1812 self.hasnode(n)]
1815 1813 if len(nl) > 0:
1816 1814 if len(nl) == 1 and not maybewdir:
1817 1815 self._pcache[id] = nl[0]
1818 1816 return nl[0]
1819 1817 raise AmbiguousPrefixLookupError(id, self.indexfile,
1820 1818 _('ambiguous identifier'))
1821 1819 if maybewdir:
1822 1820 raise error.WdirUnsupported
1823 1821 return None
1824 1822 except TypeError:
1825 1823 pass
1826 1824
1827 1825 def lookup(self, id):
1828 1826 """locate a node based on:
1829 1827 - revision number or str(revision number)
1830 1828 - nodeid or subset of hex nodeid
1831 1829 """
1832 1830 n = self._match(id)
1833 1831 if n is not None:
1834 1832 return n
1835 1833 n = self._partialmatch(id)
1836 1834 if n:
1837 1835 return n
1838 1836
1839 1837 raise LookupError(id, self.indexfile, _('no match found'))
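# Usage sketch (hypothetical revlog ``rl``): rl.lookup(0) and rl.lookup('0')
# both resolve to the node of rev 0, while a hex prefix such as
# rl.lookup('1a2b3c') goes through _partialmatch() and raises
# AmbiguousPrefixLookupError if several nodes share that prefix.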
1840 1838
1841 1839 def shortest(self, node, minlength=1):
1842 1840 """Find the shortest unambiguous prefix that matches node."""
1843 1841 def isvalid(prefix):
1844 1842 try:
1845 1843 node = self._partialmatch(prefix)
1846 1844 except error.RevlogError:
1847 1845 return False
1848 1846 except error.WdirUnsupported:
1849 1847 # single 'ff...' match
1850 1848 return True
1851 1849 if node is None:
1852 1850 raise LookupError(node, self.indexfile, _('no node'))
1853 1851 return True
1854 1852
1855 1853 def maybewdir(prefix):
1856 1854 return all(c == 'f' for c in prefix)
1857 1855
1858 1856 hexnode = hex(node)
1859 1857
1860 1858 def disambiguate(hexnode, minlength):
1861 1859 """Disambiguate against wdirid."""
1862 1860 for length in range(minlength, 41):
1863 1861 prefix = hexnode[:length]
1864 1862 if not maybewdir(prefix):
1865 1863 return prefix
1866 1864
1867 1865 if not getattr(self, 'filteredrevs', None):
1868 1866 try:
1869 1867 length = max(self.index.shortest(node), minlength)
1870 1868 return disambiguate(hexnode, length)
1871 1869 except RevlogError:
1872 1870 if node != wdirid:
1873 1871 raise LookupError(node, self.indexfile, _('no node'))
1874 1872 except AttributeError:
1875 1873 # Fall through to pure code
1876 1874 pass
1877 1875
1878 1876 if node == wdirid:
1879 1877 for length in range(minlength, 41):
1880 1878 prefix = hexnode[:length]
1881 1879 if isvalid(prefix):
1882 1880 return prefix
1883 1881
1884 1882 for length in range(minlength, 41):
1885 1883 prefix = hexnode[:length]
1886 1884 if isvalid(prefix):
1887 1885 return disambiguate(hexnode, length)
1888 1886
1889 1887 def cmp(self, node, text):
1890 1888 """compare text with a given file revision
1891 1889
1892 1890 returns True if text is different than what is stored.
1893 1891 """
1894 1892 p1, p2 = self.parents(node)
1895 1893 return hash(text, p1, p2) != node
1896 1894
1897 1895 def _cachesegment(self, offset, data):
1898 1896 """Add a segment to the revlog cache.
1899 1897
1900 1898 Accepts an absolute offset and the data that is at that location.
1901 1899 """
1902 1900 o, d = self._chunkcache
1903 1901 # try to add to existing cache
1904 1902 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1905 1903 self._chunkcache = o, d + data
1906 1904 else:
1907 1905 self._chunkcache = offset, data
1908 1906
1909 1907 def _readsegment(self, offset, length, df=None):
1910 1908 """Load a segment of raw data from the revlog.
1911 1909
1912 1910 Accepts an absolute offset, length to read, and an optional existing
1913 1911 file handle to read from.
1914 1912
1915 1913 If an existing file handle is passed, it will be seeked and the
1916 1914 original seek position will NOT be restored.
1917 1915
1918 1916 Returns a str or buffer of raw byte data.
1919 1917 """
1920 1918 # Cache data both forward and backward around the requested
1921 1919 # data, in a fixed size window. This helps speed up operations
1922 1920 # involving reading the revlog backwards.
1923 1921 cachesize = self._chunkcachesize
1924 1922 realoffset = offset & ~(cachesize - 1)
1925 1923 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1926 1924 - realoffset)
1927 1925 with self._datareadfp(df) as df:
1928 1926 df.seek(realoffset)
1929 1927 d = df.read(reallength)
1930 1928 self._cachesegment(realoffset, d)
1931 1929 if offset != realoffset or reallength != length:
1932 1930 return util.buffer(d, offset - realoffset, length)
1933 1931 return d
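# Worked example (assuming the default cache size of 65536): a request for
# offset=70000, length=100 is widened to realoffset=65536 and
# reallength=65536, i.e. the enclosing cache-aligned window, and the caller
# gets back a buffer starting at 70000 - 65536 = 4464 within that window.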
1934 1932
1935 1933 def _getsegment(self, offset, length, df=None):
1936 1934 """Obtain a segment of raw data from the revlog.
1937 1935
1938 1936 Accepts an absolute offset, length of bytes to obtain, and an
1939 1937 optional file handle to the already-opened revlog. If the file
1940 1938 handle is used, its original seek position will not be preserved.
1941 1939
1942 1940 Requests for data may be returned from a cache.
1943 1941
1944 1942 Returns a str or a buffer instance of raw byte data.
1945 1943 """
1946 1944 o, d = self._chunkcache
1947 1945 l = len(d)
1948 1946
1949 1947 # is it in the cache?
1950 1948 cachestart = offset - o
1951 1949 cacheend = cachestart + length
1952 1950 if cachestart >= 0 and cacheend <= l:
1953 1951 if cachestart == 0 and cacheend == l:
1954 1952 return d # avoid a copy
1955 1953 return util.buffer(d, cachestart, cacheend - cachestart)
1956 1954
1957 1955 return self._readsegment(offset, length, df=df)
1958 1956
1959 1957 def _getsegmentforrevs(self, startrev, endrev, df=None):
1960 1958 """Obtain a segment of raw data corresponding to a range of revisions.
1961 1959
1962 1960 Accepts the start and end revisions and an optional already-open
1963 1961 file handle to be used for reading. If the file handle is used, its
1964 1962 seek position will not be preserved.
1965 1963
1966 1964 Requests for data may be satisfied by a cache.
1967 1965
1968 1966 Returns a 2-tuple of (offset, data) for the requested range of
1969 1967 revisions. Offset is the integer offset from the beginning of the
1970 1968 revlog and data is a str or buffer of the raw byte data.
1971 1969
1972 1970 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1973 1971 to determine where each revision's data begins and ends.
1974 1972 """
1975 1973 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1976 1974 # (functions are expensive).
1977 1975 index = self.index
1978 1976 istart = index[startrev]
1979 1977 start = int(istart[0] >> 16)
1980 1978 if startrev == endrev:
1981 1979 end = start + istart[1]
1982 1980 else:
1983 1981 iend = index[endrev]
1984 1982 end = int(iend[0] >> 16) + iend[1]
1985 1983
1986 1984 if self._inline:
1987 1985 start += (startrev + 1) * self._io.size
1988 1986 end += (endrev + 1) * self._io.size
1989 1987 length = end - start
1990 1988
1991 1989 return start, self._getsegment(start, length, df=df)
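# Usage sketch: the returned offset is the data-file position of startrev's
# chunk, so a caller can carve out rev r's chunk from ``data`` with
#   chunkstart = self.start(r) - offset
#   chunk = data[chunkstart:chunkstart + self.length(r)]
# plus the (rev + 1) * self._io.size correction when the revlog is inline,
# which is exactly what _chunks() below does.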
1992 1990
1993 1991 def _chunk(self, rev, df=None):
1994 1992 """Obtain a single decompressed chunk for a revision.
1995 1993
1996 1994 Accepts an integer revision and an optional already-open file handle
1997 1995 to be used for reading. If used, the seek position of the file will not
1998 1996 be preserved.
1999 1997
2000 1998 Returns a str holding uncompressed data for the requested revision.
2001 1999 """
2002 2000 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
2003 2001
2004 2002 def _chunks(self, revs, df=None, targetsize=None):
2005 2003 """Obtain decompressed chunks for the specified revisions.
2006 2004
2007 2005 Accepts an iterable of numeric revisions that are assumed to be in
2008 2006 ascending order. Also accepts an optional already-open file handle
2009 2007 to be used for reading. If used, the seek position of the file will
2010 2008 not be preserved.
2011 2009
2012 2010 This function is similar to calling ``self._chunk()`` multiple times,
2013 2011 but is faster.
2014 2012
2015 2013 Returns a list with decompressed data for each requested revision.
2016 2014 """
2017 2015 if not revs:
2018 2016 return []
2019 2017 start = self.start
2020 2018 length = self.length
2021 2019 inline = self._inline
2022 2020 iosize = self._io.size
2023 2021 buffer = util.buffer
2024 2022
2025 2023 l = []
2026 2024 ladd = l.append
2027 2025
2028 2026 if not self._withsparseread:
2029 2027 slicedchunks = (revs,)
2030 2028 else:
2031 2029 slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
2032 2030
2033 2031 for revschunk in slicedchunks:
2034 2032 firstrev = revschunk[0]
2035 2033 # Skip trailing revisions with empty diff
2036 2034 for lastrev in revschunk[::-1]:
2037 2035 if length(lastrev) != 0:
2038 2036 break
2039 2037
2040 2038 try:
2041 2039 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
2042 2040 except OverflowError:
2043 2041 # issue4215 - we can't cache a run of chunks greater than
2044 2042 # 2G on Windows
2045 2043 return [self._chunk(rev, df=df) for rev in revschunk]
2046 2044
2047 2045 decomp = self.decompress
2048 2046 for rev in revschunk:
2049 2047 chunkstart = start(rev)
2050 2048 if inline:
2051 2049 chunkstart += (rev + 1) * iosize
2052 2050 chunklength = length(rev)
2053 2051 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
2054 2052
2055 2053 return l
2056 2054
2057 2055 def _chunkclear(self):
2058 2056 """Clear the raw chunk cache."""
2059 2057 self._chunkcache = (0, '')
2060 2058
2061 2059 def deltaparent(self, rev):
2062 2060 """return deltaparent of the given revision"""
2063 2061 base = self.index[rev][3]
2064 2062 if base == rev:
2065 2063 return nullrev
2066 2064 elif self._generaldelta:
2067 2065 return base
2068 2066 else:
2069 2067 return rev - 1
2070 2068
2071 2069 def revdiff(self, rev1, rev2):
2072 2070 """return or calculate a delta between two revisions
2073 2071
2074 2072 The delta calculated is in binary form and is intended to be written to
2075 2073 revlog data directly. So this function needs raw revision data.
2076 2074 """
2077 2075 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2078 2076 return bytes(self._chunk(rev2))
2079 2077
2080 2078 return mdiff.textdiff(self.revision(rev1, raw=True),
2081 2079 self.revision(rev2, raw=True))
2082 2080
2083 2081 def revision(self, nodeorrev, _df=None, raw=False):
2084 2082 """return an uncompressed revision of a given node or revision
2085 2083 number.
2086 2084
2087 2085 _df - an existing file handle to read from. (internal-only)
2088 2086 raw - an optional argument specifying if the revision data is to be
2089 2087 treated as raw data when applying flag transforms. 'raw' should be set
2090 2088 to True when generating changegroups or in debug commands.
2091 2089 """
2092 2090 if isinstance(nodeorrev, int):
2093 2091 rev = nodeorrev
2094 2092 node = self.node(rev)
2095 2093 else:
2096 2094 node = nodeorrev
2097 2095 rev = None
2098 2096
2099 2097 cachedrev = None
2100 2098 flags = None
2101 2099 rawtext = None
2102 2100 if node == nullid:
2103 2101 return ""
2104 2102 if self._cache:
2105 2103 if self._cache[0] == node:
2106 2104 # _cache only stores rawtext
2107 2105 if raw:
2108 2106 return self._cache[2]
2109 2107 # duplicated, but good for perf
2110 2108 if rev is None:
2111 2109 rev = self.rev(node)
2112 2110 if flags is None:
2113 2111 flags = self.flags(rev)
2114 2112 # no extra flags set, no flag processor runs, text = rawtext
2115 2113 if flags == REVIDX_DEFAULT_FLAGS:
2116 2114 return self._cache[2]
2117 2115 # rawtext is reusable. need to run flag processor
2118 2116 rawtext = self._cache[2]
2119 2117
2120 2118 cachedrev = self._cache[1]
2121 2119
2122 2120 # look up what we need to read
2123 2121 if rawtext is None:
2124 2122 if rev is None:
2125 2123 rev = self.rev(node)
2126 2124
2127 2125 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2128 2126 if stopped:
2129 2127 rawtext = self._cache[2]
2130 2128
2131 2129 # drop cache to save memory
2132 2130 self._cache = None
2133 2131
2134 2132 targetsize = None
2135 2133 rawsize = self.index[rev][2]
2136 2134 if 0 <= rawsize:
2137 2135 targetsize = 4 * rawsize
2138 2136
2139 2137 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2140 2138 if rawtext is None:
2141 2139 rawtext = bytes(bins[0])
2142 2140 bins = bins[1:]
2143 2141
2144 2142 rawtext = mdiff.patches(rawtext, bins)
2145 2143 self._cache = (node, rev, rawtext)
2146 2144
2147 2145 if flags is None:
2148 2146 if rev is None:
2149 2147 rev = self.rev(node)
2150 2148 flags = self.flags(rev)
2151 2149
2152 2150 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
2153 2151 if validatehash:
2154 2152 self.checkhash(text, node, rev=rev)
2155 2153
2156 2154 return text
2157 2155
2158 2156 def hash(self, text, p1, p2):
2159 2157 """Compute a node hash.
2160 2158
2161 2159 Available as a function so that subclasses can replace the hash
2162 2160 as needed.
2163 2161 """
2164 2162 return hash(text, p1, p2)
2165 2163
2166 2164 def _processflags(self, text, flags, operation, raw=False):
2167 2165 """Inspect revision data flags and applies transforms defined by
2168 2166 registered flag processors.
2169 2167
2170 2168 ``text`` - the revision data to process
2171 2169 ``flags`` - the revision flags
2172 2170 ``operation`` - the operation being performed (read or write)
2173 2171 ``raw`` - an optional argument describing if the raw transform should be
2174 2172 applied.
2175 2173
2176 2174 This method processes the flags in the order (or reverse order if
2177 2175 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
2178 2176 flag processors registered for present flags. The order of flags defined
2179 2177 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
2180 2178
2181 2179 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
2182 2180 processed text and ``validatehash`` is a bool indicating whether the
2183 2181 returned text should be checked for hash integrity.
2184 2182
2185 2183 Note: If the ``raw`` argument is set, it has precedence over the
2186 2184 operation and will only update the value of ``validatehash``.
2187 2185 """
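# Ordering sketch (hypothetical flags A then B in REVIDX_FLAGS_ORDER):
# 'read' applies A's readtransform and then B's, while 'write' applies B's
# writetransform and then A's, so a write followed by a read round-trips
# the text.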
2188 2186 # fast path: no flag processors will run
2189 2187 if flags == 0:
2190 2188 return text, True
2191 2189 if operation not in ('read', 'write'):
2192 2190 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
2193 2191 # Check all flags are known.
2194 2192 if flags & ~REVIDX_KNOWN_FLAGS:
2195 2193 raise RevlogError(_("incompatible revision flag '%#x'") %
2196 2194 (flags & ~REVIDX_KNOWN_FLAGS))
2197 2195 validatehash = True
2198 2196 # Depending on the operation (read or write), the order might be
2199 2197 # reversed due to non-commutative transforms.
2200 2198 orderedflags = REVIDX_FLAGS_ORDER
2201 2199 if operation == 'write':
2202 2200 orderedflags = reversed(orderedflags)
2203 2201
2204 2202 for flag in orderedflags:
2205 2203 # If a flagprocessor has been registered for a known flag, apply the
2206 2204 # related operation transform and update result tuple.
2207 2205 if flag & flags:
2208 2206 vhash = True
2209 2207
2210 2208 if flag not in _flagprocessors:
2211 2209 message = _("missing processor for flag '%#x'") % (flag)
2212 2210 raise RevlogError(message)
2213 2211
2214 2212 processor = _flagprocessors[flag]
2215 2213 if processor is not None:
2216 2214 readtransform, writetransform, rawtransform = processor
2217 2215
2218 2216 if raw:
2219 2217 vhash = rawtransform(self, text)
2220 2218 elif operation == 'read':
2221 2219 text, vhash = readtransform(self, text)
2222 2220 else: # write operation
2223 2221 text, vhash = writetransform(self, text)
2224 2222 validatehash = validatehash and vhash
2225 2223
2226 2224 return text, validatehash
2227 2225
2228 2226 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2229 2227 """Check node hash integrity.
2230 2228
2231 2229 Available as a function so that subclasses can extend hash mismatch
2232 2230 behaviors as needed.
2233 2231 """
2234 2232 try:
2235 2233 if p1 is None and p2 is None:
2236 2234 p1, p2 = self.parents(node)
2237 2235 if node != self.hash(text, p1, p2):
2238 2236 revornode = rev
2239 2237 if revornode is None:
2240 2238 revornode = templatefilters.short(hex(node))
2241 2239 raise RevlogError(_("integrity check failed on %s:%s")
2242 2240 % (self.indexfile, pycompat.bytestr(revornode)))
2243 2241 except RevlogError:
2244 2242 if self._censorable and _censoredtext(text):
2245 2243 raise error.CensoredNodeError(self.indexfile, node, text)
2246 2244 raise
2247 2245
2248 2246 def _enforceinlinesize(self, tr, fp=None):
2249 2247 """Check if the revlog is too big for inline and convert if so.
2250 2248
2251 2249 This should be called after revisions are added to the revlog. If the
2252 2250 revlog has grown too large to be an inline revlog, it will convert it
2253 2251 to use multiple index and data files.
2254 2252 """
2255 2253 tiprev = len(self) - 1
2256 2254 if (not self._inline or
2257 2255 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
2258 2256 return
2259 2257
2260 2258 trinfo = tr.find(self.indexfile)
2261 2259 if trinfo is None:
2262 2260 raise RevlogError(_("%s not found in the transaction")
2263 2261 % self.indexfile)
2264 2262
2265 2263 trindex = trinfo[2]
2266 2264 if trindex is not None:
2267 2265 dataoff = self.start(trindex)
2268 2266 else:
2269 2267 # revlog was stripped at start of transaction, use all leftover data
2270 2268 trindex = len(self) - 1
2271 2269 dataoff = self.end(tiprev)
2272 2270
2273 2271 tr.add(self.datafile, dataoff)
2274 2272
2275 2273 if fp:
2276 2274 fp.flush()
2277 2275 fp.close()
2278 2276
2279 2277 with self._datafp('w') as df:
2280 2278 for r in self:
2281 2279 df.write(self._getsegmentforrevs(r, r)[1])
2282 2280
2283 2281 with self._indexfp('w') as fp:
2284 2282 self.version &= ~FLAG_INLINE_DATA
2285 2283 self._inline = False
2286 2284 io = self._io
2287 2285 for i in self:
2288 2286 e = io.packentry(self.index[i], self.node, self.version, i)
2289 2287 fp.write(e)
2290 2288
2291 2289 # the temp file replaces the real index when we exit the context
2292 2290 # manager
2293 2291
2294 2292 tr.replace(self.indexfile, trindex * self._io.size)
2295 2293 self._chunkclear()
2296 2294
2297 2295 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
2298 2296 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
2299 2297 """add a revision to the log
2300 2298
2301 2299 text - the revision data to add
2302 2300 transaction - the transaction object used for rollback
2303 2301 link - the linkrev data to add
2304 2302 p1, p2 - the parent nodeids of the revision
2305 2303 cachedelta - an optional precomputed delta
2306 2304 node - nodeid of revision; typically node is not specified, and it is
2307 2305 computed by default as hash(text, p1, p2), however subclasses might
2308 2306 use a different hashing method (and override checkhash() in that case)
2309 2307 flags - the known flags to set on the revision
2310 2308 deltacomputer - an optional _deltacomputer instance shared between
2311 2309 multiple calls
2312 2310 """
2313 2311 if link == nullrev:
2314 2312 raise RevlogError(_("attempted to add linkrev -1 to %s")
2315 2313 % self.indexfile)
2316 2314
2317 2315 if flags:
2318 2316 node = node or self.hash(text, p1, p2)
2319 2317
2320 2318 rawtext, validatehash = self._processflags(text, flags, 'write')
2321 2319
2322 2320 # If the flag processor modifies the revision data, ignore any provided
2323 2321 # cachedelta.
2324 2322 if rawtext != text:
2325 2323 cachedelta = None
2326 2324
2327 2325 if len(rawtext) > _maxentrysize:
2328 2326 raise RevlogError(
2329 2327 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2330 2328 % (self.indexfile, len(rawtext)))
2331 2329
2332 2330 node = node or self.hash(rawtext, p1, p2)
2333 2331 if node in self.nodemap:
2334 2332 return node
2335 2333
2336 2334 if validatehash:
2337 2335 self.checkhash(rawtext, node, p1=p1, p2=p2)
2338 2336
2339 2337 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2340 2338 flags, cachedelta=cachedelta,
2341 2339 deltacomputer=deltacomputer)
2342 2340
2343 2341 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2344 2342 cachedelta=None, deltacomputer=None):
2345 2343 """add a raw revision with known flags, node and parents
2346 2344 useful when reusing a revision not stored in this revlog (e.g. received
2347 2345 over the wire, or read from an external bundle).
2348 2346 """
2349 2347 dfh = None
2350 2348 if not self._inline:
2351 2349 dfh = self._datafp("a+")
2352 2350 ifh = self._indexfp("a+")
2353 2351 try:
2354 2352 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2355 2353 flags, cachedelta, ifh, dfh,
2356 2354 deltacomputer=deltacomputer)
2357 2355 finally:
2358 2356 if dfh:
2359 2357 dfh.close()
2360 2358 ifh.close()
2361 2359
2362 2360 def compress(self, data):
2363 2361 """Generate a possibly-compressed representation of data."""
2364 2362 if not data:
2365 2363 return '', data
2366 2364
2367 2365 compressed = self._compressor.compress(data)
2368 2366
2369 2367 if compressed:
2370 2368 # The revlog compressor added the header in the returned data.
2371 2369 return '', compressed
2372 2370
2373 2371 if data[0:1] == '\0':
2374 2372 return '', data
2375 2373 return 'u', data
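# Return convention (illustrative): a 2-tuple of (header, data) where the
# header is '' when the stored bytes identify themselves (engine-compressed
# data, or raw data already starting with '\0') and 'u' when raw data needs
# an explicit "uncompressed" marker, e.g. ('u', b'abc') for short plain text
# the compressor declined to shrink.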
2376 2374
2377 2375 def decompress(self, data):
2378 2376 """Decompress a revlog chunk.
2379 2377
2380 2378 The chunk is expected to begin with a header identifying the
2381 2379 format type so it can be routed to an appropriate decompressor.
2382 2380 """
2383 2381 if not data:
2384 2382 return data
2385 2383
2386 2384 # Revlogs are read much more frequently than they are written and many
2387 2385 # chunks only take microseconds to decompress, so performance is
2388 2386 # important here.
2389 2387 #
2390 2388 # We can make a few assumptions about revlogs:
2391 2389 #
2392 2390 # 1) the majority of chunks will be compressed (as opposed to inline
2393 2391 # raw data).
2394 2392 # 2) decompressing *any* data will likely be at least 10x slower than
2395 2393 # returning raw inline data.
2396 2394 # 3) we want to prioritize common and officially supported compression
2397 2395 # engines
2398 2396 #
2399 2397 # It follows that we want to optimize for "decompress compressed data
2400 2398 # when encoded with common and officially supported compression engines"
2401 2399 # case over "raw data" and "data encoded by less common or non-official
2402 2400 # compression engines." That is why we have the inline lookup first
2403 2401 # followed by the compengines lookup.
2404 2402 #
2405 2403 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2406 2404 # compressed chunks. And this matters for changelog and manifest reads.
2407 2405 t = data[0:1]
2408 2406
2409 2407 if t == 'x':
2410 2408 try:
2411 2409 return _zlibdecompress(data)
2412 2410 except zlib.error as e:
2413 2411 raise RevlogError(_('revlog decompress error: %s') %
2414 2412 stringutil.forcebytestr(e))
2415 2413 # '\0' is more common than 'u' so it goes first.
2416 2414 elif t == '\0':
2417 2415 return data
2418 2416 elif t == 'u':
2419 2417 return util.buffer(data, 1)
2420 2418
2421 2419 try:
2422 2420 compressor = self._decompressors[t]
2423 2421 except KeyError:
2424 2422 try:
2425 2423 engine = util.compengines.forrevlogheader(t)
2426 2424 compressor = engine.revlogcompressor()
2427 2425 self._decompressors[t] = compressor
2428 2426 except KeyError:
2429 2427 raise RevlogError(_('unknown compression type %r') % t)
2430 2428
2431 2429 return compressor.decompress(data)
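# Header routing sketch: the first byte selects the decoder:
#   'x'  -> zlib-compressed chunk
#   '\0' -> raw data, returned as-is
#   'u'  -> raw data with the leading 'u' stripped
#   anything else -> resolved via util.compengines.forrevlogheader() and
#   cached in self._decompressors (e.g. a zstd engine, when enabled).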
2432 2430
2433 2431 def _isgooddeltainfo(self, deltainfo, revinfo):
2434 2432 """Returns True if the given delta is good. Good means that it is within
2435 2433 the disk span, disk size, and chain length bounds that we know to be
2436 2434 performant."""
2437 2435 if deltainfo is None:
2438 2436 return False
2439 2437
2440 2438 # - 'deltainfo.distance' is the distance from the base revision --
2441 2439 # bounding it limits the amount of I/O we need to do.
2442 2440 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2443 2441 # deltas we need to apply -- bounding it limits the amount of CPU
2444 2442 # we consume.
2445 2443
2446 2444 if self._sparserevlog:
2447 2445 # As sparse-read will be used, we can consider that the distance,
2448 2446 # instead of being the span of the whole chunk,
2449 2447 # is the span of the largest read chunk
2450 2448 base = deltainfo.base
2451 2449
2452 2450 if base != nullrev:
2453 2451 deltachain = self._deltachain(base)[0]
2454 2452 else:
2455 2453 deltachain = []
2456 2454
2457 2455 chunks = _slicechunk(self, deltachain, deltainfo)
2458 2456 distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
2459 2457 else:
2460 2458 distance = deltainfo.distance
2461 2459
2462 2460 textlen = revinfo.textlen
2463 2461 defaultmax = textlen * 4
2464 2462 maxdist = self._maxdeltachainspan
2465 2463 if not maxdist:
2466 2464 maxdist = distance # ensure the conditional passes
2467 2465 maxdist = max(maxdist, defaultmax)
2468 2466 if self._sparserevlog and maxdist < self._srmingapsize:
2469 2467 # In multiple places, we are ignoring irrelevant data ranges below a
2470 2468 # certain size. We also apply this tradeoff here and relax the span
2471 2469 # constraint for small enough content.
2472 2470 maxdist = self._srmingapsize
2473 2471 if (distance > maxdist or deltainfo.deltalen > textlen or
2474 2472 deltainfo.compresseddeltalen > textlen * 2 or
2475 2473 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2476 2474 return False
2477 2475
2478 2476 return True
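# Worked example (hypothetical numbers): with textlen = 1000 and no explicit
# maxdeltachainspan, maxdist works out to 4 * textlen = 4000; a candidate
# spanning 3500 bytes whose own delta is 400 bytes and whose chain of deltas
# totals 1800 bytes is accepted, while one whose chain totals 2500 bytes
# (> 2 * textlen) is rejected.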
2479 2477
2480 2478 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2481 2479 cachedelta, ifh, dfh, alwayscache=False,
2482 2480 deltacomputer=None):
2483 2481 """internal function to add revisions to the log
2484 2482
2485 2483 see addrevision for argument descriptions.
2486 2484
2487 2485 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2488 2486
2489 2487 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2490 2488 be used.
2491 2489
2492 2490 invariants:
2493 2491 - rawtext is optional (can be None); if not set, cachedelta must be set.
2494 2492 if both are set, they must correspond to each other.
2495 2493 """
2496 2494 if node == nullid:
2497 2495 raise RevlogError(_("%s: attempt to add null revision") %
2498 2496 (self.indexfile))
2499 2497 if node == wdirid or node in wdirfilenodeids:
2500 2498 raise RevlogError(_("%s: attempt to add wdir revision") %
2501 2499 (self.indexfile))
2502 2500
2503 2501 if self._inline:
2504 2502 fh = ifh
2505 2503 else:
2506 2504 fh = dfh
2507 2505
2508 2506 btext = [rawtext]
2509 2507
2510 2508 curr = len(self)
2511 2509 prev = curr - 1
2512 2510 offset = self.end(prev)
2513 2511 p1r, p2r = self.rev(p1), self.rev(p2)
2514 2512
2515 2513 # full versions are inserted when the needed deltas
2516 2514 # become comparable to the uncompressed text
2517 2515 if rawtext is None:
2518 2516 # need rawtext size, before changed by flag processors, which is
2519 2517 # the non-raw size. use revlog explicitly to avoid filelog's extra
2520 2518 # logic that might remove metadata size.
2521 2519 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2522 2520 cachedelta[1])
2523 2521 else:
2524 2522 textlen = len(rawtext)
2525 2523
2526 2524 if deltacomputer is None:
2527 2525 deltacomputer = _deltacomputer(self)
2528 2526
2529 2527 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2530 2528
2531 2529 # no delta for flag processor revision (see "candelta" for why)
2532 2530 # not calling candelta since only one revision needs testing, also to
2533 2531 # avoid the overhead of fetching flags again.
2534 2532 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2535 2533 deltainfo = None
2536 2534 else:
2537 2535 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2538 2536
2539 2537 if deltainfo is not None:
2540 2538 base = deltainfo.base
2541 2539 chainbase = deltainfo.chainbase
2542 2540 data = deltainfo.data
2543 2541 l = deltainfo.deltalen
2544 2542 else:
2545 2543 rawtext = deltacomputer.buildtext(revinfo, fh)
2546 2544 data = self.compress(rawtext)
2547 2545 l = len(data[1]) + len(data[0])
2548 2546 base = chainbase = curr
2549 2547
2550 2548 e = (offset_type(offset, flags), l, textlen,
2551 2549 base, link, p1r, p2r, node)
2552 2550 self.index.append(e)
2553 2551 self.nodemap[node] = curr
2554 2552
2555 2553 entry = self._io.packentry(e, self.node, self.version, curr)
2556 2554 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2557 2555
2558 2556 if alwayscache and rawtext is None:
2559 2557 rawtext = deltacomputer._buildtext(revinfo, fh)
2560 2558
2561 2559 if type(rawtext) == bytes: # only accept immutable objects
2562 2560 self._cache = (node, curr, rawtext)
2563 2561 self._chainbasecache[curr] = chainbase
2564 2562 return node
2565 2563
2566 2564 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2567 2565 # Files opened in a+ mode have inconsistent behavior on various
2568 2566 # platforms. Windows requires that a file positioning call be made
2569 2567 # when the file handle transitions between reads and writes. See
2570 2568 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2571 2569 # platforms, Python or the platform itself can be buggy. Some versions
2572 2570 # of Solaris have been observed to not append at the end of the file
2573 2571 # if the file was seeked to before the end. See issue4943 for more.
2574 2572 #
2575 2573 # We work around this issue by inserting a seek() before writing.
2576 2574 # Note: This is likely not necessary on Python 3.
2577 2575 ifh.seek(0, os.SEEK_END)
2578 2576 if dfh:
2579 2577 dfh.seek(0, os.SEEK_END)
2580 2578
2581 2579 curr = len(self) - 1
2582 2580 if not self._inline:
2583 2581 transaction.add(self.datafile, offset)
2584 2582 transaction.add(self.indexfile, curr * len(entry))
2585 2583 if data[0]:
2586 2584 dfh.write(data[0])
2587 2585 dfh.write(data[1])
2588 2586 ifh.write(entry)
2589 2587 else:
2590 2588 offset += curr * self._io.size
2591 2589 transaction.add(self.indexfile, offset, curr)
2592 2590 ifh.write(entry)
2593 2591 ifh.write(data[0])
2594 2592 ifh.write(data[1])
2595 2593 self._enforceinlinesize(transaction, ifh)
2596 2594
2597 2595 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2598 2596 """
2599 2597 add a delta group
2600 2598
2601 2599 given a set of deltas, add them to the revision log. the
2602 2600 first delta is against its parent, which should be in our
2603 2601 log, the rest are against the previous delta.
2604 2602
2605 2603 If ``addrevisioncb`` is defined, it will be called with arguments of
2606 2604 this revlog and the node that was added.
2607 2605 """
2608 2606
2609 2607 nodes = []
2610 2608
2611 2609 r = len(self)
2612 2610 end = 0
2613 2611 if r:
2614 2612 end = self.end(r - 1)
2615 2613 ifh = self._indexfp("a+")
2616 2614 isize = r * self._io.size
2617 2615 if self._inline:
2618 2616 transaction.add(self.indexfile, end + isize, r)
2619 2617 dfh = None
2620 2618 else:
2621 2619 transaction.add(self.indexfile, isize, r)
2622 2620 transaction.add(self.datafile, end)
2623 2621 dfh = self._datafp("a+")
2624 2622 def flush():
2625 2623 if dfh:
2626 2624 dfh.flush()
2627 2625 ifh.flush()
2628 2626 try:
2629 2627 deltacomputer = _deltacomputer(self)
2630 2628 # loop through our set of deltas
2631 2629 for data in deltas:
2632 2630 node, p1, p2, linknode, deltabase, delta, flags = data
2633 2631 link = linkmapper(linknode)
2634 2632 flags = flags or REVIDX_DEFAULT_FLAGS
2635 2633
2636 2634 nodes.append(node)
2637 2635
2638 2636 if node in self.nodemap:
2639 2637 # this can happen if two branches make the same change
2640 2638 continue
2641 2639
2642 2640 for p in (p1, p2):
2643 2641 if p not in self.nodemap:
2644 2642 raise LookupError(p, self.indexfile,
2645 2643 _('unknown parent'))
2646 2644
2647 2645 if deltabase not in self.nodemap:
2648 2646 raise LookupError(deltabase, self.indexfile,
2649 2647 _('unknown delta base'))
2650 2648
2651 2649 baserev = self.rev(deltabase)
2652 2650
2653 2651 if baserev != nullrev and self.iscensored(baserev):
2654 2652 # if base is censored, delta must be full replacement in a
2655 2653 # single patch operation
2656 2654 hlen = struct.calcsize(">lll")
2657 2655 oldlen = self.rawsize(baserev)
2658 2656 newlen = len(delta) - hlen
2659 2657 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2660 2658 raise error.CensoredBaseError(self.indexfile,
2661 2659 self.node(baserev))
2662 2660
2663 2661 if not flags and self._peek_iscensored(baserev, delta, flush):
2664 2662 flags |= REVIDX_ISCENSORED
2665 2663
2666 2664 # We assume consumers of addrevisioncb will want to retrieve
2667 2665 # the added revision, which will require a call to
2668 2666 # revision(). revision() will fast path if there is a cache
2669 2667 # hit. So, we tell _addrevision() to always cache in this case.
2670 2668 # We're only using addgroup() in the context of changegroup
2671 2669 # generation so the revision data can always be handled as raw
2672 2670 # by the flagprocessor.
2673 2671 self._addrevision(node, None, transaction, link,
2674 2672 p1, p2, flags, (baserev, delta),
2675 2673 ifh, dfh,
2676 2674 alwayscache=bool(addrevisioncb),
2677 2675 deltacomputer=deltacomputer)
2678 2676
2679 2677 if addrevisioncb:
2680 2678 addrevisioncb(self, node)
2681 2679
2682 2680 if not dfh and not self._inline:
2683 2681 # addrevision switched from inline to conventional
2684 2682 # reopen the index
2685 2683 ifh.close()
2686 2684 dfh = self._datafp("a+")
2687 2685 ifh = self._indexfp("a+")
2688 2686 finally:
2689 2687 if dfh:
2690 2688 dfh.close()
2691 2689 ifh.close()
2692 2690
2693 2691 return nodes
2694 2692
2695 2693 def iscensored(self, rev):
2696 2694 """Check if a file revision is censored."""
2697 2695 if not self._censorable:
2698 2696 return False
2699 2697
2700 2698 return self.flags(rev) & REVIDX_ISCENSORED
2701 2699
2702 2700 def _peek_iscensored(self, baserev, delta, flush):
2703 2701 """Quickly check if a delta produces a censored revision."""
2704 2702 if not self._censorable:
2705 2703 return False
2706 2704
2707 2705 # Fragile heuristic: unless new file meta keys are added alphabetically
2708 2706 # preceding "censored", all censored revisions are prefixed by
2709 2707 # "\1\ncensored:". A delta producing such a censored revision must be a
2710 2708 # full-replacement delta, so we inspect the first and only patch in the
2711 2709 # delta for this prefix.
2712 2710 hlen = struct.calcsize(">lll")
2713 2711 if len(delta) <= hlen:
2714 2712 return False
2715 2713
2716 2714 oldlen = self.rawsize(baserev)
2717 2715 newlen = len(delta) - hlen
2718 2716 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2719 2717 return False
2720 2718
2721 2719 add = "\1\ncensored:"
2722 2720 addlen = len(add)
2723 2721 return newlen >= addlen and delta[hlen:hlen + addlen] == add
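# Shape of a censoring delta (illustrative): a single full-replacement hunk
# whose ">lll" header covers the whole old text and whose replacement text
# begins with "\1\ncensored:", which is exactly the prefix the check above
# looks for.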
2724 2722
2725 2723 def getstrippoint(self, minlink):
2726 2724 """find the minimum rev that must be stripped to strip the linkrev
2727 2725
2728 2726 Returns a tuple containing the minimum rev and a set of all revs that
2729 2727 have linkrevs that will be broken by this strip.
2730 2728 """
2731 2729 brokenrevs = set()
2732 2730 strippoint = len(self)
2733 2731
2734 2732 heads = {}
2735 2733 futurelargelinkrevs = set()
2736 2734 for head in self.headrevs():
2737 2735 headlinkrev = self.linkrev(head)
2738 2736 heads[head] = headlinkrev
2739 2737 if headlinkrev >= minlink:
2740 2738 futurelargelinkrevs.add(headlinkrev)
2741 2739
2742 2740 # This algorithm involves walking down the rev graph, starting at the
2743 2741 # heads. Since the revs are topologically sorted according to linkrev,
2744 2742 # once all head linkrevs are below the minlink, we know there are
2745 2743 # no more revs that could have a linkrev greater than minlink.
2746 2744 # So we can stop walking.
2747 2745 while futurelargelinkrevs:
2748 2746 strippoint -= 1
2749 2747 linkrev = heads.pop(strippoint)
2750 2748
2751 2749 if linkrev < minlink:
2752 2750 brokenrevs.add(strippoint)
2753 2751 else:
2754 2752 futurelargelinkrevs.remove(linkrev)
2755 2753
2756 2754 for p in self.parentrevs(strippoint):
2757 2755 if p != nullrev:
2758 2756 plinkrev = self.linkrev(p)
2759 2757 heads[p] = plinkrev
2760 2758 if plinkrev >= minlink:
2761 2759 futurelargelinkrevs.add(plinkrev)
2762 2760
2763 2761 return strippoint, brokenrevs
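# Worked example (hypothetical linear 6-rev log with linkrev(r) == r): for
# minlink = 3 the walk starts at the head (rev 5), discards linkrevs 5, 4
# and 3 from futurelargelinkrevs while descending, and stops once only
# linkrevs below 3 remain, returning strippoint = 3 with an empty
# brokenrevs set.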
2764 2762
2765 2763 def strip(self, minlink, transaction):
2766 2764 """truncate the revlog on the first revision with a linkrev >= minlink
2767 2765
2768 2766 This function is called when we're stripping revision minlink and
2769 2767 its descendants from the repository.
2770 2768
2771 2769 We have to remove all revisions with linkrev >= minlink, because
2772 2770 the equivalent changelog revisions will be renumbered after the
2773 2771 strip.
2774 2772
2775 2773 So we truncate the revlog on the first of these revisions, and
2776 2774 trust that the caller has saved the revisions that shouldn't be
2777 2775 removed and that it'll re-add them after this truncation.
2778 2776 """
2779 2777 if len(self) == 0:
2780 2778 return
2781 2779
2782 2780 rev, _ = self.getstrippoint(minlink)
2783 2781 if rev == len(self):
2784 2782 return
2785 2783
2786 2784 # first truncate the files on disk
2787 2785 end = self.start(rev)
2788 2786 if not self._inline:
2789 2787 transaction.add(self.datafile, end)
2790 2788 end = rev * self._io.size
2791 2789 else:
2792 2790 end += rev * self._io.size
2793 2791
2794 2792 transaction.add(self.indexfile, end)
2795 2793
2796 2794 # then reset internal state in memory to forget those revisions
2797 2795 self._cache = None
2798 2796 self._chaininfocache = {}
2799 2797 self._chunkclear()
2800 2798 for x in pycompat.xrange(rev, len(self)):
2801 2799 del self.nodemap[self.node(x)]
2802 2800
2803 2801 del self.index[rev:-1]
2804 2802 self._nodepos = None
2805 2803
2806 2804 def checksize(self):
2807 2805 expected = 0
2808 2806 if len(self):
2809 2807 expected = max(0, self.end(len(self) - 1))
2810 2808
2811 2809 try:
2812 2810 with self._datafp() as f:
2813 2811 f.seek(0, 2)
2814 2812 actual = f.tell()
2815 2813 dd = actual - expected
2816 2814 except IOError as inst:
2817 2815 if inst.errno != errno.ENOENT:
2818 2816 raise
2819 2817 dd = 0
2820 2818
2821 2819 try:
2822 2820 f = self.opener(self.indexfile)
2823 2821 f.seek(0, 2)
2824 2822 actual = f.tell()
2825 2823 f.close()
2826 2824 s = self._io.size
2827 2825 i = max(0, actual // s)
2828 2826 di = actual - (i * s)
2829 2827 if self._inline:
2830 2828 databytes = 0
2831 2829 for r in self:
2832 2830 databytes += max(0, self.length(r))
2833 2831 dd = 0
2834 2832 di = actual - len(self) * s - databytes
2835 2833 except IOError as inst:
2836 2834 if inst.errno != errno.ENOENT:
2837 2835 raise
2838 2836 di = 0
2839 2837
2840 2838 return (dd, di)
2841 2839
2842 2840 def files(self):
2843 2841 res = [self.indexfile]
2844 2842 if not self._inline:
2845 2843 res.append(self.datafile)
2846 2844 return res
2847 2845
2848 2846 DELTAREUSEALWAYS = 'always'
2849 2847 DELTAREUSESAMEREVS = 'samerevs'
2850 2848 DELTAREUSENEVER = 'never'
2851 2849
2852 2850 DELTAREUSEFULLADD = 'fulladd'
2853 2851
2854 2852 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2855 2853
2856 2854 def clone(self, tr, destrevlog, addrevisioncb=None,
2857 2855 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2858 2856 """Copy this revlog to another, possibly with format changes.
2859 2857
2860 2858 The destination revlog will contain the same revisions and nodes.
2861 2859 However, it may not be bit-for-bit identical due to e.g. delta encoding
2862 2860 differences.
2863 2861
2864 2862 The ``deltareuse`` argument controls how deltas from the existing revlog
2865 2863 are preserved in the destination revlog. The argument can have the
2866 2864 following values:
2867 2865
2868 2866 DELTAREUSEALWAYS
2869 2867 Deltas will always be reused (if possible), even if the destination
2870 2868 revlog would not select the same revisions for the delta. This is the
2871 2869 fastest mode of operation.
2872 2870 DELTAREUSESAMEREVS
2873 2871 Deltas will be reused if the destination revlog would pick the same
2874 2872 revisions for the delta. This mode strikes a balance between speed
2875 2873 and optimization.
2876 2874 DELTAREUSENEVER
2877 2875 Deltas will never be reused. This is the slowest mode of execution.
2878 2876 This mode can be used to recompute deltas (e.g. if the diff/delta
2879 2877 algorithm changes).
2880 2878
2881 2879 Delta computation can be slow, so the choice of delta reuse policy can
2882 2880 significantly affect run time.
2883 2881
2884 2882 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2885 2883 two extremes. Deltas will be reused if they are appropriate. But if the
2886 2884 delta could choose a better revision, it will do so. This means if you
2887 2885 are converting a non-generaldelta revlog to a generaldelta revlog,
2888 2886 deltas will be recomputed if the delta's parent isn't a parent of the
2889 2887 revision.
2890 2888
2891 2889 In addition to the delta policy, the ``deltabothparents`` argument
2892 2890 controls whether to compute deltas against both parents for merges.
2893 2891 When not specified, the destination revlog's existing setting is used.
2894 2892 """
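# Usage sketch (hypothetical ``src`` and ``dst`` revlogs inside an open
# transaction ``tr``):
#   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
# recomputes deltas only where the destination would pick a different base,
# which is the usual tradeoff for format upgrades.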
2895 2893 if deltareuse not in self.DELTAREUSEALL:
2896 2894 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2897 2895
2898 2896 if len(destrevlog):
2899 2897 raise ValueError(_('destination revlog is not empty'))
2900 2898
2901 2899 if getattr(self, 'filteredrevs', None):
2902 2900 raise ValueError(_('source revlog has filtered revisions'))
2903 2901 if getattr(destrevlog, 'filteredrevs', None):
2904 2902 raise ValueError(_('destination revlog has filtered revisions'))
2905 2903
2906 2904 # lazydeltabase controls whether to reuse a cached delta, if possible.
2907 2905 oldlazydeltabase = destrevlog._lazydeltabase
2908 2906 oldamd = destrevlog._deltabothparents
2909 2907
2910 2908 try:
2911 2909 if deltareuse == self.DELTAREUSEALWAYS:
2912 2910 destrevlog._lazydeltabase = True
2913 2911 elif deltareuse == self.DELTAREUSESAMEREVS:
2914 2912 destrevlog._lazydeltabase = False
2915 2913
2916 2914 destrevlog._deltabothparents = deltabothparents or oldamd
2917 2915
2918 2916 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2919 2917 self.DELTAREUSESAMEREVS)
2920 2918
2921 2919 deltacomputer = _deltacomputer(destrevlog)
2922 2920 index = self.index
2923 2921 for rev in self:
2924 2922 entry = index[rev]
2925 2923
2926 2924 # Some classes override linkrev to take filtered revs into
2927 2925 # account. Use raw entry from index.
2928 2926 flags = entry[0] & 0xffff
2929 2927 linkrev = entry[4]
2930 2928 p1 = index[entry[5]][7]
2931 2929 p2 = index[entry[6]][7]
2932 2930 node = entry[7]
2933 2931
2934 2932 # (Possibly) reuse the delta from the revlog if allowed and
2935 2933 # the revlog chunk is a delta.
2936 2934 cachedelta = None
2937 2935 rawtext = None
2938 2936 if populatecachedelta:
2939 2937 dp = self.deltaparent(rev)
2940 2938 if dp != nullrev:
2941 2939 cachedelta = (dp, bytes(self._chunk(rev)))
2942 2940
2943 2941 if not cachedelta:
2944 2942 rawtext = self.revision(rev, raw=True)
2945 2943
2946 2944
2947 2945 if deltareuse == self.DELTAREUSEFULLADD:
2948 2946 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2949 2947 cachedelta=cachedelta,
2950 2948 node=node, flags=flags,
2951 2949 deltacomputer=deltacomputer)
2952 2950 else:
2953 2951 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2954 2952 checkambig=False)
2955 2953 dfh = None
2956 2954 if not destrevlog._inline:
2957 2955 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2958 2956 try:
2959 2957 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2960 2958 p2, flags, cachedelta, ifh, dfh,
2961 2959 deltacomputer=deltacomputer)
2962 2960 finally:
2963 2961 if dfh:
2964 2962 dfh.close()
2965 2963 ifh.close()
2966 2964
2967 2965 if addrevisioncb:
2968 2966 addrevisioncb(self, rev, node)
2969 2967 finally:
2970 2968 destrevlog._lazydeltabase = oldlazydeltabase
2971 2969 destrevlog._deltabothparents = oldamd