parsers: a C implementation of the new ancestors algorithm...
Bryan O'Sullivan
r18988:5bae9367 default
@@ -1,1573 +1,1935 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 18 {
19 19 char c = p[off];
20 20
21 21 if (c >= '0' && c <= '9')
22 22 return c - '0';
23 23 if (c >= 'a' && c <= 'f')
24 24 return c - 'a' + 10;
25 25 if (c >= 'A' && c <= 'F')
26 26 return c - 'A' + 10;
27 27
28 28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 29 return 0;
30 30 }
31 31
32 32 /*
33 33 * Turn a hex-encoded string into binary.
34 34 */
35 35 static PyObject *unhexlify(const char *str, int len)
36 36 {
37 37 PyObject *ret;
38 38 char *d;
39 39 int i;
40 40
41 41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42 42
43 43 if (!ret)
44 44 return NULL;
45 45
46 46 d = PyBytes_AsString(ret);
47 47
48 48 for (i = 0; i < len;) {
49 49 int hi = hexdigit(str, i++);
50 50 int lo = hexdigit(str, i++);
51 51 *d++ = (hi << 4) | lo;
52 52 }
53 53
54 54 return ret;
55 55 }
56 56
57 57 /*
58 58 * This code assumes that a manifest is stitched together with newline
59 59 * ('\n') characters.
60 60 */
61 61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 62 {
63 63 PyObject *mfdict, *fdict;
64 64 char *str, *cur, *start, *zero;
65 65 int len;
66 66
67 67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 68 &PyDict_Type, &mfdict,
69 69 &PyDict_Type, &fdict,
70 70 &str, &len))
71 71 goto quit;
72 72
73 73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 74 PyObject *file = NULL, *node = NULL;
75 75 PyObject *flags = NULL;
76 76 ptrdiff_t nlen;
77 77
78 78 if (!*cur) {
79 79 zero = cur;
80 80 continue;
81 81 }
82 82 else if (*cur != '\n')
83 83 continue;
84 84
85 85 if (!zero) {
86 86 PyErr_SetString(PyExc_ValueError,
87 87 "manifest entry has no separator");
88 88 goto quit;
89 89 }
90 90
91 91 file = PyBytes_FromStringAndSize(start, zero - start);
92 92
93 93 if (!file)
94 94 goto bail;
95 95
96 96 nlen = cur - zero - 1;
97 97
98 98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 99 if (!node)
100 100 goto bail;
101 101
102 102 if (nlen > 40) {
103 103 flags = PyBytes_FromStringAndSize(zero + 41,
104 104 nlen - 40);
105 105 if (!flags)
106 106 goto bail;
107 107
108 108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 109 goto bail;
110 110 }
111 111
112 112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 113 goto bail;
114 114
115 115 start = cur + 1;
116 116 zero = NULL;
117 117
118 118 Py_XDECREF(flags);
119 119 Py_XDECREF(node);
120 120 Py_XDECREF(file);
121 121 continue;
122 122 bail:
123 123 Py_XDECREF(flags);
124 124 Py_XDECREF(node);
125 125 Py_XDECREF(file);
126 126 goto quit;
127 127 }
128 128
129 129 if (len > 0 && *(cur - 1) != '\n') {
130 130 PyErr_SetString(PyExc_ValueError,
131 131 "manifest contains trailing garbage");
132 132 goto quit;
133 133 }
134 134
135 135 Py_INCREF(Py_None);
136 136 return Py_None;
137 137 quit:
138 138 return NULL;
139 139 }
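Each manifest line handled above has the shape "filename\0<40 hex digits>[flags]\n". A minimal Python sketch of the same split (illustrative only, not Mercurial's API; assumes one well-formed line without the trailing newline):

    import binascii

    def parse_manifest_line(line):
        """Split one manifest line into (filename, 20-byte node, flags)."""
        fname, rest = line.split(b"\0", 1)
        node = binascii.unhexlify(rest[:40])   # same as unhexlify() above
        flags = rest[40:] or None              # e.g. b'x' or b'l', usually empty
        return fname, node, flags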
140 140
141 141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 142 {
143 143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 145 char *str, *cur, *end, *cpos;
146 146 int state, mode, size, mtime;
147 147 unsigned int flen;
148 148 int len;
149 149
150 150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 151 &PyDict_Type, &dmap,
152 152 &PyDict_Type, &cmap,
153 153 &str, &len))
154 154 goto quit;
155 155
156 156 /* read parents */
157 157 if (len < 40)
158 158 goto quit;
159 159
160 160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 161 if (!parents)
162 162 goto quit;
163 163
164 164 /* read filenames */
165 165 cur = str + 40;
166 166 end = str + len;
167 167
168 168 while (cur < end - 17) {
169 169 /* unpack header */
170 170 state = *cur;
171 171 mode = getbe32(cur + 1);
172 172 size = getbe32(cur + 5);
173 173 mtime = getbe32(cur + 9);
174 174 flen = getbe32(cur + 13);
175 175 cur += 17;
176 176 if (cur + flen > end || cur + flen < cur) {
177 177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 178 goto quit;
179 179 }
180 180
181 181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 182 if (!entry)
183 183 goto quit;
184 184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185 185
186 186 cpos = memchr(cur, 0, flen);
187 187 if (cpos) {
188 188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 189 cname = PyBytes_FromStringAndSize(cpos + 1,
190 190 flen - (cpos - cur) - 1);
191 191 if (!fname || !cname ||
192 192 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 193 PyDict_SetItem(dmap, fname, entry) == -1)
194 194 goto quit;
195 195 Py_DECREF(cname);
196 196 } else {
197 197 fname = PyBytes_FromStringAndSize(cur, flen);
198 198 if (!fname ||
199 199 PyDict_SetItem(dmap, fname, entry) == -1)
200 200 goto quit;
201 201 }
202 202 cur += flen;
203 203 Py_DECREF(fname);
204 204 Py_DECREF(entry);
205 205 fname = cname = entry = NULL;
206 206 }
207 207
208 208 ret = parents;
209 209 Py_INCREF(ret);
210 210 quit:
211 211 Py_XDECREF(fname);
212 212 Py_XDECREF(cname);
213 213 Py_XDECREF(entry);
214 214 Py_XDECREF(parents);
215 215 return ret;
216 216 }
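Each dirstate record unpacked above is a 17-byte big-endian header (one state byte, then mode, size, mtime and filename length as 32-bit integers) followed by the filename, with an optional NUL-separated copy source. A Python sketch of the same decoding (field names follow the C locals; this mirrors the loop above, not Mercurial's pure-Python module verbatim):

    import struct

    _header = struct.Struct(">c4i")    # state, mode, size, mtime, flen: 17 bytes

    def iter_dirstate(data):
        """Yield (state, mode, size, mtime, name, copysource) records.

        The first 40 bytes of a dirstate hold the two parent hashes."""
        pos = 40
        while pos + _header.size <= len(data):
            state, mode, size, mtime, flen = _header.unpack_from(data, pos)
            pos += _header.size
            name, copy = data[pos:pos + flen], None
            pos += flen
            if b"\0" in name:
                name, copy = name.split(b"\0", 1)
            yield state, mode, size, mtime, name, copy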
217 217
218 218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 219 {
220 220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 221 long val;
222 222
223 223 if (PyInt_Check(o))
224 224 val = PyInt_AS_LONG(o);
225 225 else if (PyLong_Check(o)) {
226 226 val = PyLong_AsLong(o);
227 227 if (val == -1 && PyErr_Occurred())
228 228 return -1;
229 229 } else {
230 230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 231 return -1;
232 232 }
233 233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 234 PyErr_SetString(PyExc_OverflowError,
235 235 "Python value to large to convert to uint32_t");
236 236 return -1;
237 237 }
238 238 *v = (uint32_t)val;
239 239 return 0;
240 240 }
241 241
242 242 static PyObject *dirstate_unset;
243 243
244 244 /*
245 245 * Efficiently pack a dirstate object into its on-disk format.
246 246 */
247 247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 248 {
249 249 PyObject *packobj = NULL;
250 250 PyObject *map, *copymap, *pl;
251 251 Py_ssize_t nbytes, pos, l;
252 252 PyObject *k, *v, *pn;
253 253 char *p, *s;
254 254 double now;
255 255
256 256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 258 &pl, &now))
259 259 return NULL;
260 260
261 261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 263 return NULL;
264 264 }
265 265
266 266 /* Figure out how much we need to allocate. */
267 267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 268 PyObject *c;
269 269 if (!PyString_Check(k)) {
270 270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 271 goto bail;
272 272 }
273 273 nbytes += PyString_GET_SIZE(k) + 17;
274 274 c = PyDict_GetItem(copymap, k);
275 275 if (c) {
276 276 if (!PyString_Check(c)) {
277 277 PyErr_SetString(PyExc_TypeError,
278 278 "expected string key");
279 279 goto bail;
280 280 }
281 281 nbytes += PyString_GET_SIZE(c) + 1;
282 282 }
283 283 }
284 284
285 285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 286 if (packobj == NULL)
287 287 goto bail;
288 288
289 289 p = PyString_AS_STRING(packobj);
290 290
291 291 pn = PySequence_ITEM(pl, 0);
292 292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 294 goto bail;
295 295 }
296 296 memcpy(p, s, l);
297 297 p += 20;
298 298 pn = PySequence_ITEM(pl, 1);
299 299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 301 goto bail;
302 302 }
303 303 memcpy(p, s, l);
304 304 p += 20;
305 305
306 306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 307 uint32_t mode, size, mtime;
308 308 Py_ssize_t len, l;
309 309 PyObject *o;
310 310 char *s, *t;
311 311
312 312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 314 goto bail;
315 315 }
316 316 o = PyTuple_GET_ITEM(v, 0);
317 317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 319 goto bail;
320 320 }
321 321 *p++ = *s;
322 322 if (getintat(v, 1, &mode) == -1)
323 323 goto bail;
324 324 if (getintat(v, 2, &size) == -1)
325 325 goto bail;
326 326 if (getintat(v, 3, &mtime) == -1)
327 327 goto bail;
328 328 if (*s == 'n' && mtime == (uint32_t)now) {
329 329 /* See pure/parsers.py:pack_dirstate for why we do
330 330 * this. */
331 331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
332 332 goto bail;
333 333 mode = 0, size = -1, mtime = -1;
334 334 }
335 335 putbe32(mode, p);
336 336 putbe32(size, p + 4);
337 337 putbe32(mtime, p + 8);
338 338 t = p + 12;
339 339 p += 16;
340 340 len = PyString_GET_SIZE(k);
341 341 memcpy(p, PyString_AS_STRING(k), len);
342 342 p += len;
343 343 o = PyDict_GetItem(copymap, k);
344 344 if (o) {
345 345 *p++ = '\0';
346 346 l = PyString_GET_SIZE(o);
347 347 memcpy(p, PyString_AS_STRING(o), l);
348 348 p += l;
349 349 len += l + 1;
350 350 }
351 351 putbe32((uint32_t)len, t);
352 352 }
353 353
354 354 pos = p - PyString_AS_STRING(packobj);
355 355 if (pos != nbytes) {
356 356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
357 357 (long)pos, (long)nbytes);
358 358 goto bail;
359 359 }
360 360
361 361 return packobj;
362 362 bail:
363 363 Py_XDECREF(packobj);
364 364 return NULL;
365 365 }
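The mtime == (uint32_t)now check above guards a classic race: a file written in the same second the dirstate is saved could be modified again within that second without its mtime changing, so such entries are stored with size and mtime of -1 to force a content comparison on the next status run. A sketch of the guard in Python (hypothetical helper, mirroring the intent of pure/parsers.py:pack_dirstate rather than its exact code):

    def clamp_entry(state, mode, size, mtime, now):
        """Return the dirstate tuple to write; now is the write timestamp."""
        if state == 'n' and mtime == int(now):
            # still racy within this second: write the entry as "unset"
            return (state, 0, -1, -1)
        return (state, mode, size, mtime)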
366 366
367 367 /*
368 368 * A base-16 trie for fast node->rev mapping.
369 369 *
370 370 * Positive value is index of the next node in the trie
371 371 * Negative value is a leaf: -(rev + 1)
372 372 * Zero is empty
373 373 */
374 374 typedef struct {
375 375 int children[16];
376 376 } nodetree;
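The three cases above are packed into a single int per child slot; a small Python sketch of the encoding (names are illustrative):

    EMPTY = 0

    def leaf(rev):        # store a revision as a leaf: rev 0 -> -1, rev 1 -> -2
        return -rev - 1

    def unleaf(v):        # recover the revision from a leaf value
        return -v - 1

    def is_leaf(v):
        return v < 0      # positive values index the next nodetree instead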
377 377
378 378 /*
379 379 * This class has two behaviours.
380 380 *
381 381 * When used in a list-like way (with integer keys), we decode an
382 382 * entry in a RevlogNG index file on demand. Our last entry is a
383 383 * sentinel, always a nullid. We have limited support for
384 384 * integer-keyed insert and delete, only at elements right before the
385 385 * sentinel.
386 386 *
387 387 * With string keys, we lazily perform a reverse mapping from node to
388 388 * rev, using a base-16 trie.
389 389 */
390 390 typedef struct {
391 391 PyObject_HEAD
392 392 /* Type-specific fields go here. */
393 393 PyObject *data; /* raw bytes of index */
394 394 PyObject **cache; /* cached tuples */
395 395 const char **offsets; /* populated on demand */
396 396 Py_ssize_t raw_length; /* original number of elements */
397 397 Py_ssize_t length; /* current number of elements */
398 398 PyObject *added; /* populated on demand */
399 399 PyObject *headrevs; /* cache, invalidated on changes */
400 400 nodetree *nt; /* base-16 trie */
401 401 int ntlength; /* # nodes in use */
402 402 int ntcapacity; /* # nodes allocated */
403 403 int ntdepth; /* maximum depth of tree */
404 404 int ntsplits; /* # splits performed */
405 405 int ntrev; /* last rev scanned */
406 406 int ntlookups; /* # lookups */
407 407 int ntmisses; /* # lookups that miss the cache */
408 408 int inlined;
409 409 } indexObject;
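In Python terms, the two behaviours mean a single object answers both integer and 20-byte-node subscripts; a hedged usage sketch (assuming an index obtained from parse_index2 as shown at the bottom of this file):

    def demo(index, node):
        entry = index[0]      # list-like: the decoded RevlogNG tuple for rev 0
        (offset_flags, comp_len, uncomp_len,
         base_rev, link_rev, p1, p2, node_id) = entry
        rev = index[node]     # dict-like: node -> rev, RevlogError if absent
        return rev, entry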
410 410
411 411 static Py_ssize_t index_length(const indexObject *self)
412 412 {
413 413 if (self->added == NULL)
414 414 return self->length;
415 415 return self->length + PyList_GET_SIZE(self->added);
416 416 }
417 417
418 418 static PyObject *nullentry;
419 419 static const char nullid[20];
420 420
421 421 static long inline_scan(indexObject *self, const char **offsets);
422 422
423 423 #if LONG_MAX == 0x7fffffffL
424 424 static char *tuple_format = "Kiiiiiis#";
425 425 #else
426 426 static char *tuple_format = "kiiiiiis#";
427 427 #endif
428 428
429 429 /* A RevlogNG v1 index entry is 64 bytes long. */
430 430 static const long v1_hdrsize = 64;
431 431
432 432 /*
433 433 * Return a pointer to the beginning of a RevlogNG record.
434 434 */
435 435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
436 436 {
437 437 if (self->inlined && pos > 0) {
438 438 if (self->offsets == NULL) {
439 439 self->offsets = malloc(self->raw_length *
440 440 sizeof(*self->offsets));
441 441 if (self->offsets == NULL)
442 442 return (const char *)PyErr_NoMemory();
443 443 inline_scan(self, self->offsets);
444 444 }
445 445 return self->offsets[pos];
446 446 }
447 447
448 448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
449 449 }
450 450
451 451 /*
452 452 * RevlogNG format (all in big endian, data may be inlined):
453 453 * 6 bytes: offset
454 454 * 2 bytes: flags
455 455 * 4 bytes: compressed length
456 456 * 4 bytes: uncompressed length
457 457 * 4 bytes: base revision
458 458 * 4 bytes: link revision
459 459 * 4 bytes: parent 1 revision
460 460 * 4 bytes: parent 2 revision
461 461 * 32 bytes: nodeid (only 20 bytes used)
462 462 */
463 463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
464 464 {
465 465 uint64_t offset_flags;
466 466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
467 467 const char *c_node_id;
468 468 const char *data;
469 469 Py_ssize_t length = index_length(self);
470 470 PyObject *entry;
471 471
472 472 if (pos < 0)
473 473 pos += length;
474 474
475 475 if (pos < 0 || pos >= length) {
476 476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
477 477 return NULL;
478 478 }
479 479
480 480 if (pos == length - 1) {
481 481 Py_INCREF(nullentry);
482 482 return nullentry;
483 483 }
484 484
485 485 if (pos >= self->length - 1) {
486 486 PyObject *obj;
487 487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
488 488 Py_INCREF(obj);
489 489 return obj;
490 490 }
491 491
492 492 if (self->cache) {
493 493 if (self->cache[pos]) {
494 494 Py_INCREF(self->cache[pos]);
495 495 return self->cache[pos];
496 496 }
497 497 } else {
498 498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
499 499 if (self->cache == NULL)
500 500 return PyErr_NoMemory();
501 501 }
502 502
503 503 data = index_deref(self, pos);
504 504 if (data == NULL)
505 505 return NULL;
506 506
507 507 offset_flags = getbe32(data + 4);
508 508 if (pos == 0) /* mask out version number for the first entry */
509 509 offset_flags &= 0xFFFF;
510 510 else {
511 511 uint32_t offset_high = getbe32(data);
512 512 offset_flags |= ((uint64_t)offset_high) << 32;
513 513 }
514 514
515 515 comp_len = getbe32(data + 8);
516 516 uncomp_len = getbe32(data + 12);
517 517 base_rev = getbe32(data + 16);
518 518 link_rev = getbe32(data + 20);
519 519 parent_1 = getbe32(data + 24);
520 520 parent_2 = getbe32(data + 28);
521 521 c_node_id = data + 32;
522 522
523 523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
524 524 uncomp_len, base_rev, link_rev,
525 525 parent_1, parent_2, c_node_id, 20);
526 526
527 527 if (entry)
528 528 PyObject_GC_UnTrack(entry);
529 529
530 530 self->cache[pos] = entry;
531 531 Py_XINCREF(entry); /* entry may be NULL if Py_BuildValue failed */
532 532
533 533 return entry;
534 534 }
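The same 64-byte layout can be expressed with the struct module; the format string below is indexformatng from revlog.py, the second file in this changeset (sketch only):

    import struct

    v1_record = struct.Struct(">Qiiiiii20s12x")   # 64 bytes, matching v1_hdrsize

    def unpack_entry(data, pos):
        """Decode one RevlogNG index entry at byte offset pos."""
        (offset_flags, comp_len, uncomp_len, base_rev,
         link_rev, parent_1, parent_2, node_id) = v1_record.unpack_from(data, pos)
        # high 48 bits: offset into the data file; low 16 bits: per-rev flags
        return (offset_flags >> 16, offset_flags & 0xFFFF, comp_len,
                uncomp_len, base_rev, link_rev, parent_1, parent_2, node_id)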
535 535
536 536 /*
537 537 * Return the 20-byte SHA of the node corresponding to the given rev.
538 538 */
539 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 540 {
541 541 Py_ssize_t length = index_length(self);
542 542 const char *data;
543 543
544 544 if (pos == length - 1 || pos == INT_MAX)
545 545 return nullid;
546 546
547 547 if (pos >= length)
548 548 return NULL;
549 549
550 550 if (pos >= self->length - 1) {
551 551 PyObject *tuple, *str;
552 552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
553 553 str = PyTuple_GetItem(tuple, 7);
554 554 return str ? PyString_AS_STRING(str) : NULL;
555 555 }
556 556
557 557 data = index_deref(self, pos);
558 558 return data ? data + 32 : NULL;
559 559 }
560 560
561 561 static int nt_insert(indexObject *self, const char *node, int rev);
562 562
563 563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
564 564 {
565 565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
566 566 return -1;
567 567 if (*nodelen == 20)
568 568 return 0;
569 569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
570 570 return -1;
571 571 }
572 572
573 573 static PyObject *index_insert(indexObject *self, PyObject *args)
574 574 {
575 575 PyObject *obj;
576 576 char *node;
577 577 long offset;
578 578 Py_ssize_t len, nodelen;
579 579
580 580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
581 581 return NULL;
582 582
583 583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
584 584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
585 585 return NULL;
586 586 }
587 587
588 588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
589 589 return NULL;
590 590
591 591 len = index_length(self);
592 592
593 593 if (offset < 0)
594 594 offset += len;
595 595
596 596 if (offset != len - 1) {
597 597 PyErr_SetString(PyExc_IndexError,
598 598 "insert only supported at index -1");
599 599 return NULL;
600 600 }
601 601
602 602 if (offset > INT_MAX) {
603 603 PyErr_SetString(PyExc_ValueError,
604 604 "currently only 2**31 revs supported");
605 605 return NULL;
606 606 }
607 607
608 608 if (self->added == NULL) {
609 609 self->added = PyList_New(0);
610 610 if (self->added == NULL)
611 611 return NULL;
612 612 }
613 613
614 614 if (PyList_Append(self->added, obj) == -1)
615 615 return NULL;
616 616
617 617 if (self->nt)
618 618 nt_insert(self, node, (int)offset);
619 619
620 620 Py_CLEAR(self->headrevs);
621 621 Py_RETURN_NONE;
622 622 }
623 623
624 624 static void _index_clearcaches(indexObject *self)
625 625 {
626 626 if (self->cache) {
627 627 Py_ssize_t i;
628 628
629 629 for (i = 0; i < self->raw_length; i++)
630 630 Py_CLEAR(self->cache[i]);
631 631 free(self->cache);
632 632 self->cache = NULL;
633 633 }
634 634 if (self->offsets) {
635 635 free(self->offsets);
636 636 self->offsets = NULL;
637 637 }
638 638 if (self->nt) {
639 639 free(self->nt);
640 640 self->nt = NULL;
641 641 }
642 642 Py_CLEAR(self->headrevs);
643 643 }
644 644
645 645 static PyObject *index_clearcaches(indexObject *self)
646 646 {
647 647 _index_clearcaches(self);
648 648 self->ntlength = self->ntcapacity = 0;
649 649 self->ntdepth = self->ntsplits = 0;
650 650 self->ntrev = -1;
651 651 self->ntlookups = self->ntmisses = 0;
652 652 Py_RETURN_NONE;
653 653 }
654 654
655 655 static PyObject *index_stats(indexObject *self)
656 656 {
657 657 PyObject *obj = PyDict_New();
658 658
659 659 if (obj == NULL)
660 660 return NULL;
661 661
662 662 #define istat(__n, __d) \
663 663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
664 664 goto bail;
665 665
666 666 if (self->added) {
667 667 Py_ssize_t len = PyList_GET_SIZE(self->added);
668 668 if (PyDict_SetItemString(obj, "index entries added",
669 669 PyInt_FromSsize_t(len)) == -1)
670 670 goto bail;
671 671 }
672 672
673 673 if (self->raw_length != self->length - 1)
674 674 istat(raw_length, "revs on disk");
675 675 istat(length, "revs in memory");
676 676 istat(ntcapacity, "node trie capacity");
677 677 istat(ntdepth, "node trie depth");
678 678 istat(ntlength, "node trie count");
679 679 istat(ntlookups, "node trie lookups");
680 680 istat(ntmisses, "node trie misses");
681 681 istat(ntrev, "node trie last rev scanned");
682 682 istat(ntsplits, "node trie splits");
683 683
684 684 #undef istat
685 685
686 686 return obj;
687 687
688 688 bail:
689 689 Py_XDECREF(obj);
690 690 return NULL;
691 691 }
692 692
693 693 /*
694 694 * When we cache a list, we want to be sure the caller can't mutate
695 695 * the cached copy.
696 696 */
697 697 static PyObject *list_copy(PyObject *list)
698 698 {
699 699 Py_ssize_t len = PyList_GET_SIZE(list);
700 700 PyObject *newlist = PyList_New(len);
701 701 Py_ssize_t i;
702 702
703 703 if (newlist == NULL)
704 704 return NULL;
705 705
706 706 for (i = 0; i < len; i++) {
707 707 PyObject *obj = PyList_GET_ITEM(list, i);
708 708 Py_INCREF(obj);
709 709 PyList_SET_ITEM(newlist, i, obj);
710 710 }
711 711
712 712 return newlist;
713 713 }
714 714
715 715 static PyObject *index_headrevs(indexObject *self)
716 716 {
717 717 Py_ssize_t i, len, addlen;
718 718 char *nothead = NULL;
719 719 PyObject *heads;
720 720
721 721 if (self->headrevs)
722 722 return list_copy(self->headrevs);
723 723
724 724 len = index_length(self) - 1;
725 725 heads = PyList_New(0);
726 726 if (heads == NULL)
727 727 goto bail;
728 728 if (len == 0) {
729 729 PyObject *nullid = PyInt_FromLong(-1);
730 730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
731 731 Py_XDECREF(nullid);
732 732 goto bail;
733 733 }
734 734 goto done;
735 735 }
736 736
737 737 nothead = calloc(len, 1);
738 738 if (nothead == NULL)
739 739 goto bail;
740 740
741 741 for (i = 0; i < self->raw_length; i++) {
742 742 const char *data = index_deref(self, i);
743 743 int parent_1 = getbe32(data + 24);
744 744 int parent_2 = getbe32(data + 28);
745 745 if (parent_1 >= 0)
746 746 nothead[parent_1] = 1;
747 747 if (parent_2 >= 0)
748 748 nothead[parent_2] = 1;
749 749 }
750 750
751 751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
752 752
753 753 for (i = 0; i < addlen; i++) {
754 754 PyObject *rev = PyList_GET_ITEM(self->added, i);
755 755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
756 756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
757 757 long parent_1, parent_2;
758 758
759 759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
760 760 PyErr_SetString(PyExc_TypeError,
761 761 "revlog parents are invalid");
762 762 goto bail;
763 763 }
764 764 parent_1 = PyInt_AS_LONG(p1);
765 765 parent_2 = PyInt_AS_LONG(p2);
766 766 if (parent_1 >= 0)
767 767 nothead[parent_1] = 1;
768 768 if (parent_2 >= 0)
769 769 nothead[parent_2] = 1;
770 770 }
771 771
772 772 for (i = 0; i < len; i++) {
773 773 PyObject *head;
774 774
775 775 if (nothead[i])
776 776 continue;
777 777 head = PyInt_FromLong(i);
778 778 if (head == NULL || PyList_Append(heads, head) == -1) {
779 779 Py_XDECREF(head);
780 780 goto bail;
781 781 }
782 782 }
783 783
784 784 done:
785 785 self->headrevs = heads;
786 786 free(nothead);
787 787 return list_copy(self->headrevs);
788 788 bail:
789 789 Py_XDECREF(heads);
790 790 free(nothead);
791 791 return NULL;
792 792 }
793 793
794 794 static inline int nt_level(const char *node, Py_ssize_t level)
795 795 {
796 796 int v = node[level>>1];
797 797 if (!(level & 1))
798 798 v >>= 4;
799 799 return v & 0xf;
800 800 }
801 801
802 802 /*
803 803 * Return values:
804 804 *
805 805 * -4: match is ambiguous (multiple candidates)
806 806 * -2: not found
807 807 * rest: valid rev
808 808 */
809 809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
810 810 int hex)
811 811 {
812 812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
813 813 int level, maxlevel, off;
814 814
815 815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
816 816 return -1;
817 817
818 818 if (self->nt == NULL)
819 819 return -2;
820 820
821 821 if (hex)
822 822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
823 823 else
824 824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
825 825
826 826 for (level = off = 0; level < maxlevel; level++) {
827 827 int k = getnybble(node, level);
828 828 nodetree *n = &self->nt[off];
829 829 int v = n->children[k];
830 830
831 831 if (v < 0) {
832 832 const char *n;
833 833 Py_ssize_t i;
834 834
835 835 v = -v - 1;
836 836 n = index_node(self, v);
837 837 if (n == NULL)
838 838 return -2;
839 839 for (i = level; i < maxlevel; i++)
840 840 if (getnybble(node, i) != nt_level(n, i))
841 841 return -2;
842 842 return v;
843 843 }
844 844 if (v == 0)
845 845 return -2;
846 846 off = v;
847 847 }
848 848 /* multiple matches against an ambiguous prefix */
849 849 return -4;
850 850 }
851 851
852 852 static int nt_new(indexObject *self)
853 853 {
854 854 if (self->ntlength == self->ntcapacity) {
855 855 self->ntcapacity *= 2;
856 856 self->nt = realloc(self->nt,
857 857 self->ntcapacity * sizeof(nodetree));
858 858 if (self->nt == NULL) {
859 859 PyErr_SetString(PyExc_MemoryError, "out of memory");
860 860 return -1;
861 861 }
862 862 memset(&self->nt[self->ntlength], 0,
863 863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
864 864 }
865 865 return self->ntlength++;
866 866 }
867 867
868 868 static int nt_insert(indexObject *self, const char *node, int rev)
869 869 {
870 870 int level = 0;
871 871 int off = 0;
872 872
873 873 while (level < 40) {
874 874 int k = nt_level(node, level);
875 875 nodetree *n;
876 876 int v;
877 877
878 878 n = &self->nt[off];
879 879 v = n->children[k];
880 880
881 881 if (v == 0) {
882 882 n->children[k] = -rev - 1;
883 883 return 0;
884 884 }
885 885 if (v < 0) {
886 886 const char *oldnode = index_node(self, -v - 1);
887 887 int noff;
888 888
889 889 if (!oldnode || !memcmp(oldnode, node, 20)) {
890 890 n->children[k] = -rev - 1;
891 891 return 0;
892 892 }
893 893 noff = nt_new(self);
894 894 if (noff == -1)
895 895 return -1;
896 896 /* self->nt may have been changed by realloc */
897 897 self->nt[off].children[k] = noff;
898 898 off = noff;
899 899 n = &self->nt[off];
900 900 n->children[nt_level(oldnode, ++level)] = v;
901 901 if (level > self->ntdepth)
902 902 self->ntdepth = level;
903 903 self->ntsplits += 1;
904 904 } else {
905 905 level += 1;
906 906 off = v;
907 907 }
908 908 }
909 909
910 910 return -1;
911 911 }
912 912
913 913 static int nt_init(indexObject *self)
914 914 {
915 915 if (self->nt == NULL) {
916 916 self->ntcapacity = self->raw_length < 4
917 917 ? 4 : self->raw_length / 2;
918 918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
919 919 if (self->nt == NULL) {
920 920 PyErr_NoMemory();
921 921 return -1;
922 922 }
923 923 self->ntlength = 1;
924 924 self->ntrev = (int)index_length(self) - 1;
925 925 self->ntlookups = 1;
926 926 self->ntmisses = 0;
927 927 if (nt_insert(self, nullid, INT_MAX) == -1)
928 928 return -1;
929 929 }
930 930 return 0;
931 931 }
932 932
933 933 /*
934 934 * Return values:
935 935 *
936 936 * -3: error (exception set)
937 937 * -2: not found (no exception set)
938 938 * rest: valid rev
939 939 */
940 940 static int index_find_node(indexObject *self,
941 941 const char *node, Py_ssize_t nodelen)
942 942 {
943 943 int rev;
944 944
945 945 self->ntlookups++;
946 946 rev = nt_find(self, node, nodelen, 0);
947 947 if (rev >= -1)
948 948 return rev;
949 949
950 950 if (nt_init(self) == -1)
951 951 return -3;
952 952
953 953 /*
954 954 * For the first handful of lookups, we scan the entire index,
955 955 * and cache only the matching nodes. This optimizes for cases
956 956 * like "hg tip", where only a few nodes are accessed.
957 957 *
958 958 * After that, we cache every node we visit, using a single
959 959 * scan amortized over multiple lookups. This gives the best
960 960 * bulk performance, e.g. for "hg log".
961 961 */
962 962 if (self->ntmisses++ < 4) {
963 963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
964 964 const char *n = index_node(self, rev);
965 965 if (n == NULL)
966 966 return -2;
967 967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
968 968 if (nt_insert(self, n, rev) == -1)
969 969 return -3;
970 970 break;
971 971 }
972 972 }
973 973 } else {
974 974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
975 975 const char *n = index_node(self, rev);
976 976 if (n == NULL) {
977 977 self->ntrev = rev + 1;
978 978 return -2;
979 979 }
980 980 if (nt_insert(self, n, rev) == -1) {
981 981 self->ntrev = rev + 1;
982 982 return -3;
983 983 }
984 984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
985 985 break;
986 986 }
987 987 }
988 988 self->ntrev = rev;
989 989 }
990 990
991 991 if (rev >= 0)
992 992 return rev;
993 993 return -2;
994 994 }
995 995
996 996 static PyObject *raise_revlog_error(void)
997 997 {
998 998 static PyObject *errclass;
999 999 PyObject *mod = NULL, *errobj;
1000 1000
1001 1001 if (errclass == NULL) {
1002 1002 PyObject *dict;
1003 1003
1004 1004 mod = PyImport_ImportModule("mercurial.error");
1005 1005 if (mod == NULL)
1006 1006 goto classfail;
1007 1007
1008 1008 dict = PyModule_GetDict(mod);
1009 1009 if (dict == NULL)
1010 1010 goto classfail;
1011 1011
1012 1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1013 1013 if (errclass == NULL) {
1014 1014 PyErr_SetString(PyExc_SystemError,
1015 1015 "could not find RevlogError");
1016 1016 goto classfail;
1017 1017 }
1018 1018 Py_INCREF(errclass);
1019 1019 }
1020 1020
1021 1021 errobj = PyObject_CallFunction(errclass, NULL);
1022 1022 if (errobj == NULL)
1023 1023 return NULL;
1024 1024 PyErr_SetObject(errclass, errobj);
1025 1025 return errobj;
1026 1026
1027 1027 classfail:
1028 1028 Py_XDECREF(mod);
1029 1029 return NULL;
1030 1030 }
1031 1031
1032 1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1033 1033 {
1034 1034 char *node;
1035 1035 Py_ssize_t nodelen;
1036 1036 int rev;
1037 1037
1038 1038 if (PyInt_Check(value))
1039 1039 return index_get(self, PyInt_AS_LONG(value));
1040 1040
1041 1041 if (node_check(value, &node, &nodelen) == -1)
1042 1042 return NULL;
1043 1043 rev = index_find_node(self, node, nodelen);
1044 1044 if (rev >= -1)
1045 1045 return PyInt_FromLong(rev);
1046 1046 if (rev == -2)
1047 1047 raise_revlog_error();
1048 1048 return NULL;
1049 1049 }
1050 1050
1051 1051 static int nt_partialmatch(indexObject *self, const char *node,
1052 1052 Py_ssize_t nodelen)
1053 1053 {
1054 1054 int rev;
1055 1055
1056 1056 if (nt_init(self) == -1)
1057 1057 return -3;
1058 1058
1059 1059 if (self->ntrev > 0) {
1060 1060 /* ensure that the radix tree is fully populated */
1061 1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1062 1062 const char *n = index_node(self, rev);
1063 1063 if (n == NULL)
1064 1064 return -2;
1065 1065 if (nt_insert(self, n, rev) == -1)
1066 1066 return -3;
1067 1067 }
1068 1068 self->ntrev = rev;
1069 1069 }
1070 1070
1071 1071 return nt_find(self, node, nodelen, 1);
1072 1072 }
1073 1073
1074 1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1075 1075 {
1076 1076 const char *fullnode;
1077 1077 int nodelen;
1078 1078 char *node;
1079 1079 int rev, i;
1080 1080
1081 1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1082 1082 return NULL;
1083 1083
1084 1084 if (nodelen < 4) {
1085 1085 PyErr_SetString(PyExc_ValueError, "key too short");
1086 1086 return NULL;
1087 1087 }
1088 1088
1089 1089 if (nodelen > 40) {
1090 1090 PyErr_SetString(PyExc_ValueError, "key too long");
1091 1091 return NULL;
1092 1092 }
1093 1093
1094 1094 for (i = 0; i < nodelen; i++)
1095 1095 hexdigit(node, i);
1096 1096 if (PyErr_Occurred()) {
1097 1097 /* input contains non-hex characters */
1098 1098 PyErr_Clear();
1099 1099 Py_RETURN_NONE;
1100 1100 }
1101 1101
1102 1102 rev = nt_partialmatch(self, node, nodelen);
1103 1103
1104 1104 switch (rev) {
1105 1105 case -4:
1106 1106 raise_revlog_error();
1107 1107 case -3:
1108 1108 return NULL;
1109 1109 case -2:
1110 1110 Py_RETURN_NONE;
1111 1111 case -1:
1112 1112 return PyString_FromStringAndSize(nullid, 20);
1113 1113 }
1114 1114
1115 1115 fullnode = index_node(self, rev);
1116 1116 if (fullnode == NULL) {
1117 1117 PyErr_Format(PyExc_IndexError,
1118 1118 "could not access rev %d", rev);
1119 1119 return NULL;
1120 1120 }
1121 1121 return PyString_FromStringAndSize(fullnode, 20);
1122 1122 }
1123 1123
1124 1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1125 1125 {
1126 1126 Py_ssize_t nodelen;
1127 1127 PyObject *val;
1128 1128 char *node;
1129 1129 int rev;
1130 1130
1131 1131 if (!PyArg_ParseTuple(args, "O", &val))
1132 1132 return NULL;
1133 1133 if (node_check(val, &node, &nodelen) == -1)
1134 1134 return NULL;
1135 1135 rev = index_find_node(self, node, nodelen);
1136 1136 if (rev == -3)
1137 1137 return NULL;
1138 1138 if (rev == -2)
1139 1139 Py_RETURN_NONE;
1140 1140 return PyInt_FromLong(rev);
1141 1141 }
1142 1142
1143 1143 static int index_contains(indexObject *self, PyObject *value)
1144 1144 {
1145 1145 char *node;
1146 1146 Py_ssize_t nodelen;
1147 1147
1148 1148 if (PyInt_Check(value)) {
1149 1149 long rev = PyInt_AS_LONG(value);
1150 1150 return rev >= -1 && rev < index_length(self);
1151 1151 }
1152 1152
1153 1153 if (node_check(value, &node, &nodelen) == -1)
1154 1154 return -1;
1155 1155
1156 1156 switch (index_find_node(self, node, nodelen)) {
1157 1157 case -3:
1158 1158 return -1;
1159 1159 case -2:
1160 1160 return 0;
1161 1161 default:
1162 1162 return 1;
1163 1163 }
1164 1164 }
1165 1165
1166 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1167 {
1168 if (rev >= self->length - 1) {
1169 PyObject *tuple = PyList_GET_ITEM(self->added,
1170 rev - self->length + 1);
1171 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1172 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1173 } else {
1174 const char *data = index_deref(self, rev);
1175 ps[0] = getbe32(data + 24);
1176 ps[1] = getbe32(data + 28);
1177 }
1178 }
1179
1180 typedef uint64_t bitmask;
1181
1182 /*
1183 * Given a disjoint set of revs, return all candidates for the
1184 * greatest common ancestor. In revset notation, this is the set
1185 * "heads(::a and ::b and ...)"
1186 */
1187 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1188 int revcount)
1189 {
1190 const bitmask allseen = (1ull << revcount) - 1;
1191 const bitmask poison = 1ull << revcount;
1192 PyObject *gca = PyList_New(0);
1193 int i, v, interesting, left;
1194 int maxrev = -1;
1195 bitmask *seen;
1196
1197 for (i = 0; i < revcount; i++) {
1198 if (revs[i] > maxrev)
1199 maxrev = revs[i];
1200 }
1201
1202 seen = calloc(sizeof(*seen), maxrev + 1);
1203 if (seen == NULL)
1204 return PyErr_NoMemory();
1205
1206 for (i = 0; i < revcount; i++)
1207 seen[revs[i]] = 1ull << i;
1208
1209 interesting = left = revcount;
1210
1211 for (v = maxrev; v >= 0 && interesting; v--) {
1212 long sv = seen[v];
1213 int parents[2];
1214
1215 if (!sv)
1216 continue;
1217
1218 if (sv < poison) {
1219 interesting -= 1;
1220 if (sv == allseen) {
1221 PyObject *obj = PyInt_FromLong(v);
1222 if (obj == NULL)
1223 goto bail;
1224 if (PyList_Append(gca, obj) == -1) {
1225 Py_DECREF(obj);
1226 goto bail;
1227 }
1228 sv |= poison;
1229 for (i = 0; i < revcount; i++) {
1230 if (revs[i] == v) {
1231 if (--left <= 1)
1232 goto done;
1233 break;
1234 }
1235 }
1236 }
1237 }
1238 index_get_parents(self, v, parents);
1239
1240 for (i = 0; i < 2; i++) {
1241 int p = parents[i];
long sp;
1242 if (p == -1)
1243 continue;
1244 sp = seen[p];
1245 if (sv < poison) {
1246 if (sp == 0) {
1247 seen[p] = sv;
1248 interesting++;
1249 }
1250 else if (sp != sv)
1251 seen[p] |= sv;
1252 } else {
1253 if (sp && sp < poison)
1254 interesting--;
1255 seen[p] = sv;
1256 }
1257 }
1258 }
1259
1260 done:
1261 free(seen);
1262 return gca;
1263 bail:
1264 free(seen);
1265 Py_XDECREF(gca);
1266 return NULL;
1267 }
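The bitmask walk above is compact but dense. A pure-Python sketch of the same idea (illustrative; parent lookup is abstracted as a parents(rev) callable returning two revs, -1 for a missing parent):

    def gca_candidates(parents, revs):
        """Return heads(::a and ::b and ...) for disjoint input revs."""
        allseen = (1 << len(revs)) - 1    # every input rev's bit set
        poison = 1 << len(revs)           # marks revs below a known answer
        seen = dict((r, 1 << i) for i, r in enumerate(revs))
        gca = []
        interesting = len(revs)
        for v in range(max(revs), -1, -1):
            sv = seen.get(v, 0)
            if not sv:
                continue
            if sv < poison:
                interesting -= 1
                if sv == allseen:         # v reaches every input rev
                    gca.append(v)
                    sv |= poison
            for p in parents(v):
                if p == -1:
                    continue
                sp = seen.get(p, 0)
                if sv < poison:
                    if sp == 0:
                        interesting += 1
                    seen[p] = sp | sv
                else:
                    if sp and sp < poison:
                        interesting -= 1
                    seen[p] = sv          # poison propagates to ancestors
            if not interesting:
                break
        return gca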
1268
1269 /*
1270 * Given a disjoint set of revs, return the subset with the longest
1271 * path to the root.
1272 */
1273 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1274 {
1275 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1276 static const Py_ssize_t capacity = 24;
1277 int *depth, *interesting = NULL;
1278 int i, j, v, ninteresting;
1279 PyObject *dict = NULL, *keys;
1280 long *seen = NULL;
1281 int maxrev = -1;
1282 long final;
1283
1284 if (revcount > capacity) {
1285 PyErr_Format(PyExc_OverflowError,
1286 "bitset size (%ld) > capacity (%ld)",
1287 revcount, capacity);
1288 return NULL;
1289 }
1290
1291 for (i = 0; i < revcount; i++) {
1292 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1293 if (n > maxrev)
1294 maxrev = n;
1295 }
1296
1297 depth = calloc(sizeof(*depth), maxrev + 1);
1298 if (depth == NULL)
1299 return PyErr_NoMemory();
1300
1301 seen = calloc(sizeof(*seen), maxrev + 1);
1302 if (seen == NULL) {
1303 PyErr_NoMemory();
1304 goto bail;
1305 }
1306
1307 interesting = calloc(sizeof(*interesting), 2 << revcount);
1308 if (interesting == NULL) {
1309 PyErr_NoMemory();
1310 goto bail;
1311 }
1312
1313 for (i = 0; i < revcount; i++) {
1314 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1315 long b = 1l << i;
1316 depth[n] = 1;
1317 seen[n] = b;
1318 interesting[b] = 1;
1319 }
1320
1321 ninteresting = (int)revcount;
1322
1323 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1324 int dv = depth[v];
1325 int parents[2];
1326 long sv;
1327
1328 if (dv == 0)
1329 continue;
1330
1331 sv = seen[v];
1332 index_get_parents(self, v, parents);
1333
1334 for (i = 0; i < 2; i++) {
1335 int p = parents[i];
1336 long nsp, sp;
1337 int dp;
1338
1339 if (p == -1)
1340 continue;
1341
1342 dp = depth[p];
1343 nsp = sp = seen[p];
1344 if (dp <= dv) {
1345 depth[p] = dv + 1;
1346 if (sp != sv) {
1347 interesting[sv] += 1;
1348 nsp = seen[p] = sv;
1349 if (sp) {
1350 interesting[sp] -= 1;
1351 if (interesting[sp] == 0)
1352 ninteresting -= 1;
1353 }
1354 }
1355 }
1356 else if (dv == dp - 1) {
1357 nsp = sp | sv;
1358 if (nsp == sp)
1359 continue;
1360 seen[p] = nsp;
1361 interesting[nsp] += 1;
1362 interesting[sp] -= 1;
1363 if (interesting[sp] == 0)
1364 ninteresting -= 1;
1365 }
1366 }
1367 interesting[sv] -= 1;
1368 if (interesting[sv] == 0)
1369 ninteresting -= 1;
1370 }
1371
1372 final = 0;
1373 j = ninteresting;
1374 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1375 if (interesting[i] == 0)
1376 continue;
1377 final |= i;
1378 j -= 1;
1379 }
1380 if (final == 0) {
/* free scratch arrays before the early return */
free(depth);
free(seen);
free(interesting);
1381 return PyList_New(0);
}
1382
1383 dict = PyDict_New();
1384 if (dict == NULL)
1385 goto bail;
1386
1387 j = ninteresting;
1388 for (i = 0; i < revcount && j > 0; i++) {
1389 PyObject *key;
1390
1391 if ((final & (1 << i)) == 0)
1392 continue;
1393
1394 key = PyList_GET_ITEM(revs, i);
1395 Py_INCREF(key);
1396 Py_INCREF(Py_None);
1397 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1398 Py_DECREF(key);
1399 Py_DECREF(Py_None);
1400 goto bail;
1401 }
1402 j -= 1;
1403 }
1404
1405 keys = PyDict_Keys(dict);
1406
1407 free(depth);
1408 free(seen);
1409 free(interesting);
1410 Py_DECREF(dict);
1411
1412 return keys;
1413 bail:
1414 free(depth);
1415 free(seen);
1416 free(interesting);
1417 Py_XDECREF(dict);
1418
1419 return NULL;
1420 }
1421
1422 /*
1423 * Given a (possibly overlapping) set of revs, return the greatest
1424 * common ancestors: those with the longest path to the root.
1425 */
1426 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1427 {
1428 PyObject *ret = NULL, *gca = NULL;
1429 Py_ssize_t argcount, i, len;
1430 bitmask repeat = 0;
1431 int revcount = 0;
1432 int *revs;
1433
1434 argcount = PySequence_Length(args);
1435 revs = malloc(argcount * sizeof(*revs));
1436 if (argcount > 0 && revs == NULL)
1437 return PyErr_NoMemory();
1438 len = index_length(self) - 1;
1439
1440 for (i = 0; i < argcount; i++) {
1441 static const int capacity = 24;
1442 PyObject *obj = PySequence_GetItem(args, i);
1443 bitmask x;
1444 long val;
1445
1446 if (!PyInt_Check(obj)) {
1447 PyErr_SetString(PyExc_TypeError,
1448 "arguments must all be ints");
1449 goto bail;
1450 }
1451 val = PyInt_AsLong(obj);
1452 if (val == -1) {
1453 ret = PyList_New(0);
1454 goto done;
1455 }
1456 if (val < 0 || val >= len) {
1457 PyErr_SetString(PyExc_IndexError,
1458 "index out of range");
1459 goto bail;
1460 }
1461 /* this cheesy bloom filter lets us avoid some more
1462 * expensive duplicate checks in the common set-is-disjoint
1463 * case */
1464 x = 1ull << (val & 0x3f);
1465 if (repeat & x) {
1466 int k;
1467 for (k = 0; k < revcount; k++) {
1468 if (val == revs[k])
1469 goto duplicate;
1470 }
1471 }
1472 else repeat |= x;
1473 if (revcount >= capacity) {
1474 PyErr_Format(PyExc_OverflowError,
1475 "bitset size (%d) > capacity (%d)",
1476 revcount, capacity);
1477 goto bail;
1478 }
1479 revs[revcount++] = (int)val;
1480 duplicate:;
1481 }
1482
1483 if (revcount == 0) {
1484 ret = PyList_New(0);
1485 goto done;
1486 }
1487 if (revcount == 1) {
1488 PyObject *obj;
1489 ret = PyList_New(1);
1490 if (ret == NULL)
1491 goto bail;
1492 obj = PyInt_FromLong(revs[0]);
1493 if (obj == NULL)
1494 goto bail;
1495 PyList_SET_ITEM(ret, 0, obj);
1496 goto done;
1497 }
1498
1499 gca = find_gca_candidates(self, revs, revcount);
1500 if (gca == NULL)
1501 goto bail;
1502
1503 if (PyList_GET_SIZE(gca) <= 1) {
1504 ret = gca;
1505 Py_INCREF(gca);
1506 }
1511 else ret = find_deepest(self, gca);
1512
1513 done:
1514 free(revs);
1515 Py_XDECREF(gca);
1516
1517 return ret;
1518
1519 bail:
1520 free(revs);
1521 Py_XDECREF(gca);
1522 Py_XDECREF(ret);
1523 return NULL;
1524 }
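The "cheesy bloom filter" above hashes each rev into one of 64 bits (val & 0x3f): an unset bit proves the rev is new, while a set bit merely triggers the real duplicate scan. The same trick in Python (sketch):

    def dedup(vals):
        repeat, revs = 0, []
        for val in vals:
            x = 1 << (val & 0x3f)     # one of 64 buckets
            if repeat & x:            # maybe seen before: confirm by scanning
                if val in revs:
                    continue
            else:
                repeat |= x
            revs.append(val)
        return revs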
1525
1166 1526 /*
1167 1527 * Invalidate any trie entries introduced by added revs.
1168 1528 */
1169 1529 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1170 1530 {
1171 1531 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1172 1532
1173 1533 for (i = start; i < len; i++) {
1174 1534 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1175 1535 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1176 1536
1177 1537 nt_insert(self, PyString_AS_STRING(node), -1);
1178 1538 }
1179 1539
1180 1540 if (start == 0)
1181 1541 Py_CLEAR(self->added);
1182 1542 }
1183 1543
1184 1544 /*
1185 1545 * Delete a numeric range of revs, which must be at the end of the
1186 1546 * range, but exclude the sentinel nullid entry.
1187 1547 */
1188 1548 static int index_slice_del(indexObject *self, PyObject *item)
1189 1549 {
1190 1550 Py_ssize_t start, stop, step, slicelength;
1191 1551 Py_ssize_t length = index_length(self);
1192 1552 int ret = 0;
1193 1553
1194 1554 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1195 1555 &start, &stop, &step, &slicelength) < 0)
1196 1556 return -1;
1197 1557
1198 1558 if (slicelength <= 0)
1199 1559 return 0;
1200 1560
1201 1561 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1202 1562 stop = start;
1203 1563
1204 1564 if (step < 0) {
1205 1565 stop = start + 1;
1206 1566 start = stop + step*(slicelength - 1) - 1;
1207 1567 step = -step;
1208 1568 }
1209 1569
1210 1570 if (step != 1) {
1211 1571 PyErr_SetString(PyExc_ValueError,
1212 1572 "revlog index delete requires step size of 1");
1213 1573 return -1;
1214 1574 }
1215 1575
1216 1576 if (stop != length - 1) {
1217 1577 PyErr_SetString(PyExc_IndexError,
1218 1578 "revlog index deletion indices are invalid");
1219 1579 return -1;
1220 1580 }
1221 1581
1222 1582 if (start < self->length - 1) {
1223 1583 if (self->nt) {
1224 1584 Py_ssize_t i;
1225 1585
1226 1586 for (i = start + 1; i < self->length - 1; i++) {
1227 1587 const char *node = index_node(self, i);
1228 1588
1229 1589 if (node)
1230 1590 nt_insert(self, node, -1);
1231 1591 }
1232 1592 if (self->added)
1233 1593 nt_invalidate_added(self, 0);
1234 1594 if (self->ntrev > start)
1235 1595 self->ntrev = (int)start;
1236 1596 }
1237 1597 self->length = start + 1;
1238 1598 if (start < self->raw_length) {
1239 1599 if (self->cache) {
1240 1600 Py_ssize_t i;
1241 1601 for (i = start; i < self->raw_length; i++)
1242 1602 Py_CLEAR(self->cache[i]);
1243 1603 }
1244 1604 self->raw_length = start;
1245 1605 }
1246 1606 goto done;
1247 1607 }
1248 1608
1249 1609 if (self->nt) {
1250 1610 nt_invalidate_added(self, start - self->length + 1);
1251 1611 if (self->ntrev > start)
1252 1612 self->ntrev = (int)start;
1253 1613 }
1254 1614 if (self->added)
1255 1615 ret = PyList_SetSlice(self->added, start - self->length + 1,
1256 1616 PyList_GET_SIZE(self->added), NULL);
1257 1617 done:
1258 1618 Py_CLEAR(self->headrevs);
1259 1619 return ret;
1260 1620 }
1261 1621
1262 1622 /*
1263 1623 * Supported ops:
1264 1624 *
1265 1625 * slice deletion
1266 1626 * string assignment (extend node->rev mapping)
1267 1627 * string deletion (shrink node->rev mapping)
1268 1628 */
1269 1629 static int index_assign_subscript(indexObject *self, PyObject *item,
1270 1630 PyObject *value)
1271 1631 {
1272 1632 char *node;
1273 1633 Py_ssize_t nodelen;
1274 1634 long rev;
1275 1635
1276 1636 if (PySlice_Check(item) && value == NULL)
1277 1637 return index_slice_del(self, item);
1278 1638
1279 1639 if (node_check(item, &node, &nodelen) == -1)
1280 1640 return -1;
1281 1641
1282 1642 if (value == NULL)
1283 1643 return self->nt ? nt_insert(self, node, -1) : 0;
1284 1644 rev = PyInt_AsLong(value);
1285 1645 if (rev > INT_MAX || rev < 0) {
1286 1646 if (!PyErr_Occurred())
1287 1647 PyErr_SetString(PyExc_ValueError, "rev out of range");
1288 1648 return -1;
1289 1649 }
1290 1650 return nt_insert(self, node, (int)rev);
1291 1651 }
1292 1652
1293 1653 /*
1294 1654 * Find all RevlogNG entries in an index that has inline data. Update
1295 1655 * the optional "offsets" table with those entries.
1296 1656 */
1297 1657 static long inline_scan(indexObject *self, const char **offsets)
1298 1658 {
1299 1659 const char *data = PyString_AS_STRING(self->data);
1300 1660 const char *end = data + PyString_GET_SIZE(self->data);
1301 1661 long incr = v1_hdrsize;
1302 1662 Py_ssize_t len = 0;
1303 1663
1304 1664 while (data + v1_hdrsize <= end) {
1305 1665 uint32_t comp_len;
1306 1666 const char *old_data;
1307 1667 /* 3rd element of header is length of compressed inline data */
1308 1668 comp_len = getbe32(data + 8);
1309 1669 incr = v1_hdrsize + comp_len;
1310 1670 if (incr < v1_hdrsize)
1311 1671 break;
1312 1672 if (offsets)
1313 1673 offsets[len] = data;
1314 1674 len++;
1315 1675 old_data = data;
1316 1676 data += incr;
1317 1677 if (data <= old_data)
1318 1678 break;
1319 1679 }
1320 1680
1321 1681 if (data != end && data + v1_hdrsize != end) {
1322 1682 if (!PyErr_Occurred())
1323 1683 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1324 1684 return -1;
1325 1685 }
1326 1686
1327 1687 return len;
1328 1688 }
1329 1689
1330 1690 static int index_init(indexObject *self, PyObject *args)
1331 1691 {
1332 1692 PyObject *data_obj, *inlined_obj;
1333 1693 Py_ssize_t size;
1334 1694
1335 1695 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1336 1696 return -1;
1337 1697 if (!PyString_Check(data_obj)) {
1338 1698 PyErr_SetString(PyExc_TypeError, "data is not a string");
1339 1699 return -1;
1340 1700 }
1341 1701 size = PyString_GET_SIZE(data_obj);
1342 1702
1343 1703 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1344 1704 self->data = data_obj;
1345 1705 self->cache = NULL;
1346 1706
1347 1707 self->added = NULL;
1348 1708 self->headrevs = NULL;
1349 1709 self->offsets = NULL;
1350 1710 self->nt = NULL;
1351 1711 self->ntlength = self->ntcapacity = 0;
1352 1712 self->ntdepth = self->ntsplits = 0;
1353 1713 self->ntlookups = self->ntmisses = 0;
1354 1714 self->ntrev = -1;
1355 1715 Py_INCREF(self->data);
1356 1716
1357 1717 if (self->inlined) {
1358 1718 long len = inline_scan(self, NULL);
1359 1719 if (len == -1)
1360 1720 goto bail;
1361 1721 self->raw_length = len;
1362 1722 self->length = len + 1;
1363 1723 } else {
1364 1724 if (size % v1_hdrsize) {
1365 1725 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1366 1726 goto bail;
1367 1727 }
1368 1728 self->raw_length = size / v1_hdrsize;
1369 1729 self->length = self->raw_length + 1;
1370 1730 }
1371 1731
1372 1732 return 0;
1373 1733 bail:
1374 1734 return -1;
1375 1735 }
1376 1736
1377 1737 static PyObject *index_nodemap(indexObject *self)
1378 1738 {
1379 1739 Py_INCREF(self);
1380 1740 return (PyObject *)self;
1381 1741 }
1382 1742
1383 1743 static void index_dealloc(indexObject *self)
1384 1744 {
1385 1745 _index_clearcaches(self);
1386 1746 Py_DECREF(self->data);
1387 1747 Py_XDECREF(self->added);
1388 1748 PyObject_Del(self);
1389 1749 }
1390 1750
1391 1751 static PySequenceMethods index_sequence_methods = {
1392 1752 (lenfunc)index_length, /* sq_length */
1393 1753 0, /* sq_concat */
1394 1754 0, /* sq_repeat */
1395 1755 (ssizeargfunc)index_get, /* sq_item */
1396 1756 0, /* sq_slice */
1397 1757 0, /* sq_ass_item */
1398 1758 0, /* sq_ass_slice */
1399 1759 (objobjproc)index_contains, /* sq_contains */
1400 1760 };
1401 1761
1402 1762 static PyMappingMethods index_mapping_methods = {
1403 1763 (lenfunc)index_length, /* mp_length */
1404 1764 (binaryfunc)index_getitem, /* mp_subscript */
1405 1765 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1406 1766 };
1407 1767
1408 1768 static PyMethodDef index_methods[] = {
1769 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1770 "return the gca set of the given revs"},
1409 1771 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1410 1772 "clear the index caches"},
1411 1773 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1412 1774 "get an index entry"},
1413 1775 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1414 1776 "get head revisions"},
1415 1777 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1416 1778 "insert an index entry"},
1417 1779 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1418 1780 "match a potentially ambiguous node ID"},
1419 1781 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1420 1782 "stats for the index"},
1421 1783 {NULL} /* Sentinel */
1422 1784 };
1423 1785
1424 1786 static PyGetSetDef index_getset[] = {
1425 1787 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1426 1788 {NULL} /* Sentinel */
1427 1789 };
1428 1790
1429 1791 static PyTypeObject indexType = {
1430 1792 PyObject_HEAD_INIT(NULL)
1431 1793 0, /* ob_size */
1432 1794 "parsers.index", /* tp_name */
1433 1795 sizeof(indexObject), /* tp_basicsize */
1434 1796 0, /* tp_itemsize */
1435 1797 (destructor)index_dealloc, /* tp_dealloc */
1436 1798 0, /* tp_print */
1437 1799 0, /* tp_getattr */
1438 1800 0, /* tp_setattr */
1439 1801 0, /* tp_compare */
1440 1802 0, /* tp_repr */
1441 1803 0, /* tp_as_number */
1442 1804 &index_sequence_methods, /* tp_as_sequence */
1443 1805 &index_mapping_methods, /* tp_as_mapping */
1444 1806 0, /* tp_hash */
1445 1807 0, /* tp_call */
1446 1808 0, /* tp_str */
1447 1809 0, /* tp_getattro */
1448 1810 0, /* tp_setattro */
1449 1811 0, /* tp_as_buffer */
1450 1812 Py_TPFLAGS_DEFAULT, /* tp_flags */
1451 1813 "revlog index", /* tp_doc */
1452 1814 0, /* tp_traverse */
1453 1815 0, /* tp_clear */
1454 1816 0, /* tp_richcompare */
1455 1817 0, /* tp_weaklistoffset */
1456 1818 0, /* tp_iter */
1457 1819 0, /* tp_iternext */
1458 1820 index_methods, /* tp_methods */
1459 1821 0, /* tp_members */
1460 1822 index_getset, /* tp_getset */
1461 1823 0, /* tp_base */
1462 1824 0, /* tp_dict */
1463 1825 0, /* tp_descr_get */
1464 1826 0, /* tp_descr_set */
1465 1827 0, /* tp_dictoffset */
1466 1828 (initproc)index_init, /* tp_init */
1467 1829 0, /* tp_alloc */
1468 1830 };
1469 1831
1470 1832 /*
1471 1833 * returns a tuple of the form (index, cache) with elements as
1472 1834 * follows:
1473 1835 *
1474 1836 * index: an index object that lazily parses RevlogNG records
1475 1837 * cache: if data is inlined, a tuple (0, index_file_content), else None
1476 1838 *
1477 1839 * added complications are for backwards compatibility
1478 1840 */
1479 1841 static PyObject *parse_index2(PyObject *self, PyObject *args)
1480 1842 {
1481 1843 PyObject *tuple = NULL, *cache = NULL;
1482 1844 indexObject *idx;
1483 1845 int ret;
1484 1846
1485 1847 idx = PyObject_New(indexObject, &indexType);
1486 1848 if (idx == NULL)
1487 1849 goto bail;
1488 1850
1489 1851 ret = index_init(idx, args);
1490 1852 if (ret == -1)
1491 1853 goto bail;
1492 1854
1493 1855 if (idx->inlined) {
1494 1856 cache = Py_BuildValue("iO", 0, idx->data);
1495 1857 if (cache == NULL)
1496 1858 goto bail;
1497 1859 } else {
1498 1860 cache = Py_None;
1499 1861 Py_INCREF(cache);
1500 1862 }
1501 1863
1502 1864 tuple = Py_BuildValue("NN", idx, cache);
1503 1865 if (!tuple)
1504 1866 goto bail;
1505 1867 return tuple;
1506 1868
1507 1869 bail:
1508 1870 Py_XDECREF(idx);
1509 1871 Py_XDECREF(cache);
1510 1872 Py_XDECREF(tuple);
1511 1873 return NULL;
1512 1874 }
1513 1875
1514 1876 static char parsers_doc[] = "Efficient content parsing.";
1515 1877
1516 1878 PyObject *encodedir(PyObject *self, PyObject *args);
1517 1879 PyObject *pathencode(PyObject *self, PyObject *args);
1518 1880 PyObject *lowerencode(PyObject *self, PyObject *args);
1519 1881
1520 1882 static PyMethodDef methods[] = {
1521 1883 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1522 1884 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1523 1885 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1524 1886 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1525 1887 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1526 1888 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1527 1889 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1528 1890 {NULL, NULL}
1529 1891 };
1530 1892
1531 1893 void dirs_module_init(PyObject *mod);
1532 1894
1533 1895 static void module_init(PyObject *mod)
1534 1896 {
1535 1897 dirs_module_init(mod);
1536 1898
1537 1899 indexType.tp_new = PyType_GenericNew;
1538 1900 if (PyType_Ready(&indexType) < 0)
1539 1901 return;
1540 1902 Py_INCREF(&indexType);
1541 1903
1542 1904 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1543 1905
1544 1906 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1545 1907 -1, -1, -1, -1, nullid, 20);
1546 1908 if (nullentry)
1547 1909 PyObject_GC_UnTrack(nullentry);
1548 1910
1549 1911 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1550 1912 }
1551 1913
1552 1914 #ifdef IS_PY3K
1553 1915 static struct PyModuleDef parsers_module = {
1554 1916 PyModuleDef_HEAD_INIT,
1555 1917 "parsers",
1556 1918 parsers_doc,
1557 1919 -1,
1558 1920 methods
1559 1921 };
1560 1922
1561 1923 PyMODINIT_FUNC PyInit_parsers(void)
1562 1924 {
1563 1925 PyObject *mod = PyModule_Create(&parsers_module);
1564 1926 module_init(mod);
1565 1927 return mod;
1566 1928 }
1567 1929 #else
1568 1930 PyMODINIT_FUNC initparsers(void)
1569 1931 {
1570 1932 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1571 1933 module_init(mod);
1572 1934 }
1573 1935 #endif
@@ -1,1337 +1,1343 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
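Taken together, these helpers round-trip a 48-bit data-file offset and a 16-bit type/flags field through one integer, e.g.:

    q = offset_type(1024, 1)
    assert getoffset(q) == 1024
    assert gettype(q) == 1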
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, it is always p2
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
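# The scheme above is plain SHA-1 over the two parent nodeids (sorted,
# so parent order does not matter) followed by the text; the nullhash
# copy is only a shortcut for the common p2 == nullid case.  A minimal
# sketch of the equivalence, assuming 20-byte binary nodeids and using
# hashlib directly in place of util.sha1 (not called anywhere):
def _hashsketch():
    import hashlib
    p1 = '\x11' * 20  # hypothetical parent nodeid
    l = [p1, nullid]
    l.sort()
    expected = hashlib.sha1(l[0] + l[1] + 'data').digest()
    assert hash('data', p1, nullid) == expected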
78 78 def decompress(bin):
79 79 """ decompress the given input """
80 80 if not bin:
81 81 return bin
82 82 t = bin[0]
83 83 if t == '\0':
84 84 return bin
85 85 if t == 'x':
86 86 try:
87 87 return _decompress(bin)
88 88 except zlib.error, e:
89 89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 90 if t == 'u':
91 91 return bin[1:]
92 92 raise RevlogError(_("unknown compression type %r") % t)
93 93
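# Usage sketch for decompress(): zlib output already begins with 'x',
# so compressed chunks need no extra tag, while incompressible chunks
# are stored behind a literal 'u' marker (see compress() further down
# in this file); hypothetical, not called anywhere:
def _decompresssketch():
    assert decompress(zlib.compress('some revision text')) == \
           'some revision text'
    assert decompress('uplain text') == 'plain text'
    assert decompress('') == ''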
94 94 # index v0:
95 95 # 4 bytes: offset
96 96 # 4 bytes: compressed length
97 97 # 4 bytes: base rev
98 98 # 4 bytes: link rev
99 99 # 20 bytes: parent 1 nodeid
100 100 # 20 bytes: parent 2 nodeid
101 101 # 20 bytes: nodeid
102 102 indexformatv0 = ">4l20s20s20s"
103 103 v0shaoffset = 56
104 104
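# A v0 record is a fixed 76 bytes: four 4-byte big-endian ints plus
# three 20-byte nodeids, with v0shaoffset (56 = 16 + 20 + 20) marking
# where the entry's own nodeid starts.  A sketch with one synthetic
# entry (hypothetical values, not called anywhere):
def _v0sketch():
    fakenode = '\x11' * 20  # hypothetical nodeid
    assert struct.calcsize(indexformatv0) == 76
    entry = _pack(indexformatv0, 0, 0, 0, -1, fakenode, fakenode, fakenode)
    assert _unpack(indexformatv0, entry)[6] == fakenode
    assert entry[v0shaoffset:] == fakenode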
105 105 class revlogoldio(object):
106 106 def __init__(self):
107 107 self.size = struct.calcsize(indexformatv0)
108 108
109 109 def parseindex(self, data, inline):
110 110 s = self.size
111 111 index = []
112 112 nodemap = {nullid: nullrev}
113 113 n = off = 0
114 114 l = len(data)
115 115 while off + s <= l:
116 116 cur = data[off:off + s]
117 117 off += s
118 118 e = _unpack(indexformatv0, cur)
119 119 # transform to revlogv1 format
120 120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 122 index.append(e2)
123 123 nodemap[e[6]] = n
124 124 n += 1
125 125
126 126 # add the magic null revision at -1
127 127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128 128
129 129 return index, nodemap, None
130 130
131 131 def packentry(self, entry, node, version, rev):
132 132 if gettype(entry[0]):
133 133 raise RevlogError(_("index entry flags need RevlogNG"))
134 134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 135 node(entry[5]), node(entry[6]), entry[7])
136 136 return _pack(indexformatv0, *e2)
137 137
138 138 # index ng:
139 139 # 6 bytes: offset
140 140 # 2 bytes: flags
141 141 # 4 bytes: compressed length
142 142 # 4 bytes: uncompressed length
143 143 # 4 bytes: base rev
144 144 # 4 bytes: link rev
145 145 # 4 bytes: parent 1 rev
146 146 # 4 bytes: parent 2 rev
147 147 # 32 bytes: nodeid (20 bytes of hash plus 12 bytes of padding)
148 148 indexformatng = ">Qiiiiii20s12x"
149 149 ngshaoffset = 32
150 150 versionformat = ">I"
151 151
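# A RevlogNG record is 64 bytes (8 + 6 * 4 + 20 + 12 bytes of padding),
# and its first field multiplexes a 48-bit file offset with 16 flag
# bits via offset_type()/getoffset()/gettype() above; packentry() below
# additionally overlays the 4-byte version header on entry 0.  A quick
# sketch (not called anywhere):
def _ngsketch():
    assert struct.calcsize(indexformatng) == 64
    q = offset_type(12345, 7)  # hypothetical offset and flags
    assert getoffset(q) == 12345
    assert gettype(q) == 7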
152 152 class revlogio(object):
153 153 def __init__(self):
154 154 self.size = struct.calcsize(indexformatng)
155 155
156 156 def parseindex(self, data, inline):
157 157 # call the C implementation to parse the index data
158 158 index, cache = parsers.parse_index2(data, inline)
159 159 return index, getattr(index, 'nodemap', None), cache
160 160
161 161 def packentry(self, entry, node, version, rev):
162 162 p = _pack(indexformatng, *entry)
163 163 if rev == 0:
164 164 p = _pack(versionformat, version) + p[4:]
165 165 return p
166 166
167 167 class revlog(object):
168 168 """
169 169 the underlying revision storage object
170 170
171 171 A revlog consists of two parts, an index and the revision data.
172 172
173 173 The index is a file with a fixed record size containing
174 174 information on each revision, including its nodeid (hash), the
175 175 nodeids of its parents, the position and offset of its data within
176 176 the data file, and the revision it's based on. Finally, each entry
177 177 contains a linkrev entry that can serve as a pointer to external
178 178 data.
179 179
180 180 The revision data itself is a linear collection of data chunks.
181 181 Each chunk represents a revision and is usually represented as a
182 182 delta against the previous chunk. To bound lookup time, runs of
183 183 deltas are limited to about 2 times the length of the original
184 184 version data. This makes retrieval of a version proportional to
185 185 its size, or O(1) relative to the number of revisions.
186 186
187 187 Both pieces of the revlog are written to in an append-only
188 188 fashion, which means we never need to rewrite a file to insert or
189 189 remove data, and can use some simple techniques to avoid the need
190 190 for locking while reading.
191 191 """
192 192 def __init__(self, opener, indexfile):
193 193 """
194 194 create a revlog object
195 195
196 196 opener is a function that abstracts the file opening operation
197 197 and can be used to implement COW semantics or the like.
198 198 """
199 199 self.indexfile = indexfile
200 200 self.datafile = indexfile[:-2] + ".d"
201 201 self.opener = opener
202 202 self._cache = None
203 203 self._basecache = (0, 0)
204 204 self._chunkcache = (0, '')
205 205 self.index = []
206 206 self._pcache = {}
207 207 self._nodecache = {nullid: nullrev}
208 208 self._nodepos = None
209 209
210 210 v = REVLOG_DEFAULT_VERSION
211 211 opts = getattr(opener, 'options', None)
212 212 if opts is not None:
213 213 if 'revlogv1' in opts:
214 214 if 'generaldelta' in opts:
215 215 v |= REVLOGGENERALDELTA
216 216 else:
217 217 v = 0
218 218
219 219 i = ''
220 220 self._initempty = True
221 221 try:
222 222 f = self.opener(self.indexfile)
223 223 i = f.read()
224 224 f.close()
225 225 if len(i) > 0:
226 226 v = struct.unpack(versionformat, i[:4])[0]
227 227 self._initempty = False
228 228 except IOError, inst:
229 229 if inst.errno != errno.ENOENT:
230 230 raise
231 231
232 232 self.version = v
233 233 self._inline = v & REVLOGNGINLINEDATA
234 234 self._generaldelta = v & REVLOGGENERALDELTA
235 235 flags = v & ~0xFFFF
236 236 fmt = v & 0xFFFF
237 237 if fmt == REVLOGV0 and flags:
238 238 raise RevlogError(_("index %s unknown flags %#04x for format v0")
239 239 % (self.indexfile, flags >> 16))
240 240 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
241 241 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
242 242 % (self.indexfile, flags >> 16))
243 243 elif fmt > REVLOGNG:
244 244 raise RevlogError(_("index %s unknown format %d")
245 245 % (self.indexfile, fmt))
246 246
247 247 self._io = revlogio()
248 248 if self.version == REVLOGV0:
249 249 self._io = revlogoldio()
250 250 try:
251 251 d = self._io.parseindex(i, self._inline)
252 252 except (ValueError, IndexError):
253 253 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
254 254 self.index, nodemap, self._chunkcache = d
255 255 if nodemap is not None:
256 256 self.nodemap = self._nodecache = nodemap
257 257 if not self._chunkcache:
258 258 self._chunkclear()
259 259
260 260 def tip(self):
261 261 return self.node(len(self.index) - 2)
262 262 def __len__(self):
263 263 return len(self.index) - 1
264 264 def __iter__(self):
265 265 return iter(xrange(len(self)))
266 266 def revs(self, start=0, stop=None):
267 267 """iterate over all rev in this revlog (from start to stop)"""
268 268 step = 1
269 269 if stop is not None:
270 270 if start > stop:
271 271 step = -1
272 272 stop += step
273 273 else:
274 274 stop = len(self)
275 275 return xrange(start, stop, step)
276 276
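# revs() above includes 'stop' when one is given, and steps backwards
# when start > stop; e.g., for a hypothetical revlog 'rlog' with five
# revisions:
#   list(rlog.revs())     -> [0, 1, 2, 3, 4]
#   list(rlog.revs(3))    -> [3, 4]
#   list(rlog.revs(3, 1)) -> [3, 2, 1]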
277 277 @util.propertycache
278 278 def nodemap(self):
279 279 self.rev(self.node(0))
280 280 return self._nodecache
281 281
282 282 def hasnode(self, node):
283 283 try:
284 284 self.rev(node)
285 285 return True
286 286 except KeyError:
287 287 return False
288 288
289 289 def clearcaches(self):
290 290 try:
291 291 self._nodecache.clearcaches()
292 292 except AttributeError:
293 293 self._nodecache = {nullid: nullrev}
294 294 self._nodepos = None
295 295
296 296 def rev(self, node):
297 297 try:
298 298 return self._nodecache[node]
299 299 except RevlogError:
300 300 # parsers.c radix tree lookup failed
301 301 raise LookupError(node, self.indexfile, _('no node'))
302 302 except KeyError:
303 303 # pure python cache lookup failed
304 304 n = self._nodecache
305 305 i = self.index
306 306 p = self._nodepos
307 307 if p is None:
308 308 p = len(i) - 2
309 309 for r in xrange(p, -1, -1):
310 310 v = i[r][7]
311 311 n[v] = r
312 312 if v == node:
313 313 self._nodepos = r - 1
314 314 return r
315 315 raise LookupError(node, self.indexfile, _('no node'))
316 316
317 317 def node(self, rev):
318 318 return self.index[rev][7]
319 319 def linkrev(self, rev):
320 320 return self.index[rev][4]
321 321 def parents(self, node):
322 322 i = self.index
323 323 d = i[self.rev(node)]
324 324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 325 def parentrevs(self, rev):
326 326 return self.index[rev][5:7]
327 327 def start(self, rev):
328 328 return int(self.index[rev][0] >> 16)
329 329 def end(self, rev):
330 330 return self.start(rev) + self.length(rev)
331 331 def length(self, rev):
332 332 return self.index[rev][1]
333 333 def chainbase(self, rev):
334 334 index = self.index
335 335 base = index[rev][3]
336 336 while base != rev:
337 337 rev = base
338 338 base = index[rev][3]
339 339 return base
340 340 def flags(self, rev):
341 341 return self.index[rev][0] & 0xFFFF
342 342 def rawsize(self, rev):
343 343 """return the length of the uncompressed text for a given revision"""
344 344 l = self.index[rev][2]
345 345 if l >= 0:
346 346 return l
347 347
348 348 t = self.revision(self.node(rev))
349 349 return len(t)
350 350 size = rawsize
351 351
352 352 def ancestors(self, revs, stoprev=0, inclusive=False):
353 353 """Generate the ancestors of 'revs' in reverse topological order.
354 354 Does not generate revs lower than stoprev.
355 355
356 356 See the documentation for ancestor.lazyancestors for more details."""
357 357
358 358 return ancestor.lazyancestors(self, revs, stoprev=stoprev,
359 359 inclusive=inclusive)
360 360
361 361 def descendants(self, revs):
362 362 """Generate the descendants of 'revs' in revision order.
363 363
364 364 Yield a sequence of revision numbers starting with a child of
365 365 some rev in revs, i.e., each revision is *not* considered a
366 366 descendant of itself. Results are ordered by revision number (a
367 367 topological sort)."""
368 368 first = min(revs)
369 369 if first == nullrev:
370 370 for i in self:
371 371 yield i
372 372 return
373 373
374 374 seen = set(revs)
375 375 for i in self.revs(start=first + 1):
376 376 for x in self.parentrevs(i):
377 377 if x != nullrev and x in seen:
378 378 seen.add(i)
379 379 yield i
380 380 break
381 381
382 382 def findcommonmissing(self, common=None, heads=None):
383 383 """Return a tuple of the ancestors of common and the ancestors of heads
384 384 that are not ancestors of common. In revset terminology, we return the
385 385 tuple:
386 386
387 387 ::common, (::heads) - (::common)
388 388
389 389 The list is sorted by revision number, meaning it is
390 390 topologically sorted.
391 391
392 392 'heads' and 'common' are both lists of node IDs. If heads is
393 393 not supplied, uses all of the revlog's heads. If common is not
394 394 supplied, uses nullid."""
395 395 if common is None:
396 396 common = [nullid]
397 397 if heads is None:
398 398 heads = self.heads()
399 399
400 400 common = [self.rev(n) for n in common]
401 401 heads = [self.rev(n) for n in heads]
402 402
403 403 # we want the ancestors, but inclusive
404 404 has = set(self.ancestors(common))
405 405 has.add(nullrev)
406 406 has.update(common)
407 407
408 408 # take all ancestors from heads that aren't in has
409 409 missing = set()
410 410 visit = util.deque(r for r in heads if r not in has)
411 411 while visit:
412 412 r = visit.popleft()
413 413 if r in missing:
414 414 continue
415 415 else:
416 416 missing.add(r)
417 417 for p in self.parentrevs(r):
418 418 if p not in has:
419 419 visit.append(p)
420 420 missing = list(missing)
421 421 missing.sort()
422 422 return has, [self.node(r) for r in missing]
423 423
424 424 def findmissingrevs(self, common=None, heads=None):
425 425 """Return the revision numbers of the ancestors of heads that
426 426 are not ancestors of common.
427 427
428 428 More specifically, return a list of revision numbers corresponding to
429 429 nodes N such that every N satisfies the following constraints:
430 430
431 431 1. N is an ancestor of some node in 'heads'
432 432 2. N is not an ancestor of any node in 'common'
433 433
434 434 The list is sorted by revision number, meaning it is
435 435 topologically sorted.
436 436
437 437 'heads' and 'common' are both lists of revision numbers. If heads is
438 438 not supplied, uses all of the revlog's heads. If common is not
439 439 supplied, uses nullid."""
440 440 if common is None:
441 441 common = [nullrev]
442 442 if heads is None:
443 443 heads = self.headrevs()
444 444
445 445 return ancestor.missingancestors(heads, common, self.parentrevs)
446 446
447 447 def findmissing(self, common=None, heads=None):
448 448 """Return the ancestors of heads that are not ancestors of common.
449 449
450 450 More specifically, return a list of nodes N such that every N
451 451 satisfies the following constraints:
452 452
453 453 1. N is an ancestor of some node in 'heads'
454 454 2. N is not an ancestor of any node in 'common'
455 455
456 456 The list is sorted by revision number, meaning it is
457 457 topologically sorted.
458 458
459 459 'heads' and 'common' are both lists of node IDs. If heads is
460 460 not supplied, uses all of the revlog's heads. If common is not
461 461 supplied, uses nullid."""
462 462 if common is None:
463 463 common = [nullid]
464 464 if heads is None:
465 465 heads = self.heads()
466 466
467 467 common = [self.rev(n) for n in common]
468 468 heads = [self.rev(n) for n in heads]
469 469
470 470 return [self.node(r) for r in
471 471 ancestor.missingancestors(heads, common, self.parentrevs)]
472 472
473 473 def nodesbetween(self, roots=None, heads=None):
474 474 """Return a topological path from 'roots' to 'heads'.
475 475
476 476 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 477 topologically sorted list of all nodes N that satisfy both of
478 478 these constraints:
479 479
480 480 1. N is a descendant of some node in 'roots'
481 481 2. N is an ancestor of some node in 'heads'
482 482
483 483 Every node is considered to be both a descendant and an ancestor
484 484 of itself, so every reachable node in 'roots' and 'heads' will be
485 485 included in 'nodes'.
486 486
487 487 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 488 subset of 'roots' that is returned in 'nodes'. Likewise,
489 489 'outheads' is the subset of 'heads' that is also in 'nodes'.
490 490
491 491 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 492 unspecified, uses nullid as the only root. If 'heads' is
493 493 unspecified, uses list of all of the revlog's heads."""
494 494 nonodes = ([], [], [])
495 495 if roots is not None:
496 496 roots = list(roots)
497 497 if not roots:
498 498 return nonodes
499 499 lowestrev = min([self.rev(n) for n in roots])
500 500 else:
501 501 roots = [nullid] # Everybody's a descendant of nullid
502 502 lowestrev = nullrev
503 503 if (lowestrev == nullrev) and (heads is None):
504 504 # We want _all_ the nodes!
505 505 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 506 if heads is None:
507 507 # All nodes are ancestors, so the latest ancestor is the last
508 508 # node.
509 509 highestrev = len(self) - 1
510 510 # Set ancestors to None to signal that every node is an ancestor.
511 511 ancestors = None
512 512 # Set heads to an empty dictionary for later discovery of heads
513 513 heads = {}
514 514 else:
515 515 heads = list(heads)
516 516 if not heads:
517 517 return nonodes
518 518 ancestors = set()
519 519 # Turn heads into a dictionary so we can remove 'fake' heads.
520 520 # Also, later we will be using it to filter out the heads we can't
521 521 # find from roots.
522 522 heads = dict.fromkeys(heads, False)
523 523 # Start at the top and keep marking parents until we're done.
524 524 nodestotag = set(heads)
525 525 # Remember where the top was so we can use it as a limit later.
526 526 highestrev = max([self.rev(n) for n in nodestotag])
527 527 while nodestotag:
528 528 # grab a node to tag
529 529 n = nodestotag.pop()
530 530 # Never tag nullid
531 531 if n == nullid:
532 532 continue
533 533 # A node's revision number represents its place in a
534 534 # topologically sorted list of nodes.
535 535 r = self.rev(n)
536 536 if r >= lowestrev:
537 537 if n not in ancestors:
538 538 # If we are possibly a descendant of one of the roots
539 539 # and we haven't already been marked as an ancestor
540 540 ancestors.add(n) # Mark as ancestor
541 541 # Add non-nullid parents to list of nodes to tag.
542 542 nodestotag.update([p for p in self.parents(n) if
543 543 p != nullid])
544 544 elif n in heads: # We've seen it before, is it a fake head?
545 545 # So it is, real heads should not be the ancestors of
546 546 # any other heads.
547 547 heads.pop(n)
548 548 if not ancestors:
549 549 return nonodes
550 550 # Now that we have our set of ancestors, we want to remove any
551 551 # roots that are not ancestors.
552 552
553 553 # If one of the roots was nullid, everything is included anyway.
554 554 if lowestrev > nullrev:
555 555 # But, since we weren't, let's recompute the lowest rev to not
556 556 # include roots that aren't ancestors.
557 557
558 558 # Filter out roots that aren't ancestors of heads
559 559 roots = [n for n in roots if n in ancestors]
560 560 # Recompute the lowest revision
561 561 if roots:
562 562 lowestrev = min([self.rev(n) for n in roots])
563 563 else:
564 564 # No more roots? Return empty list
565 565 return nonodes
566 566 else:
567 567 # We are descending from nullid, and don't need to care about
568 568 # any other roots.
569 569 lowestrev = nullrev
570 570 roots = [nullid]
571 571 # Transform our roots list into a set.
572 572 descendants = set(roots)
573 573 # Also, keep the original roots so we can filter out roots that aren't
574 574 # 'real' roots (i.e. are descended from other roots).
575 575 roots = descendants.copy()
576 576 # Our topologically sorted list of output nodes.
577 577 orderedout = []
578 578 # Don't start at nullid since we don't want nullid in our output list,
579 579 # and if nullid shows up in descendants, empty parents will look like
580 580 # they're descendants.
581 581 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
582 582 n = self.node(r)
583 583 isdescendant = False
584 584 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 585 isdescendant = True
586 586 elif n in descendants:
587 587 # n is already a descendant
588 588 isdescendant = True
589 589 # This check only needs to be done here because all the roots
590 590 # will start being marked is descendants before the loop.
591 591 if n in roots:
592 592 # If n was a root, check if it's a 'real' root.
593 593 p = tuple(self.parents(n))
594 594 # If any of its parents are descendants, it's not a root.
595 595 if (p[0] in descendants) or (p[1] in descendants):
596 596 roots.remove(n)
597 597 else:
598 598 p = tuple(self.parents(n))
599 599 # A node is a descendant if either of its parents is a
600 600 # descendant. (We seeded the descendants set with the roots
601 601 # up there, remember?)
602 602 if (p[0] in descendants) or (p[1] in descendants):
603 603 descendants.add(n)
604 604 isdescendant = True
605 605 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 606 # Only include nodes that are both descendants and ancestors.
607 607 orderedout.append(n)
608 608 if (ancestors is not None) and (n in heads):
609 609 # We're trying to figure out which heads are reachable
610 610 # from roots.
611 611 # Mark this head as having been reached
612 612 heads[n] = True
613 613 elif ancestors is None:
614 614 # Otherwise, we're trying to discover the heads.
615 615 # Assume this is a head because if it isn't, the next step
616 616 # will eventually remove it.
617 617 heads[n] = True
618 618 # But, obviously its parents aren't.
619 619 for p in self.parents(n):
620 620 heads.pop(p, None)
621 621 heads = [n for n, flag in heads.iteritems() if flag]
622 622 roots = list(roots)
623 623 assert orderedout
624 624 assert roots
625 625 assert heads
626 626 return (orderedout, roots, heads)
627 627
628 628 def headrevs(self):
629 629 try:
630 630 return self.index.headrevs()
631 631 except AttributeError:
632 632 return self._headrevs()
633 633
634 634 def _headrevs(self):
635 635 count = len(self)
636 636 if not count:
637 637 return [nullrev]
638 638 # we won't iterate over filtered revs, so nobody is a head at the start
639 639 ishead = [0] * (count + 1)
640 640 index = self.index
641 641 for r in self:
642 642 ishead[r] = 1 # I may be a head
643 643 e = index[r]
644 644 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
645 645 return [r for r, val in enumerate(ishead) if val]
646 646
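# Note the count + 1 sizing of ishead: parentrevs of a root are
# (-1, -1), so the ishead[e[5]] and ishead[e[6]] writes for nullrev
# land in the extra slot at the end instead of clobbering a real
# revision.  E.g. for revs 0-1-2 plus a branch 1-3, only 2 and 3 keep
# their mark and _headrevs() returns [2, 3].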
647 647 def heads(self, start=None, stop=None):
648 648 """return the list of all nodes that have no children
649 649
650 650 if start is specified, only heads that are descendants of
651 651 start will be returned
652 652 if stop is specified, it will consider all the revs from stop
653 653 as if they had no children
654 654 """
655 655 if start is None and stop is None:
656 656 if not len(self):
657 657 return [nullid]
658 658 return [self.node(r) for r in self.headrevs()]
659 659
660 660 if start is None:
661 661 start = nullid
662 662 if stop is None:
663 663 stop = []
664 664 stoprevs = set([self.rev(n) for n in stop])
665 665 startrev = self.rev(start)
666 666 reachable = set((startrev,))
667 667 heads = set((startrev,))
668 668
669 669 parentrevs = self.parentrevs
670 670 for r in self.revs(start=startrev + 1):
671 671 for p in parentrevs(r):
672 672 if p in reachable:
673 673 if r not in stoprevs:
674 674 reachable.add(r)
675 675 heads.add(r)
676 676 if p in heads and p not in stoprevs:
677 677 heads.remove(p)
678 678
679 679 return [self.node(r) for r in heads]
680 680
681 681 def children(self, node):
682 682 """find the children of a given node"""
683 683 c = []
684 684 p = self.rev(node)
685 685 for r in self.revs(start=p + 1):
686 686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 687 if prevs:
688 688 for pr in prevs:
689 689 if pr == p:
690 690 c.append(self.node(r))
691 691 elif p == nullrev:
692 692 c.append(self.node(r))
693 693 return c
694 694
695 695 def descendant(self, start, end):
696 696 if start == nullrev:
697 697 return True
698 698 for i in self.descendants([start]):
699 699 if i == end:
700 700 return True
701 701 elif i > end:
702 702 break
703 703 return False
704 704
705 705 def ancestor(self, a, b):
706 706 """calculate the least common ancestor of nodes a and b"""
707 707
708 708 a, b = self.rev(a), self.rev(b)
709 ancs = ancestor.ancestors(self.parentrevs, a, b)
709 try:
710 ancs = self.index.ancestors(a, b)
711 old = ancestor.ancestors(self.parentrevs, a, b)
712 assert set(ancs) == old, ('opinions differ over ancestor(%d, %d)' %
713 (a, b))
714 except (AttributeError, OverflowError):
715 ancs = ancestor.ancestors(self.parentrevs, a, b)
710 716 if ancs:
711 717 # choose a consistent winner when there's a tie
712 718 return min(map(self.node, ancs))
713 719 return nullid
714 720
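# The try block in ancestor() above is the point of this change: prefer
# the new C implementation (self.index.ancestors), fall back to the
# pure Python version when the C index is absent (AttributeError) or a
# rev overflows a C int (OverflowError), and, while the C code is new,
# assert that both implementations agree.  The pattern in outline
# (hypothetical fast/slow callables):
#   try:
#       ancs = fast(a, b)
#       assert set(ancs) == set(slow(a, b))
#   except (AttributeError, OverflowError):
#       ancs = slow(a, b)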
715 721 def _match(self, id):
716 722 if isinstance(id, int):
717 723 # rev
718 724 return self.node(id)
719 725 if len(id) == 20:
720 726 # possibly a binary node
721 727 # odds of a binary node being all hex in ASCII are 1 in 10**25
722 728 try:
723 729 node = id
724 730 self.rev(node) # quick search the index
725 731 return node
726 732 except LookupError:
727 733 pass # may be partial hex id
728 734 try:
729 735 # str(rev)
730 736 rev = int(id)
731 737 if str(rev) != id:
732 738 raise ValueError
733 739 if rev < 0:
734 740 rev = len(self) + rev
735 741 if rev < 0 or rev >= len(self):
736 742 raise ValueError
737 743 return self.node(rev)
738 744 except (ValueError, OverflowError):
739 745 pass
740 746 if len(id) == 40:
741 747 try:
742 748 # a full hex nodeid?
743 749 node = bin(id)
744 750 self.rev(node)
745 751 return node
746 752 except (TypeError, LookupError):
747 753 pass
748 754
749 755 def _partialmatch(self, id):
750 756 try:
751 757 return self.index.partialmatch(id)
752 758 except RevlogError:
753 759 # parsers.c radix tree lookup gave multiple matches
754 760 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
755 761 except (AttributeError, ValueError):
756 762 # we are pure python, or key was too short to search radix tree
757 763 pass
758 764
759 765 if id in self._pcache:
760 766 return self._pcache[id]
761 767
762 768 if len(id) < 40:
763 769 try:
764 770 # hex(node)[:...]
765 771 l = len(id) // 2 # grab an even number of digits
766 772 prefix = bin(id[:l * 2])
767 773 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
768 774 nl = [n for n in nl if hex(n).startswith(id)]
769 775 if len(nl) > 0:
770 776 if len(nl) == 1:
771 777 self._pcache[id] = nl[0]
772 778 return nl[0]
773 779 raise LookupError(id, self.indexfile,
774 780 _('ambiguous identifier'))
775 781 return None
776 782 except TypeError:
777 783 pass
778 784
779 785 def lookup(self, id):
780 786 """locate a node based on:
781 787 - revision number or str(revision number)
782 788 - nodeid or subset of hex nodeid
783 789 """
784 790 n = self._match(id)
785 791 if n is not None:
786 792 return n
787 793 n = self._partialmatch(id)
788 794 if n:
789 795 return n
790 796
791 797 raise LookupError(id, self.indexfile, _('no match found'))
792 798
793 799 def cmp(self, node, text):
794 800 """compare text with a given file revision
795 801
796 802 returns True if text is different from what is stored.
797 803 """
798 804 p1, p2 = self.parents(node)
799 805 return hash(text, p1, p2) != node
800 806
801 807 def _addchunk(self, offset, data):
802 808 o, d = self._chunkcache
803 809 # try to add to existing cache
804 810 if o + len(d) == offset and len(d) + len(data) < _chunksize:
805 811 self._chunkcache = o, d + data
806 812 else:
807 813 self._chunkcache = offset, data
808 814
809 815 def _loadchunk(self, offset, length):
810 816 if self._inline:
811 817 df = self.opener(self.indexfile)
812 818 else:
813 819 df = self.opener(self.datafile)
814 820
815 821 readahead = max(65536, length)
816 822 df.seek(offset)
817 823 d = df.read(readahead)
818 824 df.close()
819 825 self._addchunk(offset, d)
820 826 if readahead > length:
821 827 return util.buffer(d, 0, length)
822 828 return d
823 829
824 830 def _getchunk(self, offset, length):
825 831 o, d = self._chunkcache
826 832 l = len(d)
827 833
828 834 # is it in the cache?
829 835 cachestart = offset - o
830 836 cacheend = cachestart + length
831 837 if cachestart >= 0 and cacheend <= l:
832 838 if cachestart == 0 and cacheend == l:
833 839 return d # avoid a copy
834 840 return util.buffer(d, cachestart, cacheend - cachestart)
835 841
836 842 return self._loadchunk(offset, length)
837 843
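# The three helpers above maintain a single read-ahead window over the
# data (or inline index) file: _loadchunk reads at least 64kB at a
# time, _addchunk grows the cached window only while reads stay
# contiguous and the window stays under _chunksize, and _getchunk
# serves a request from the window whenever the whole
# [offset, offset + length) range falls inside it.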
838 844 def _chunkraw(self, startrev, endrev):
839 845 start = self.start(startrev)
840 846 length = self.end(endrev) - start
841 847 if self._inline:
842 848 start += (startrev + 1) * self._io.size
843 849 return self._getchunk(start, length)
844 850
845 851 def _chunk(self, rev):
846 852 return decompress(self._chunkraw(rev, rev))
847 853
848 854 def _chunkbase(self, rev):
849 855 return self._chunk(rev)
850 856
851 857 def _chunkclear(self):
852 858 self._chunkcache = (0, '')
853 859
854 860 def deltaparent(self, rev):
855 861 """return deltaparent of the given revision"""
856 862 base = self.index[rev][3]
857 863 if base == rev:
858 864 return nullrev
859 865 elif self._generaldelta:
860 866 return base
861 867 else:
862 868 return rev - 1
863 869
864 870 def revdiff(self, rev1, rev2):
865 871 """return or calculate a delta between two revisions"""
866 872 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
867 873 return str(self._chunk(rev2))
868 874
869 875 return mdiff.textdiff(self.revision(rev1),
870 876 self.revision(rev2))
871 877
872 878 def revision(self, nodeorrev):
873 879 """return an uncompressed revision of a given node or revision
874 880 number.
875 881 """
876 882 if isinstance(nodeorrev, int):
877 883 rev = nodeorrev
878 884 node = self.node(rev)
879 885 else:
880 886 node = nodeorrev
881 887 rev = None
882 888
883 889 cachedrev = None
884 890 if node == nullid:
885 891 return ""
886 892 if self._cache:
887 893 if self._cache[0] == node:
888 894 return self._cache[2]
889 895 cachedrev = self._cache[1]
890 896
891 897 # look up what we need to read
892 898 text = None
893 899 if rev is None:
894 900 rev = self.rev(node)
895 901
896 902 # check rev flags
897 903 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
898 904 raise RevlogError(_('incompatible revision flag %x') %
899 905 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
900 906
901 907 # build delta chain
902 908 chain = []
903 909 index = self.index # for performance
904 910 generaldelta = self._generaldelta
905 911 iterrev = rev
906 912 e = index[iterrev]
907 913 while iterrev != e[3] and iterrev != cachedrev:
908 914 chain.append(iterrev)
909 915 if generaldelta:
910 916 iterrev = e[3]
911 917 else:
912 918 iterrev -= 1
913 919 e = index[iterrev]
914 920 chain.reverse()
915 921 base = iterrev
916 922
917 923 if iterrev == cachedrev:
918 924 # cache hit
919 925 text = self._cache[2]
920 926
921 927 # drop cache to save memory
922 928 self._cache = None
923 929
924 930 self._chunkraw(base, rev)
925 931 if text is None:
926 932 text = str(self._chunkbase(base))
927 933
928 934 bins = [self._chunk(r) for r in chain]
929 935 text = mdiff.patches(text, bins)
930 936
931 937 text = self._checkhash(text, node, rev)
932 938
933 939 self._cache = (node, rev, text)
934 940 return text
935 941
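# revision() in outline: walk the index back from 'rev' to its delta
# base (stopping early at the cached revision, if any), pull the whole
# span into the chunk cache with a single _chunkraw() read, apply the
# deltas in order with mdiff.patches, and verify the result against
# the stored nodeid via _checkhash().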
936 942 def _checkhash(self, text, node, rev):
937 943 p1, p2 = self.parents(node)
938 944 if node != hash(text, p1, p2):
939 945 raise RevlogError(_("integrity check failed on %s:%d")
940 946 % (self.indexfile, rev))
941 947 return text
942 948
943 949 def checkinlinesize(self, tr, fp=None):
944 950 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
945 951 return
946 952
947 953 trinfo = tr.find(self.indexfile)
948 954 if trinfo is None:
949 955 raise RevlogError(_("%s not found in the transaction")
950 956 % self.indexfile)
951 957
952 958 trindex = trinfo[2]
953 959 dataoff = self.start(trindex)
954 960
955 961 tr.add(self.datafile, dataoff)
956 962
957 963 if fp:
958 964 fp.flush()
959 965 fp.close()
960 966
961 967 df = self.opener(self.datafile, 'w')
962 968 try:
963 969 for r in self:
964 970 df.write(self._chunkraw(r, r))
965 971 finally:
966 972 df.close()
967 973
968 974 fp = self.opener(self.indexfile, 'w', atomictemp=True)
969 975 self.version &= ~(REVLOGNGINLINEDATA)
970 976 self._inline = False
971 977 for i in self:
972 978 e = self._io.packentry(self.index[i], self.node, self.version, i)
973 979 fp.write(e)
974 980
975 981 # if we don't call close, the temp file will never replace the
976 982 # real index
977 983 fp.close()
978 984
979 985 tr.replace(self.indexfile, trindex * self._io.size)
980 986 self._chunkclear()
981 987
982 988 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
983 989 """add a revision to the log
984 990
985 991 text - the revision data to add
986 992 transaction - the transaction object used for rollback
987 993 link - the linkrev data to add
988 994 p1, p2 - the parent nodeids of the revision
989 995 cachedelta - an optional precomputed delta
990 996 """
991 997 node = hash(text, p1, p2)
992 998 if node in self.nodemap:
993 999 return node
994 1000
995 1001 dfh = None
996 1002 if not self._inline:
997 1003 dfh = self.opener(self.datafile, "a")
998 1004 ifh = self.opener(self.indexfile, "a+")
999 1005 try:
1000 1006 return self._addrevision(node, text, transaction, link, p1, p2,
1001 1007 cachedelta, ifh, dfh)
1002 1008 finally:
1003 1009 if dfh:
1004 1010 dfh.close()
1005 1011 ifh.close()
1006 1012
1007 1013 def compress(self, text):
1008 1014 """ generate a possibly-compressed representation of text """
1009 1015 if not text:
1010 1016 return ("", text)
1011 1017 l = len(text)
1012 1018 bin = None
1013 1019 if l < 44:
1014 1020 pass
1015 1021 elif l > 1000000:
1016 1022 # zlib makes an internal copy, thus doubling memory usage for
1017 1023 # large files, so let's do this in pieces
1018 1024 z = zlib.compressobj()
1019 1025 p = []
1020 1026 pos = 0
1021 1027 while pos < l:
1022 1028 pos2 = pos + 2**20
1023 1029 p.append(z.compress(text[pos:pos2]))
1024 1030 pos = pos2
1025 1031 p.append(z.flush())
1026 1032 if sum(map(len, p)) < l:
1027 1033 bin = "".join(p)
1028 1034 else:
1029 1035 bin = _compress(text)
1030 1036 if bin is None or len(bin) > l:
1031 1037 if text[0] == '\0':
1032 1038 return ("", text)
1033 1039 return ('u', text)
1034 1040 return ("", bin)
1035 1041
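# compress() hands back a (header, data) pair whose concatenation is
# exactly what the module-level decompress() accepts, e.g.
# (hypothetical):
#   h, d = self.compress('x' * 1000)   # -> ('', <zlib data>)
#   assert decompress(h + d) == 'x' * 1000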
1036 1042 def _addrevision(self, node, text, transaction, link, p1, p2,
1037 1043 cachedelta, ifh, dfh):
1038 1044 """internal function to add revisions to the log
1039 1045
1040 1046 see addrevision for argument descriptions.
1041 1047 invariants:
1042 1048 - text is optional (can be None); if not set, cachedelta must be set.
1043 1049 if both are set, they must correspond to each other.
1044 1050 """
1045 1051 btext = [text]
1046 1052 def buildtext():
1047 1053 if btext[0] is not None:
1048 1054 return btext[0]
1049 1055 # flush any pending writes here so we can read it in revision
1050 1056 if dfh:
1051 1057 dfh.flush()
1052 1058 ifh.flush()
1053 1059 basetext = self.revision(self.node(cachedelta[0]))
1054 1060 btext[0] = mdiff.patch(basetext, cachedelta[1])
1055 1061 chk = hash(btext[0], p1, p2)
1056 1062 if chk != node:
1057 1063 raise RevlogError(_("consistency error in delta"))
1058 1064 return btext[0]
1059 1065
1060 1066 def builddelta(rev):
1061 1067 # can we use the cached delta?
1062 1068 if cachedelta and cachedelta[0] == rev:
1063 1069 delta = cachedelta[1]
1064 1070 else:
1065 1071 t = buildtext()
1066 1072 ptext = self.revision(self.node(rev))
1067 1073 delta = mdiff.textdiff(ptext, t)
1068 1074 data = self.compress(delta)
1069 1075 l = len(data[1]) + len(data[0])
1070 1076 if basecache[0] == rev:
1071 1077 chainbase = basecache[1]
1072 1078 else:
1073 1079 chainbase = self.chainbase(rev)
1074 1080 dist = l + offset - self.start(chainbase)
1075 1081 if self._generaldelta:
1076 1082 base = rev
1077 1083 else:
1078 1084 base = chainbase
1079 1085 return dist, l, data, base, chainbase
1080 1086
1081 1087 curr = len(self)
1082 1088 prev = curr - 1
1083 1089 base = chainbase = curr
1084 1090 offset = self.end(prev)
1085 1091 flags = 0
1086 1092 d = None
1087 1093 basecache = self._basecache
1088 1094 p1r, p2r = self.rev(p1), self.rev(p2)
1089 1095
1090 1096 # should we try to build a delta?
1091 1097 if prev != nullrev:
1092 1098 if self._generaldelta:
1093 1099 if p1r >= basecache[1]:
1094 1100 d = builddelta(p1r)
1095 1101 elif p2r >= basecache[1]:
1096 1102 d = builddelta(p2r)
1097 1103 else:
1098 1104 d = builddelta(prev)
1099 1105 else:
1100 1106 d = builddelta(prev)
1101 1107 dist, l, data, base, chainbase = d
1102 1108
1103 1109 # full versions are inserted when the needed deltas
1104 1110 # become comparable to the uncompressed text
1105 1111 if text is None:
1106 1112 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1107 1113 cachedelta[1])
1108 1114 else:
1109 1115 textlen = len(text)
1110 1116 if d is None or dist > textlen * 2:
1111 1117 text = buildtext()
1112 1118 data = self.compress(text)
1113 1119 l = len(data[1]) + len(data[0])
1114 1120 base = chainbase = curr
1115 1121
1116 1122 e = (offset_type(offset, flags), l, textlen,
1117 1123 base, link, p1r, p2r, node)
1118 1124 self.index.insert(-1, e)
1119 1125 self.nodemap[node] = curr
1120 1126
1121 1127 entry = self._io.packentry(e, self.node, self.version, curr)
1122 1128 if not self._inline:
1123 1129 transaction.add(self.datafile, offset)
1124 1130 transaction.add(self.indexfile, curr * len(entry))
1125 1131 if data[0]:
1126 1132 dfh.write(data[0])
1127 1133 dfh.write(data[1])
1128 1134 dfh.flush()
1129 1135 ifh.write(entry)
1130 1136 else:
1131 1137 offset += curr * self._io.size
1132 1138 transaction.add(self.indexfile, offset, curr)
1133 1139 ifh.write(entry)
1134 1140 ifh.write(data[0])
1135 1141 ifh.write(data[1])
1136 1142 self.checkinlinesize(transaction, ifh)
1137 1143
1138 1144 if type(text) == str: # only accept immutable objects
1139 1145 self._cache = (node, curr, text)
1140 1146 self._basecache = (curr, chainbase)
1141 1147 return node
1142 1148
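# The 'dist > textlen * 2' test above is what enforces the bound from
# the class docstring: once the compressed bytes needed to rebuild a
# revision from its delta chain exceed about twice its plain size, a
# full (freshly compressed) text is stored and the chain restarts at
# this revision.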
1143 1149 def group(self, nodelist, bundler, reorder=None):
1144 1150 """Calculate a delta group, yielding a sequence of changegroup chunks
1145 1151 (strings).
1146 1152
1147 1153 Given a list of changeset revs, return a set of deltas and
1148 1154 metadata corresponding to nodes. The first delta is
1149 1155 first parent(nodelist[0]) -> nodelist[0], the receiver is
1150 1156 guaranteed to have this parent as it has all history before
1152 1158 these changesets. In the case where firstparent is nullrev, the
1152 1158 changegroup starts with a full revision.
1153 1159 """
1154 1160
1155 1161 # if we don't have any revisions touched by these changesets, bail
1156 1162 if len(nodelist) == 0:
1157 1163 yield bundler.close()
1158 1164 return
1159 1165
1160 1166 # for generaldelta revlogs, we linearize the revs; this will both be
1161 1167 # much quicker and generate a much smaller bundle
1162 1168 if (self._generaldelta and reorder is not False) or reorder:
1163 1169 dag = dagutil.revlogdag(self)
1164 1170 revs = set(self.rev(n) for n in nodelist)
1165 1171 revs = dag.linearize(revs)
1166 1172 else:
1167 1173 revs = sorted([self.rev(n) for n in nodelist])
1168 1174
1169 1175 # add the parent of the first rev
1170 1176 p = self.parentrevs(revs[0])[0]
1171 1177 revs.insert(0, p)
1172 1178
1173 1179 # build deltas
1174 1180 for r in xrange(len(revs) - 1):
1175 1181 prev, curr = revs[r], revs[r + 1]
1176 1182 for c in bundler.revchunk(self, curr, prev):
1177 1183 yield c
1178 1184
1179 1185 yield bundler.close()
1180 1186
1181 1187 def addgroup(self, bundle, linkmapper, transaction):
1182 1188 """
1183 1189 add a delta group
1184 1190
1185 1191 given a set of deltas, add them to the revision log. the
1186 1192 first delta is against its parent, which should be in our
1187 1193 log, the rest are against the previous delta.
1188 1194 """
1189 1195
1190 1196 # track the base of the current delta log
1191 1197 content = []
1192 1198 node = None
1193 1199
1194 1200 r = len(self)
1195 1201 end = 0
1196 1202 if r:
1197 1203 end = self.end(r - 1)
1198 1204 ifh = self.opener(self.indexfile, "a+")
1199 1205 isize = r * self._io.size
1200 1206 if self._inline:
1201 1207 transaction.add(self.indexfile, end + isize, r)
1202 1208 dfh = None
1203 1209 else:
1204 1210 transaction.add(self.indexfile, isize, r)
1205 1211 transaction.add(self.datafile, end)
1206 1212 dfh = self.opener(self.datafile, "a")
1207 1213
1208 1214 try:
1209 1215 # loop through our set of deltas
1210 1216 chain = None
1211 1217 while True:
1212 1218 chunkdata = bundle.deltachunk(chain)
1213 1219 if not chunkdata:
1214 1220 break
1215 1221 node = chunkdata['node']
1216 1222 p1 = chunkdata['p1']
1217 1223 p2 = chunkdata['p2']
1218 1224 cs = chunkdata['cs']
1219 1225 deltabase = chunkdata['deltabase']
1220 1226 delta = chunkdata['delta']
1221 1227
1222 1228 content.append(node)
1223 1229
1224 1230 link = linkmapper(cs)
1225 1231 if node in self.nodemap:
1226 1232 # this can happen if two branches make the same change
1227 1233 chain = node
1228 1234 continue
1229 1235
1230 1236 for p in (p1, p2):
1231 1237 if p not in self.nodemap:
1232 1238 raise LookupError(p, self.indexfile,
1233 1239 _('unknown parent'))
1234 1240
1235 1241 if deltabase not in self.nodemap:
1236 1242 raise LookupError(deltabase, self.indexfile,
1237 1243 _('unknown delta base'))
1238 1244
1239 1245 baserev = self.rev(deltabase)
1240 1246 chain = self._addrevision(node, None, transaction, link,
1241 1247 p1, p2, (baserev, delta), ifh, dfh)
1242 1248 if not dfh and not self._inline:
1243 1249 # addrevision switched from inline to conventional
1244 1250 # reopen the index
1245 1251 ifh.close()
1246 1252 dfh = self.opener(self.datafile, "a")
1247 1253 ifh = self.opener(self.indexfile, "a")
1248 1254 finally:
1249 1255 if dfh:
1250 1256 dfh.close()
1251 1257 ifh.close()
1252 1258
1253 1259 return content
1254 1260
1255 1261 def strip(self, minlink, transaction):
1256 1262 """truncate the revlog on the first revision with a linkrev >= minlink
1257 1263
1258 1264 This function is called when we're stripping revision minlink and
1259 1265 its descendants from the repository.
1260 1266
1261 1267 We have to remove all revisions with linkrev >= minlink, because
1262 1268 the equivalent changelog revisions will be renumbered after the
1263 1269 strip.
1264 1270
1265 1271 So we truncate the revlog on the first of these revisions, and
1266 1272 trust that the caller has saved the revisions that shouldn't be
1267 1273 removed and that it'll re-add them after this truncation.
1268 1274 """
1269 1275 if len(self) == 0:
1270 1276 return
1271 1277
1272 1278 for rev in self:
1273 1279 if self.index[rev][4] >= minlink:
1274 1280 break
1275 1281 else:
1276 1282 return
1277 1283
1278 1284 # first truncate the files on disk
1279 1285 end = self.start(rev)
1280 1286 if not self._inline:
1281 1287 transaction.add(self.datafile, end)
1282 1288 end = rev * self._io.size
1283 1289 else:
1284 1290 end += rev * self._io.size
1285 1291
1286 1292 transaction.add(self.indexfile, end)
1287 1293
1288 1294 # then reset internal state in memory to forget those revisions
1289 1295 self._cache = None
1290 1296 self._chunkclear()
1291 1297 for x in xrange(rev, len(self)):
1292 1298 del self.nodemap[self.node(x)]
1293 1299
1294 1300 del self.index[rev:-1]
1295 1301
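# del self.index[rev:-1] deliberately stops one short: the index list
# carries the magic nullid record at position -1 (appended by
# parseindex), so everything from 'rev' up to that sentinel is dropped
# while the sentinel itself survives.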
1296 1302 def checksize(self):
1297 1303 expected = 0
1298 1304 if len(self):
1299 1305 expected = max(0, self.end(len(self) - 1))
1300 1306
1301 1307 try:
1302 1308 f = self.opener(self.datafile)
1303 1309 f.seek(0, 2)
1304 1310 actual = f.tell()
1305 1311 f.close()
1306 1312 dd = actual - expected
1307 1313 except IOError, inst:
1308 1314 if inst.errno != errno.ENOENT:
1309 1315 raise
1310 1316 dd = 0
1311 1317
1312 1318 try:
1313 1319 f = self.opener(self.indexfile)
1314 1320 f.seek(0, 2)
1315 1321 actual = f.tell()
1316 1322 f.close()
1317 1323 s = self._io.size
1318 1324 i = max(0, actual // s)
1319 1325 di = actual - (i * s)
1320 1326 if self._inline:
1321 1327 databytes = 0
1322 1328 for r in self:
1323 1329 databytes += max(0, self.length(r))
1324 1330 dd = 0
1325 1331 di = actual - len(self) * s - databytes
1326 1332 except IOError, inst:
1327 1333 if inst.errno != errno.ENOENT:
1328 1334 raise
1329 1335 di = 0
1330 1336
1331 1337 return (dd, di)
1332 1338
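# checksize() reports (dd, di): dd is how many bytes the data file
# differs from what the index says it should hold, and di is how many
# stray trailing bytes of the index fail to form a whole record;
# (0, 0) means the file sizes are consistent.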
1333 1339 def files(self):
1334 1340 res = [self.indexfile]
1335 1341 if not self._inline:
1336 1342 res.append(self.datafile)
1337 1343 return res