revlog: speed up prefix matching against nodes...
Bryan O'Sullivan
r16665:e410be86 default
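The parsers.c hunk below adds a partialmatch method to the C index type, so hex node-ID prefixes are resolved through the existing base-16 trie instead of a linear scan at the Python level. The revlog.py hunk in this view stops before the Python-side caller, so the wrapper below is only an illustrative sketch of the contract visible in index_partialmatch (a prefix of at least 4 hex digits; the full 20-byte node back, None for no match, RevlogError for an ambiguous prefix), not the committed code:

    from mercurial import error

    def resolveprefix(rlog, hexprefix):
        # Hypothetical caller of the new C method index.partialmatch.
        # parsers.c rejects prefixes shorter than 4 hex digits with a
        # ValueError and treats non-hex input as "no match" (None).
        try:
            node = rlog.index.partialmatch(hexprefix)
        except error.RevlogError:
            # nt_find returned -4: several nodes share this prefix
            raise error.LookupError(hexprefix, rlog.indexfile,
                                    'ambiguous identifier')
        return node  # 20-byte binary node (nullid included), or None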
mercurial/parsers.c
@@ -1,1220 +1,1293 @@
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 17 {
18 18 char c = p[off];
19 19
20 20 if (c >= '0' && c <= '9')
21 21 return c - '0';
22 22 if (c >= 'a' && c <= 'f')
23 23 return c - 'a' + 10;
24 24 if (c >= 'A' && c <= 'F')
25 25 return c - 'A' + 10;
26 26
27 27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 28 return 0;
29 29 }
30 30
31 31 /*
32 32 * Turn a hex-encoded string into binary.
33 33 */
34 34 static PyObject *unhexlify(const char *str, int len)
35 35 {
36 36 PyObject *ret;
37 37 char *d;
38 38 int i;
39 39
40 40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 41
42 42 if (!ret)
43 43 return NULL;
44 44
45 45 d = PyBytes_AsString(ret);
46 46
47 47 for (i = 0; i < len;) {
48 48 int hi = hexdigit(str, i++);
49 49 int lo = hexdigit(str, i++);
50 50 *d++ = (hi << 4) | lo;
51 51 }
52 52
53 53 return ret;
54 54 }
55 55
56 56 /*
57 57 * This code assumes that a manifest is stitched together with newline
58 58 * ('\n') characters.
59 59 */
60 60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 61 {
62 62 PyObject *mfdict, *fdict;
63 63 char *str, *cur, *start, *zero;
64 64 int len;
65 65
66 66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 67 &PyDict_Type, &mfdict,
68 68 &PyDict_Type, &fdict,
69 69 &str, &len))
70 70 goto quit;
71 71
72 72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 73 PyObject *file = NULL, *node = NULL;
74 74 PyObject *flags = NULL;
75 75 int nlen;
76 76
77 77 if (!*cur) {
78 78 zero = cur;
79 79 continue;
80 80 }
81 81 else if (*cur != '\n')
82 82 continue;
83 83
84 84 if (!zero) {
85 85 PyErr_SetString(PyExc_ValueError,
86 86 "manifest entry has no separator");
87 87 goto quit;
88 88 }
89 89
90 90 file = PyBytes_FromStringAndSize(start, zero - start);
91 91
92 92 if (!file)
93 93 goto bail;
94 94
95 95 nlen = cur - zero - 1;
96 96
97 97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 98 if (!node)
99 99 goto bail;
100 100
101 101 if (nlen > 40) {
102 102 flags = PyBytes_FromStringAndSize(zero + 41,
103 103 nlen - 40);
104 104 if (!flags)
105 105 goto bail;
106 106
107 107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 108 goto bail;
109 109 }
110 110
111 111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 112 goto bail;
113 113
114 114 start = cur + 1;
115 115 zero = NULL;
116 116
117 117 Py_XDECREF(flags);
118 118 Py_XDECREF(node);
119 119 Py_XDECREF(file);
120 120 continue;
121 121 bail:
122 122 Py_XDECREF(flags);
123 123 Py_XDECREF(node);
124 124 Py_XDECREF(file);
125 125 goto quit;
126 126 }
127 127
128 128 if (len > 0 && *(cur - 1) != '\n') {
129 129 PyErr_SetString(PyExc_ValueError,
130 130 "manifest contains trailing garbage");
131 131 goto quit;
132 132 }
133 133
134 134 Py_INCREF(Py_None);
135 135 return Py_None;
136 136 quit:
137 137 return NULL;
138 138 }
139 139
140 140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 141 {
142 142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 144 char *str, *cur, *end, *cpos;
145 145 int state, mode, size, mtime;
146 146 unsigned int flen;
147 147 int len;
148 148
149 149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 150 &PyDict_Type, &dmap,
151 151 &PyDict_Type, &cmap,
152 152 &str, &len))
153 153 goto quit;
154 154
155 155 /* read parents */
156 156 if (len < 40)
157 157 goto quit;
158 158
159 159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 160 if (!parents)
161 161 goto quit;
162 162
163 163 /* read filenames */
164 164 cur = str + 40;
165 165 end = str + len;
166 166
167 167 while (cur < end - 17) {
168 168 /* unpack header */
169 169 state = *cur;
170 170 mode = getbe32(cur + 1);
171 171 size = getbe32(cur + 5);
172 172 mtime = getbe32(cur + 9);
173 173 flen = getbe32(cur + 13);
174 174 cur += 17;
175 175 if (cur + flen > end || cur + flen < cur) {
176 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 177 goto quit;
178 178 }
179 179
180 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 181 if (!entry)
182 182 goto quit;
183 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 184
185 185 cpos = memchr(cur, 0, flen);
186 186 if (cpos) {
187 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 189 flen - (cpos - cur) - 1);
190 190 if (!fname || !cname ||
191 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 192 PyDict_SetItem(dmap, fname, entry) == -1)
193 193 goto quit;
194 194 Py_DECREF(cname);
195 195 } else {
196 196 fname = PyBytes_FromStringAndSize(cur, flen);
197 197 if (!fname ||
198 198 PyDict_SetItem(dmap, fname, entry) == -1)
199 199 goto quit;
200 200 }
201 201 cur += flen;
202 202 Py_DECREF(fname);
203 203 Py_DECREF(entry);
204 204 fname = cname = entry = NULL;
205 205 }
206 206
207 207 ret = parents;
208 208 Py_INCREF(ret);
209 209 quit:
210 210 Py_XDECREF(fname);
211 211 Py_XDECREF(cname);
212 212 Py_XDECREF(entry);
213 213 Py_XDECREF(parents);
214 214 return ret;
215 215 }
216 216
217 217 /*
218 218 * A base-16 trie for fast node->rev mapping.
219 219 *
220 220 * Positive value is index of the next node in the trie
221 221 * Negative value is a leaf: -(rev + 1)
222 222 * Zero is empty
223 223 */
224 224 typedef struct {
225 225 int children[16];
226 226 } nodetree;
227 227
228 228 /*
229 229 * This class has two behaviours.
230 230 *
231 231 * When used in a list-like way (with integer keys), we decode an
232 232 * entry in a RevlogNG index file on demand. Our last entry is a
233 233 * sentinel, always a nullid. We have limited support for
234 234 * integer-keyed insert and delete, only at elements right before the
235 235 * sentinel.
236 236 *
237 237 * With string keys, we lazily perform a reverse mapping from node to
238 238 * rev, using a base-16 trie.
239 239 */
240 240 typedef struct {
241 241 PyObject_HEAD
242 242 /* Type-specific fields go here. */
243 243 PyObject *data; /* raw bytes of index */
244 244 PyObject **cache; /* cached tuples */
245 245 const char **offsets; /* populated on demand */
246 246 Py_ssize_t raw_length; /* original number of elements */
247 247 Py_ssize_t length; /* current number of elements */
248 248 PyObject *added; /* populated on demand */
249 249 nodetree *nt; /* base-16 trie */
250 250 int ntlength; /* # nodes in use */
251 251 int ntcapacity; /* # nodes allocated */
252 252 int ntdepth; /* maximum depth of tree */
253 253 int ntsplits; /* # splits performed */
254 254 int ntrev; /* last rev scanned */
255 255 int ntlookups; /* # lookups */
256 256 int ntmisses; /* # lookups that miss the cache */
257 257 int inlined;
258 258 } indexObject;
259 259
260 260 static Py_ssize_t index_length(const indexObject *self)
261 261 {
262 262 if (self->added == NULL)
263 263 return self->length;
264 264 return self->length + PyList_GET_SIZE(self->added);
265 265 }
266 266
267 267 static PyObject *nullentry;
268 268 static const char nullid[20];
269 269
270 270 static long inline_scan(indexObject *self, const char **offsets);
271 271
272 272 #if LONG_MAX == 0x7fffffffL
273 273 static char *tuple_format = "Kiiiiiis#";
274 274 #else
275 275 static char *tuple_format = "kiiiiiis#";
276 276 #endif
277 277
278 278 /*
279 279 * Return a pointer to the beginning of a RevlogNG record.
280 280 */
281 281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 282 {
283 283 if (self->inlined && pos > 0) {
284 284 if (self->offsets == NULL) {
285 285 self->offsets = malloc(self->raw_length *
286 286 sizeof(*self->offsets));
287 287 if (self->offsets == NULL)
288 288 return (const char *)PyErr_NoMemory();
289 289 inline_scan(self, self->offsets);
290 290 }
291 291 return self->offsets[pos];
292 292 }
293 293
294 294 return PyString_AS_STRING(self->data) + pos * 64;
295 295 }
296 296
297 297 /*
298 298 * RevlogNG format (all in big endian, data may be inlined):
299 299 * 6 bytes: offset
300 300 * 2 bytes: flags
301 301 * 4 bytes: compressed length
302 302 * 4 bytes: uncompressed length
303 303 * 4 bytes: base revision
304 304 * 4 bytes: link revision
305 305 * 4 bytes: parent 1 revision
306 306 * 4 bytes: parent 2 revision
307 307 * 32 bytes: nodeid (only 20 bytes used)
308 308 */
309 309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 310 {
311 311 uint64_t offset_flags;
312 312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 313 const char *c_node_id;
314 314 const char *data;
315 315 Py_ssize_t length = index_length(self);
316 316 PyObject *entry;
317 317
318 318 if (pos < 0)
319 319 pos += length;
320 320
321 321 if (pos < 0 || pos >= length) {
322 322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 323 return NULL;
324 324 }
325 325
326 326 if (pos == length - 1) {
327 327 Py_INCREF(nullentry);
328 328 return nullentry;
329 329 }
330 330
331 331 if (pos >= self->length - 1) {
332 332 PyObject *obj;
333 333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 334 Py_INCREF(obj);
335 335 return obj;
336 336 }
337 337
338 338 if (self->cache) {
339 339 if (self->cache[pos]) {
340 340 Py_INCREF(self->cache[pos]);
341 341 return self->cache[pos];
342 342 }
343 343 } else {
344 344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 345 if (self->cache == NULL)
346 346 return PyErr_NoMemory();
347 347 }
348 348
349 349 data = index_deref(self, pos);
350 350 if (data == NULL)
351 351 return NULL;
352 352
353 353 offset_flags = getbe32(data + 4);
354 354 if (pos == 0) /* mask out version number for the first entry */
355 355 offset_flags &= 0xFFFF;
356 356 else {
357 357 uint32_t offset_high = getbe32(data);
358 358 offset_flags |= ((uint64_t)offset_high) << 32;
359 359 }
360 360
361 361 comp_len = getbe32(data + 8);
362 362 uncomp_len = getbe32(data + 12);
363 363 base_rev = getbe32(data + 16);
364 364 link_rev = getbe32(data + 20);
365 365 parent_1 = getbe32(data + 24);
366 366 parent_2 = getbe32(data + 28);
367 367 c_node_id = data + 32;
368 368
369 369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 370 uncomp_len, base_rev, link_rev,
371 371 parent_1, parent_2, c_node_id, 20);
372 372
373 373 if (entry)
374 374 PyObject_GC_UnTrack(entry);
375 375
376 376 self->cache[pos] = entry;
377 377 Py_INCREF(entry);
378 378
379 379 return entry;
380 380 }
381 381
382 382 /*
383 383 * Return the 20-byte SHA of the node corresponding to the given rev.
384 384 */
385 385 static const char *index_node(indexObject *self, Py_ssize_t pos)
386 386 {
387 387 Py_ssize_t length = index_length(self);
388 388 const char *data;
389 389
390 390 if (pos == length - 1 || pos == INT_MAX)
391 391 return nullid;
392 392
393 393 if (pos >= length)
394 394 return NULL;
395 395
396 396 if (pos >= self->length - 1) {
397 397 PyObject *tuple, *str;
398 398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
399 399 str = PyTuple_GetItem(tuple, 7);
400 400 return str ? PyString_AS_STRING(str) : NULL;
401 401 }
402 402
403 403 data = index_deref(self, pos);
404 404 return data ? data + 32 : NULL;
405 405 }
406 406
407 407 static int nt_insert(indexObject *self, const char *node, int rev);
408 408
409 409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 410 {
411 411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 412 return -1;
413 413 if (*nodelen == 20)
414 414 return 0;
415 415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 416 return -1;
417 417 }
418 418
419 419 static PyObject *index_insert(indexObject *self, PyObject *args)
420 420 {
421 421 PyObject *obj;
422 422 char *node;
423 423 long offset;
424 424 Py_ssize_t len, nodelen;
425 425
426 426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
427 427 return NULL;
428 428
429 429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
430 430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 431 return NULL;
432 432 }
433 433
434 434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
435 435 return NULL;
436 436
437 437 len = index_length(self);
438 438
439 439 if (offset < 0)
440 440 offset += len;
441 441
442 442 if (offset != len - 1) {
443 443 PyErr_SetString(PyExc_IndexError,
444 444 "insert only supported at index -1");
445 445 return NULL;
446 446 }
447 447
448 448 if (offset > INT_MAX) {
449 449 PyErr_SetString(PyExc_ValueError,
450 450 "currently only 2**31 revs supported");
451 451 return NULL;
452 452 }
453 453
454 454 if (self->added == NULL) {
455 455 self->added = PyList_New(0);
456 456 if (self->added == NULL)
457 457 return NULL;
458 458 }
459 459
460 460 if (PyList_Append(self->added, obj) == -1)
461 461 return NULL;
462 462
463 463 if (self->nt)
464 464 nt_insert(self, node, (int)offset);
465 465
466 466 Py_RETURN_NONE;
467 467 }
468 468
469 469 static void _index_clearcaches(indexObject *self)
470 470 {
471 471 if (self->cache) {
472 472 Py_ssize_t i;
473 473
474 474 for (i = 0; i < self->raw_length; i++) {
475 475 if (self->cache[i]) {
476 476 Py_DECREF(self->cache[i]);
477 477 self->cache[i] = NULL;
478 478 }
479 479 }
480 480 free(self->cache);
481 481 self->cache = NULL;
482 482 }
483 483 if (self->offsets) {
484 484 free(self->offsets);
485 485 self->offsets = NULL;
486 486 }
487 487 if (self->nt) {
488 488 free(self->nt);
489 489 self->nt = NULL;
490 490 }
491 491 }
492 492
493 493 static PyObject *index_clearcaches(indexObject *self)
494 494 {
495 495 _index_clearcaches(self);
496 496 self->ntlength = self->ntcapacity = 0;
497 497 self->ntdepth = self->ntsplits = 0;
498 498 self->ntrev = -1;
499 499 self->ntlookups = self->ntmisses = 0;
500 500 Py_RETURN_NONE;
501 501 }
502 502
503 503 static PyObject *index_stats(indexObject *self)
504 504 {
505 505 PyObject *obj = PyDict_New();
506 506
507 507 if (obj == NULL)
508 508 return NULL;
509 509
510 510 #define istat(__n, __d) \
511 511 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
512 512 goto bail;
513 513
514 514 if (self->added) {
515 515 Py_ssize_t len = PyList_GET_SIZE(self->added);
516 516 if (PyDict_SetItemString(obj, "index entries added",
517 517 PyInt_FromSsize_t(len)) == -1)
518 518 goto bail;
519 519 }
520 520
521 521 if (self->raw_length != self->length - 1)
522 522 istat(raw_length, "revs on disk");
523 523 istat(length, "revs in memory");
524 524 istat(ntcapacity, "node trie capacity");
525 525 istat(ntdepth, "node trie depth");
526 526 istat(ntlength, "node trie count");
527 527 istat(ntlookups, "node trie lookups");
528 528 istat(ntmisses, "node trie misses");
529 529 istat(ntrev, "node trie last rev scanned");
530 530 istat(ntsplits, "node trie splits");
531 531
532 532 #undef istat
533 533
534 534 return obj;
535 535
536 536 bail:
537 537 Py_XDECREF(obj);
538 538 return NULL;
539 539 }
540 540
541 541 static inline int nt_level(const char *node, Py_ssize_t level)
542 542 {
543 543 int v = node[level>>1];
544 544 if (!(level & 1))
545 545 v >>= 4;
546 546 return v & 0xf;
547 547 }
548 548
549 549 /*
550 550 * Return values:
551 551 *
552 552 * -4: match is ambiguous (multiple candidates)
553 553 * -2: not found
554 554 * rest: valid rev
555 555 */
556 556 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
557 557 int hex)
558 558 {
559 559 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
560 560 int level, maxlevel, off;
561 561
562 562 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
563 563 return -1;
564 564
565 565 if (self->nt == NULL)
566 566 return -2;
567 567
568 568 if (hex)
569 maxlevel = nodelen > 40 ? 40 : nodelen;
569 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
570 570 else
571 571 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
572 572
573 573 for (level = off = 0; level < maxlevel; level++) {
574 574 int k = getnybble(node, level);
575 575 nodetree *n = &self->nt[off];
576 576 int v = n->children[k];
577 577
578 578 if (v < 0) {
579 579 const char *n;
580 580 Py_ssize_t i;
581 581
582 582 v = -v - 1;
583 583 n = index_node(self, v);
584 584 if (n == NULL)
585 585 return -2;
586 586 for (i = level; i < maxlevel; i++)
587 587 if (getnybble(node, i) != nt_level(n, i))
588 588 return -2;
589 589 return v;
590 590 }
591 591 if (v == 0)
592 592 return -2;
593 593 off = v;
594 594 }
595 595 /* multiple matches against an ambiguous prefix */
596 596 return -4;
597 597 }
598 598
599 599 static int nt_new(indexObject *self)
600 600 {
601 601 if (self->ntlength == self->ntcapacity) {
602 602 self->ntcapacity *= 2;
603 603 self->nt = realloc(self->nt,
604 604 self->ntcapacity * sizeof(nodetree));
605 605 if (self->nt == NULL) {
606 606 PyErr_SetString(PyExc_MemoryError, "out of memory");
607 607 return -1;
608 608 }
609 609 memset(&self->nt[self->ntlength], 0,
610 610 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
611 611 }
612 612 return self->ntlength++;
613 613 }
614 614
615 615 static int nt_insert(indexObject *self, const char *node, int rev)
616 616 {
617 617 int level = 0;
618 618 int off = 0;
619 619
620 620 while (level < 40) {
621 621 int k = nt_level(node, level);
622 622 nodetree *n;
623 623 int v;
624 624
625 625 n = &self->nt[off];
626 626 v = n->children[k];
627 627
628 628 if (v == 0) {
629 629 n->children[k] = -rev - 1;
630 630 return 0;
631 631 }
632 632 if (v < 0) {
633 633 const char *oldnode = index_node(self, -v - 1);
634 634 int noff;
635 635
636 636 if (!oldnode || !memcmp(oldnode, node, 20)) {
637 637 n->children[k] = -rev - 1;
638 638 return 0;
639 639 }
640 640 noff = nt_new(self);
641 641 if (noff == -1)
642 642 return -1;
643 643 /* self->nt may have been changed by realloc */
644 644 self->nt[off].children[k] = noff;
645 645 off = noff;
646 646 n = &self->nt[off];
647 647 n->children[nt_level(oldnode, ++level)] = v;
648 648 if (level > self->ntdepth)
649 649 self->ntdepth = level;
650 650 self->ntsplits += 1;
651 651 } else {
652 652 level += 1;
653 653 off = v;
654 654 }
655 655 }
656 656
657 657 return -1;
658 658 }
659 659
660 660 static int nt_init(indexObject *self)
661 661 {
662 662 if (self->nt == NULL) {
663 663 self->ntcapacity = self->raw_length < 4
664 664 ? 4 : self->raw_length / 2;
665 665 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
666 666 if (self->nt == NULL) {
667 667 PyErr_NoMemory();
668 668 return -1;
669 669 }
670 670 self->ntlength = 1;
671 671 self->ntrev = (int)index_length(self) - 1;
672 672 self->ntlookups = 1;
673 673 self->ntmisses = 0;
674 674 if (nt_insert(self, nullid, INT_MAX) == -1)
675 675 return -1;
676 676 }
677 677 return 0;
678 678 }
679 679
680 680 /*
681 681 * Return values:
682 682 *
683 683 * -3: error (exception set)
684 684 * -2: not found (no exception set)
685 685 * rest: valid rev
686 686 */
687 687 static int index_find_node(indexObject *self,
688 688 const char *node, Py_ssize_t nodelen)
689 689 {
690 690 int rev;
691 691
692 692 self->ntlookups++;
693 693 rev = nt_find(self, node, nodelen, 0);
694 694 if (rev >= -1)
695 695 return rev;
696 696
697 697 if (nt_init(self) == -1)
698 698 return -3;
699 699
700 700 /*
701 701 * For the first handful of lookups, we scan the entire index,
702 702 * and cache only the matching nodes. This optimizes for cases
703 703 * like "hg tip", where only a few nodes are accessed.
704 704 *
705 705 * After that, we cache every node we visit, using a single
706 706 * scan amortized over multiple lookups. This gives the best
707 707 * bulk performance, e.g. for "hg log".
708 708 */
709 709 if (self->ntmisses++ < 4) {
710 710 for (rev = self->ntrev - 1; rev >= 0; rev--) {
711 711 const char *n = index_node(self, rev);
712 712 if (n == NULL)
713 713 return -2;
714 714 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
715 715 if (nt_insert(self, n, rev) == -1)
716 716 return -3;
717 717 break;
718 718 }
719 719 }
720 720 } else {
721 721 for (rev = self->ntrev - 1; rev >= 0; rev--) {
722 722 const char *n = index_node(self, rev);
723 723 if (n == NULL) {
724 724 self->ntrev = rev + 1;
725 725 return -2;
726 726 }
727 727 if (nt_insert(self, n, rev) == -1) {
728 728 self->ntrev = rev + 1;
729 729 return -3;
730 730 }
731 731 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
732 732 break;
733 733 }
734 734 }
735 735 self->ntrev = rev;
736 736 }
737 737
738 738 if (rev >= 0)
739 739 return rev;
740 740 return -2;
741 741 }
742 742
743 743 static PyObject *raise_revlog_error(void)
744 744 {
745 745 static PyObject *errclass;
746 746 PyObject *mod = NULL, *errobj;
747 747
748 748 if (errclass == NULL) {
749 749 PyObject *dict;
750 750
751 751 mod = PyImport_ImportModule("mercurial.error");
752 752 if (mod == NULL)
753 753 goto classfail;
754 754
755 755 dict = PyModule_GetDict(mod);
756 756 if (dict == NULL)
757 757 goto classfail;
758 758
759 759 errclass = PyDict_GetItemString(dict, "RevlogError");
760 760 if (errclass == NULL) {
761 761 PyErr_SetString(PyExc_SystemError,
762 762 "could not find RevlogError");
763 763 goto classfail;
764 764 }
765 765 Py_INCREF(errclass);
766 766 }
767 767
768 768 errobj = PyObject_CallFunction(errclass, NULL);
769 769 if (errobj == NULL)
770 770 return NULL;
771 771 PyErr_SetObject(errclass, errobj);
772 772 return errobj;
773 773
774 774 classfail:
775 775 Py_XDECREF(mod);
776 776 return NULL;
777 777 }
778 778
779 779 static PyObject *index_getitem(indexObject *self, PyObject *value)
780 780 {
781 781 char *node;
782 782 Py_ssize_t nodelen;
783 783 int rev;
784 784
785 785 if (PyInt_Check(value))
786 786 return index_get(self, PyInt_AS_LONG(value));
787 787
788 788 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
789 789 return NULL;
790 790 rev = index_find_node(self, node, nodelen);
791 791 if (rev >= -1)
792 792 return PyInt_FromLong(rev);
793 793 if (rev == -2)
794 794 raise_revlog_error();
795 795 return NULL;
796 796 }
797 797
798 static int nt_partialmatch(indexObject *self, const char *node,
799 Py_ssize_t nodelen)
800 {
801 int rev;
802
803 if (nt_init(self) == -1)
804 return -3;
805
806 if (self->ntrev > 0) {
807 /* ensure that the radix tree is fully populated */
808 for (rev = self->ntrev - 1; rev >= 0; rev--) {
809 const char *n = index_node(self, rev);
810 if (n == NULL)
811 return -2;
812 if (nt_insert(self, n, rev) == -1)
813 return -3;
814 }
815 self->ntrev = rev;
816 }
817
818 return nt_find(self, node, nodelen, 1);
819 }
820
821 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
822 {
823 const char *fullnode;
824 int nodelen;
825 char *node;
826 int rev, i;
827
828 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
829 return NULL;
830
831 if (nodelen < 4) {
832 PyErr_SetString(PyExc_ValueError, "key too short");
833 return NULL;
834 }
835
836 if (nodelen > 40)
837 nodelen = 40;
838
839 for (i = 0; i < nodelen; i++)
840 hexdigit(node, i);
841 if (PyErr_Occurred()) {
842 /* input contains non-hex characters */
843 PyErr_Clear();
844 Py_RETURN_NONE;
845 }
846
847 rev = nt_partialmatch(self, node, nodelen);
848
849 switch (rev) {
850 case -4:
851 raise_revlog_error();
852 case -3:
853 return NULL;
854 case -2:
855 Py_RETURN_NONE;
856 case -1:
857 return PyString_FromStringAndSize(nullid, 20);
858 }
859
860 fullnode = index_node(self, rev);
861 if (fullnode == NULL) {
862 PyErr_Format(PyExc_IndexError,
863 "could not access rev %d", rev);
864 return NULL;
865 }
866 return PyString_FromStringAndSize(fullnode, 20);
867 }
868
798 869 static PyObject *index_m_get(indexObject *self, PyObject *args)
799 870 {
800 871 char *node;
801 872 int nodelen, rev;
802 873
803 874 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
804 875 return NULL;
805 876
806 877 rev = index_find_node(self, node, nodelen);
807 878 if (rev == -3)
808 879 return NULL;
809 880 if (rev == -2)
810 881 Py_RETURN_NONE;
811 882 return PyInt_FromLong(rev);
812 883 }
813 884
814 885 static int index_contains(indexObject *self, PyObject *value)
815 886 {
816 887 char *node;
817 888 Py_ssize_t nodelen;
818 889
819 890 if (PyInt_Check(value)) {
820 891 long rev = PyInt_AS_LONG(value);
821 892 return rev >= -1 && rev < index_length(self);
822 893 }
823 894
824 895 if (!PyString_Check(value))
825 896 return 0;
826 897
827 898 node = PyString_AS_STRING(value);
828 899 nodelen = PyString_GET_SIZE(value);
829 900
830 901 switch (index_find_node(self, node, nodelen)) {
831 902 case -3:
832 903 return -1;
833 904 case -2:
834 905 return 0;
835 906 default:
836 907 return 1;
837 908 }
838 909 }
839 910
840 911 /*
841 912 * Invalidate any trie entries introduced by added revs.
842 913 */
843 914 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
844 915 {
845 916 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
846 917
847 918 for (i = start; i < len; i++) {
848 919 PyObject *tuple = PyList_GET_ITEM(self->added, i);
849 920 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
850 921
851 922 nt_insert(self, PyString_AS_STRING(node), -1);
852 923 }
853 924
854 925 if (start == 0) {
855 926 Py_DECREF(self->added);
856 927 self->added = NULL;
857 928 }
858 929 }
859 930
860 931 /*
861 932 * Delete a numeric range of revs, which must be at the end of the
862 933 * index; the sentinel nullid entry is excluded.
863 934 */
864 935 static int index_slice_del(indexObject *self, PyObject *item)
865 936 {
866 937 Py_ssize_t start, stop, step, slicelength;
867 938 Py_ssize_t length = index_length(self);
868 939
869 940 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
870 941 &start, &stop, &step, &slicelength) < 0)
871 942 return -1;
872 943
873 944 if (slicelength <= 0)
874 945 return 0;
875 946
876 947 if ((step < 0 && start < stop) || (step > 0 && start > stop))
877 948 stop = start;
878 949
879 950 if (step < 0) {
880 951 stop = start + 1;
881 952 start = stop + step*(slicelength - 1) - 1;
882 953 step = -step;
883 954 }
884 955
885 956 if (step != 1) {
886 957 PyErr_SetString(PyExc_ValueError,
887 958 "revlog index delete requires step size of 1");
888 959 return -1;
889 960 }
890 961
891 962 if (stop != length - 1) {
892 963 PyErr_SetString(PyExc_IndexError,
893 964 "revlog index deletion indices are invalid");
894 965 return -1;
895 966 }
896 967
897 968 if (start < self->length - 1) {
898 969 if (self->nt) {
899 970 Py_ssize_t i;
900 971
901 972 for (i = start + 1; i < self->length - 1; i++) {
902 973 const char *node = index_node(self, i);
903 974
904 975 if (node)
905 976 nt_insert(self, node, -1);
906 977 }
907 978 if (self->added)
908 979 nt_invalidate_added(self, 0);
909 980 if (self->ntrev > start)
910 981 self->ntrev = (int)start;
911 982 }
912 983 self->length = start + 1;
913 984 return 0;
914 985 }
915 986
916 987 if (self->nt) {
917 988 nt_invalidate_added(self, start - self->length + 1);
918 989 if (self->ntrev > start)
919 990 self->ntrev = (int)start;
920 991 }
921 992 return self->added
922 993 ? PyList_SetSlice(self->added, start - self->length + 1,
923 994 PyList_GET_SIZE(self->added), NULL)
924 995 : 0;
925 996 }
926 997
927 998 /*
928 999 * Supported ops:
929 1000 *
930 1001 * slice deletion
931 1002 * string assignment (extend node->rev mapping)
932 1003 * string deletion (shrink node->rev mapping)
933 1004 */
934 1005 static int index_assign_subscript(indexObject *self, PyObject *item,
935 1006 PyObject *value)
936 1007 {
937 1008 char *node;
938 1009 Py_ssize_t nodelen;
939 1010 long rev;
940 1011
941 1012 if (PySlice_Check(item) && value == NULL)
942 1013 return index_slice_del(self, item);
943 1014
944 1015 if (node_check(item, &node, &nodelen) == -1)
945 1016 return -1;
946 1017
947 1018 if (value == NULL)
948 1019 return self->nt ? nt_insert(self, node, -1) : 0;
949 1020 rev = PyInt_AsLong(value);
950 1021 if (rev > INT_MAX || rev < 0) {
951 1022 if (!PyErr_Occurred())
952 1023 PyErr_SetString(PyExc_ValueError, "rev out of range");
953 1024 return -1;
954 1025 }
955 1026 return nt_insert(self, node, (int)rev);
956 1027 }
957 1028
958 1029 /*
959 1030 * Find all RevlogNG entries in an index that has inline data. Update
960 1031 * the optional "offsets" table with those entries.
961 1032 */
962 1033 static long inline_scan(indexObject *self, const char **offsets)
963 1034 {
964 1035 const char *data = PyString_AS_STRING(self->data);
965 1036 const char *end = data + PyString_GET_SIZE(self->data);
966 1037 const long hdrsize = 64;
967 1038 long incr = hdrsize;
968 1039 Py_ssize_t len = 0;
969 1040
970 1041 while (data + hdrsize <= end) {
971 1042 uint32_t comp_len;
972 1043 const char *old_data;
973 1044 /* 3rd element of header is length of compressed inline data */
974 1045 comp_len = getbe32(data + 8);
975 1046 incr = hdrsize + comp_len;
976 1047 if (incr < hdrsize)
977 1048 break;
978 1049 if (offsets)
979 1050 offsets[len] = data;
980 1051 len++;
981 1052 old_data = data;
982 1053 data += incr;
983 1054 if (data <= old_data)
984 1055 break;
985 1056 }
986 1057
987 1058 if (data != end && data + hdrsize != end) {
988 1059 if (!PyErr_Occurred())
989 1060 PyErr_SetString(PyExc_ValueError, "corrupt index file");
990 1061 return -1;
991 1062 }
992 1063
993 1064 return len;
994 1065 }
995 1066
996 1067 static int index_init(indexObject *self, PyObject *args)
997 1068 {
998 1069 PyObject *data_obj, *inlined_obj;
999 1070 Py_ssize_t size;
1000 1071
1001 1072 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1002 1073 return -1;
1003 1074 if (!PyString_Check(data_obj)) {
1004 1075 PyErr_SetString(PyExc_TypeError, "data is not a string");
1005 1076 return -1;
1006 1077 }
1007 1078 size = PyString_GET_SIZE(data_obj);
1008 1079
1009 1080 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1010 1081 self->data = data_obj;
1011 1082 self->cache = NULL;
1012 1083
1013 1084 self->added = NULL;
1014 1085 self->offsets = NULL;
1015 1086 self->nt = NULL;
1016 1087 self->ntlength = self->ntcapacity = 0;
1017 1088 self->ntdepth = self->ntsplits = 0;
1018 1089 self->ntlookups = self->ntmisses = 0;
1019 1090 self->ntrev = -1;
1020 1091 Py_INCREF(self->data);
1021 1092
1022 1093 if (self->inlined) {
1023 1094 long len = inline_scan(self, NULL);
1024 1095 if (len == -1)
1025 1096 goto bail;
1026 1097 self->raw_length = len;
1027 1098 self->length = len + 1;
1028 1099 } else {
1029 1100 if (size % 64) {
1030 1101 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1031 1102 goto bail;
1032 1103 }
1033 1104 self->raw_length = size / 64;
1034 1105 self->length = self->raw_length + 1;
1035 1106 }
1036 1107
1037 1108 return 0;
1038 1109 bail:
1039 1110 return -1;
1040 1111 }
1041 1112
1042 1113 static PyObject *index_nodemap(indexObject *self)
1043 1114 {
1044 1115 Py_INCREF(self);
1045 1116 return (PyObject *)self;
1046 1117 }
1047 1118
1048 1119 static void index_dealloc(indexObject *self)
1049 1120 {
1050 1121 _index_clearcaches(self);
1051 1122 Py_DECREF(self->data);
1052 1123 Py_XDECREF(self->added);
1053 1124 PyObject_Del(self);
1054 1125 }
1055 1126
1056 1127 static PySequenceMethods index_sequence_methods = {
1057 1128 (lenfunc)index_length, /* sq_length */
1058 1129 0, /* sq_concat */
1059 1130 0, /* sq_repeat */
1060 1131 (ssizeargfunc)index_get, /* sq_item */
1061 1132 0, /* sq_slice */
1062 1133 0, /* sq_ass_item */
1063 1134 0, /* sq_ass_slice */
1064 1135 (objobjproc)index_contains, /* sq_contains */
1065 1136 };
1066 1137
1067 1138 static PyMappingMethods index_mapping_methods = {
1068 1139 (lenfunc)index_length, /* mp_length */
1069 1140 (binaryfunc)index_getitem, /* mp_subscript */
1070 1141 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1071 1142 };
1072 1143
1073 1144 static PyMethodDef index_methods[] = {
1074 1145 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1075 1146 "clear the index caches"},
1076 1147 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1077 1148 "get an index entry"},
1078 1149 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1079 1150 "insert an index entry"},
1151 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1152 "match a potentially ambiguous node ID"},
1080 1153 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1081 1154 "stats for the index"},
1082 1155 {NULL} /* Sentinel */
1083 1156 };
1084 1157
1085 1158 static PyGetSetDef index_getset[] = {
1086 1159 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1087 1160 {NULL} /* Sentinel */
1088 1161 };
1089 1162
1090 1163 static PyTypeObject indexType = {
1091 1164 PyObject_HEAD_INIT(NULL)
1092 1165 0, /* ob_size */
1093 1166 "parsers.index", /* tp_name */
1094 1167 sizeof(indexObject), /* tp_basicsize */
1095 1168 0, /* tp_itemsize */
1096 1169 (destructor)index_dealloc, /* tp_dealloc */
1097 1170 0, /* tp_print */
1098 1171 0, /* tp_getattr */
1099 1172 0, /* tp_setattr */
1100 1173 0, /* tp_compare */
1101 1174 0, /* tp_repr */
1102 1175 0, /* tp_as_number */
1103 1176 &index_sequence_methods, /* tp_as_sequence */
1104 1177 &index_mapping_methods, /* tp_as_mapping */
1105 1178 0, /* tp_hash */
1106 1179 0, /* tp_call */
1107 1180 0, /* tp_str */
1108 1181 0, /* tp_getattro */
1109 1182 0, /* tp_setattro */
1110 1183 0, /* tp_as_buffer */
1111 1184 Py_TPFLAGS_DEFAULT, /* tp_flags */
1112 1185 "revlog index", /* tp_doc */
1113 1186 0, /* tp_traverse */
1114 1187 0, /* tp_clear */
1115 1188 0, /* tp_richcompare */
1116 1189 0, /* tp_weaklistoffset */
1117 1190 0, /* tp_iter */
1118 1191 0, /* tp_iternext */
1119 1192 index_methods, /* tp_methods */
1120 1193 0, /* tp_members */
1121 1194 index_getset, /* tp_getset */
1122 1195 0, /* tp_base */
1123 1196 0, /* tp_dict */
1124 1197 0, /* tp_descr_get */
1125 1198 0, /* tp_descr_set */
1126 1199 0, /* tp_dictoffset */
1127 1200 (initproc)index_init, /* tp_init */
1128 1201 0, /* tp_alloc */
1129 1202 };
1130 1203
1131 1204 /*
1132 1205 * returns a tuple of the form (index, cache) with elements as
1133 1206 * follows:
1134 1207 *
1135 1208 * index: an index object that lazily parses RevlogNG records
1136 1209 * cache: if data is inlined, a tuple (0, index_file_content), else None
1137 1210 *
1138 1211 * added complications are for backwards compatibility
1139 1212 */
1140 1213 static PyObject *parse_index2(PyObject *self, PyObject *args)
1141 1214 {
1142 1215 PyObject *tuple = NULL, *cache = NULL;
1143 1216 indexObject *idx;
1144 1217 int ret;
1145 1218
1146 1219 idx = PyObject_New(indexObject, &indexType);
1147 1220 if (idx == NULL)
1148 1221 goto bail;
1149 1222
1150 1223 ret = index_init(idx, args);
1151 1224 if (ret == -1)
1152 1225 goto bail;
1153 1226
1154 1227 if (idx->inlined) {
1155 1228 cache = Py_BuildValue("iO", 0, idx->data);
1156 1229 if (cache == NULL)
1157 1230 goto bail;
1158 1231 } else {
1159 1232 cache = Py_None;
1160 1233 Py_INCREF(cache);
1161 1234 }
1162 1235
1163 1236 tuple = Py_BuildValue("NN", idx, cache);
1164 1237 if (!tuple)
1165 1238 goto bail;
1166 1239 return tuple;
1167 1240
1168 1241 bail:
1169 1242 Py_XDECREF(idx);
1170 1243 Py_XDECREF(cache);
1171 1244 Py_XDECREF(tuple);
1172 1245 return NULL;
1173 1246 }
1174 1247
1175 1248 static char parsers_doc[] = "Efficient content parsing.";
1176 1249
1177 1250 static PyMethodDef methods[] = {
1178 1251 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1179 1252 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1180 1253 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1181 1254 {NULL, NULL}
1182 1255 };
1183 1256
1184 1257 static void module_init(PyObject *mod)
1185 1258 {
1186 1259 indexType.tp_new = PyType_GenericNew;
1187 1260 if (PyType_Ready(&indexType) < 0)
1188 1261 return;
1189 1262 Py_INCREF(&indexType);
1190 1263
1191 1264 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1192 1265
1193 1266 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1194 1267 -1, -1, -1, -1, nullid, 20);
1195 1268 if (nullentry)
1196 1269 PyObject_GC_UnTrack(nullentry);
1197 1270 }
1198 1271
1199 1272 #ifdef IS_PY3K
1200 1273 static struct PyModuleDef parsers_module = {
1201 1274 PyModuleDef_HEAD_INIT,
1202 1275 "parsers",
1203 1276 parsers_doc,
1204 1277 -1,
1205 1278 methods
1206 1279 };
1207 1280
1208 1281 PyMODINIT_FUNC PyInit_parsers(void)
1209 1282 {
1210 1283 PyObject *mod = PyModule_Create(&parsers_module);
1211 1284 module_init(mod);
1212 1285 return mod;
1213 1286 }
1214 1287 #else
1215 1288 PyMODINIT_FUNC initparsers(void)
1216 1289 {
1217 1290 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1218 1291 module_init(mod);
1219 1292 }
1220 1293 #endif
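For reference, the nodetree encoding that nt_insert and nt_find walk above can be modeled in a few lines of Python. This is a toy model for exposition, not part of the patch: each trie node is 16 slots, where a positive value is the index of a child node, a negative value is a leaf encoding rev = -v - 1, and zero is empty; the getnode callable stands in for index_node().

    def _nibble(node, level):
        # level-th nibble of a binary hash, high nibble first,
        # mirroring nt_level() in parsers.c
        v = ord(node[level >> 1])
        if not (level & 1):
            v >>= 4
        return v & 0xf

    class toytrie(object):
        def __init__(self, getnode):
            self.getnode = getnode   # rev -> 20-byte node, like index_node()
            self.nodes = [[0] * 16]  # entry 0 is the root

        def insert(self, node, rev):
            off = level = 0
            while level < 40:
                k = _nibble(node, level)
                v = self.nodes[off][k]
                if v == 0:                 # empty slot: store a leaf
                    self.nodes[off][k] = -rev - 1
                    return
                if v < 0:                  # occupied leaf: split it
                    oldnode = self.getnode(-v - 1)
                    if oldnode == node:
                        self.nodes[off][k] = -rev - 1
                        return
                    self.nodes.append([0] * 16)
                    noff = len(self.nodes) - 1
                    self.nodes[off][k] = noff
                    off = noff
                    level += 1
                    self.nodes[off][_nibble(oldnode, level)] = v
                    # next iteration places the new rev, splitting
                    # further while the two hashes keep agreeing
                else:                      # internal node: descend
                    off = v
                    level += 1

        def find(self, node):
            off = 0
            for level in xrange(40):
                k = _nibble(node, level)
                v = self.nodes[off][k]
                if v == 0:                 # like nt_find's -2
                    return None
                if v < 0:
                    rev = -v - 1           # leaf: verify the stored node
                    if self.getnode(rev) == node:
                        return rev
                    return None
                off = v
            return None

Prefix lookup works the same way: nt_find just stops after the prefix's nibbles, and a walk that ends on an internal node means several stored nodes share the prefix (the -4 "ambiguous" case).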
mercurial/revlog.py
@@ -1,1308 +1,1317 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 177 return index, getattr(index, 'nodemap', None), cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 opts = getattr(opener, 'options', None)
230 230 if opts is not None:
231 231 if 'revlogv1' in opts:
232 232 if 'generaldelta' in opts:
233 233 v |= REVLOGGENERALDELTA
234 234 else:
235 235 v = 0
236 236
237 237 i = ''
238 238 self._initempty = True
239 239 try:
240 240 f = self.opener(self.indexfile)
241 241 i = f.read()
242 242 f.close()
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 self._initempty = False
246 246 except IOError, inst:
247 247 if inst.errno != errno.ENOENT:
248 248 raise
249 249
250 250 self.version = v
251 251 self._inline = v & REVLOGNGINLINEDATA
252 252 self._generaldelta = v & REVLOGGENERALDELTA
253 253 flags = v & ~0xFFFF
254 254 fmt = v & 0xFFFF
255 255 if fmt == REVLOGV0 and flags:
256 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 257 % (self.indexfile, flags >> 16))
258 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt > REVLOGNG:
262 262 raise RevlogError(_("index %s unknown format %d")
263 263 % (self.indexfile, fmt))
264 264
265 265 self._io = revlogio()
266 266 if self.version == REVLOGV0:
267 267 self._io = revlogoldio()
268 268 try:
269 269 d = self._io.parseindex(i, self._inline)
270 270 except (ValueError, IndexError):
271 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 272 self.index, nodemap, self._chunkcache = d
273 273 if nodemap is not None:
274 274 self.nodemap = self._nodecache = nodemap
275 275 if not self._chunkcache:
276 276 self._chunkclear()
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 for i in xrange(len(self)):
284 284 yield i
285 285
286 286 @util.propertycache
287 287 def nodemap(self):
288 288 self.rev(self.node(0))
289 289 return self._nodecache
290 290
291 291 def hasnode(self, node):
292 292 try:
293 293 self.rev(node)
294 294 return True
295 295 except KeyError:
296 296 return False
297 297
298 298 def clearcaches(self):
299 299 try:
300 300 self._nodecache.clearcaches()
301 301 except AttributeError:
302 302 self._nodecache = {nullid: nullrev}
303 303 self._nodepos = None
304 304
305 305 def rev(self, node):
306 306 try:
307 307 return self._nodecache[node]
308 308 except RevlogError:
309 309 # parsers.c radix tree lookup failed
310 310 raise LookupError(node, self.indexfile, _('no node'))
311 311 except KeyError:
312 312 # pure python cache lookup failed
313 313 n = self._nodecache
314 314 i = self.index
315 315 p = self._nodepos
316 316 if p is None:
317 317 p = len(i) - 2
318 318 for r in xrange(p, -1, -1):
319 319 v = i[r][7]
320 320 n[v] = r
321 321 if v == node:
322 322 self._nodepos = r - 1
323 323 return r
324 324 raise LookupError(node, self.indexfile, _('no node'))
325 325
326 326 def node(self, rev):
327 327 return self.index[rev][7]
328 328 def linkrev(self, rev):
329 329 return self.index[rev][4]
330 330 def parents(self, node):
331 331 i = self.index
332 332 d = i[self.rev(node)]
333 333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 334 def parentrevs(self, rev):
335 335 return self.index[rev][5:7]
336 336 def start(self, rev):
337 337 return int(self.index[rev][0] >> 16)
338 338 def end(self, rev):
339 339 return self.start(rev) + self.length(rev)
340 340 def length(self, rev):
341 341 return self.index[rev][1]
342 342 def chainbase(self, rev):
343 343 index = self.index
344 344 base = index[rev][3]
345 345 while base != rev:
346 346 rev = base
347 347 base = index[rev][3]
348 348 return base
349 349 def flags(self, rev):
350 350 return self.index[rev][0] & 0xFFFF
351 351 def rawsize(self, rev):
352 352 """return the length of the uncompressed text for a given revision"""
353 353 l = self.index[rev][2]
354 354 if l >= 0:
355 355 return l
356 356
357 357 t = self.revision(self.node(rev))
358 358 return len(t)
359 359 size = rawsize
360 360
361 361 def reachable(self, node, stop=None):
362 362 """return the set of all nodes ancestral to a given node, including
363 363 the node itself, stopping when stop is matched"""
364 364 reachable = set((node,))
365 365 visit = [node]
366 366 if stop:
367 367 stopn = self.rev(stop)
368 368 else:
369 369 stopn = 0
370 370 while visit:
371 371 n = visit.pop(0)
372 372 if n == stop:
373 373 continue
374 374 if n == nullid:
375 375 continue
376 376 for p in self.parents(n):
377 377 if self.rev(p) < stopn:
378 378 continue
379 379 if p not in reachable:
380 380 reachable.add(p)
381 381 visit.append(p)
382 382 return reachable
383 383
384 384 def ancestors(self, *revs):
385 385 """Generate the ancestors of 'revs' in reverse topological order.
386 386
387 387 Yield a sequence of revision numbers starting with the parents
388 388 of each revision in revs, i.e., each revision is *not* considered
389 389 an ancestor of itself. Results are in breadth-first order:
390 390 parents of each rev in revs, then parents of those, etc. Result
391 391 does not include the null revision."""
392 392 visit = list(revs)
393 393 seen = set([nullrev])
394 394 while visit:
395 395 for parent in self.parentrevs(visit.pop(0)):
396 396 if parent not in seen:
397 397 visit.append(parent)
398 398 seen.add(parent)
399 399 yield parent
400 400
401 401 def descendants(self, *revs):
402 402 """Generate the descendants of 'revs' in revision order.
403 403
404 404 Yield a sequence of revision numbers starting with a child of
405 405 some rev in revs, i.e., each revision is *not* considered a
406 406 descendant of itself. Results are ordered by revision number (a
407 407 topological sort)."""
408 408 first = min(revs)
409 409 if first == nullrev:
410 410 for i in self:
411 411 yield i
412 412 return
413 413
414 414 seen = set(revs)
415 415 for i in xrange(first + 1, len(self)):
416 416 for x in self.parentrevs(i):
417 417 if x != nullrev and x in seen:
418 418 seen.add(i)
419 419 yield i
420 420 break
421 421
422 422 def findcommonmissing(self, common=None, heads=None):
423 423 """Return a tuple of the ancestors of common and the ancestors of heads
424 424 that are not ancestors of common. In revset terminology, we return the
425 425 tuple:
426 426
427 427 ::common, (::heads) - (::common)
428 428
429 429 The list is sorted by revision number, meaning it is
430 430 topologically sorted.
431 431
432 432 'heads' and 'common' are both lists of node IDs. If heads is
433 433 not supplied, uses all of the revlog's heads. If common is not
434 434 supplied, uses nullid."""
435 435 if common is None:
436 436 common = [nullid]
437 437 if heads is None:
438 438 heads = self.heads()
439 439
440 440 common = [self.rev(n) for n in common]
441 441 heads = [self.rev(n) for n in heads]
442 442
443 443 # we want the ancestors, but inclusive
444 444 has = set(self.ancestors(*common))
445 445 has.add(nullrev)
446 446 has.update(common)
447 447
448 448 # take all ancestors from heads that aren't in has
449 449 missing = set()
450 450 visit = [r for r in heads if r not in has]
451 451 while visit:
452 452 r = visit.pop(0)
453 453 if r in missing:
454 454 continue
455 455 else:
456 456 missing.add(r)
457 457 for p in self.parentrevs(r):
458 458 if p not in has:
459 459 visit.append(p)
460 460 missing = list(missing)
461 461 missing.sort()
462 462 return has, [self.node(r) for r in missing]
463 463
464 464 def findmissing(self, common=None, heads=None):
465 465 """Return the ancestors of heads that are not ancestors of common.
466 466
467 467 More specifically, return a list of nodes N such that every N
468 468 satisfies the following constraints:
469 469
470 470 1. N is an ancestor of some node in 'heads'
471 471 2. N is not an ancestor of any node in 'common'
472 472
473 473 The list is sorted by revision number, meaning it is
474 474 topologically sorted.
475 475
476 476 'heads' and 'common' are both lists of node IDs. If heads is
477 477 not supplied, uses all of the revlog's heads. If common is not
478 478 supplied, uses nullid."""
479 479 _common, missing = self.findcommonmissing(common, heads)
480 480 return missing
481 481
482 482 def nodesbetween(self, roots=None, heads=None):
483 483 """Return a topological path from 'roots' to 'heads'.
484 484
485 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 486 topologically sorted list of all nodes N that satisfy both of
487 487 these constraints:
488 488
489 489 1. N is a descendant of some node in 'roots'
490 490 2. N is an ancestor of some node in 'heads'
491 491
492 492 Every node is considered to be both a descendant and an ancestor
493 493 of itself, so every reachable node in 'roots' and 'heads' will be
494 494 included in 'nodes'.
495 495
496 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499 499
500 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 501 unspecified, uses nullid as the only root. If 'heads' is
502 502 unspecified, uses list of all of the revlog's heads."""
503 503 nonodes = ([], [], [])
504 504 if roots is not None:
505 505 roots = list(roots)
506 506 if not roots:
507 507 return nonodes
508 508 lowestrev = min([self.rev(n) for n in roots])
509 509 else:
510 510 roots = [nullid] # Everybody's a descendant of nullid
511 511 lowestrev = nullrev
512 512 if (lowestrev == nullrev) and (heads is None):
513 513 # We want _all_ the nodes!
514 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 515 if heads is None:
516 516 # All nodes are ancestors, so the latest ancestor is the last
517 517 # node.
518 518 highestrev = len(self) - 1
519 519 # Set ancestors to None to signal that every node is an ancestor.
520 520 ancestors = None
521 521 # Set heads to an empty dictionary for later discovery of heads
522 522 heads = {}
523 523 else:
524 524 heads = list(heads)
525 525 if not heads:
526 526 return nonodes
527 527 ancestors = set()
528 528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 529 # Also, later we will be using it to filter out the heads we can't
530 530 # find from roots.
531 531 heads = dict.fromkeys(heads, False)
532 532 # Start at the top and keep marking parents until we're done.
533 533 nodestotag = set(heads)
534 534 # Remember where the top was so we can use it as a limit later.
535 535 highestrev = max([self.rev(n) for n in nodestotag])
536 536 while nodestotag:
537 537 # grab a node to tag
538 538 n = nodestotag.pop()
539 539 # Never tag nullid
540 540 if n == nullid:
541 541 continue
542 542 # A node's revision number represents its place in a
543 543 # topologically sorted list of nodes.
544 544 r = self.rev(n)
545 545 if r >= lowestrev:
546 546 if n not in ancestors:
547 547 # If we are possibly a descendant of one of the roots
548 548 # and we haven't already been marked as an ancestor
549 549 ancestors.add(n) # Mark as ancestor
550 550 # Add non-nullid parents to list of nodes to tag.
551 551 nodestotag.update([p for p in self.parents(n) if
552 552 p != nullid])
553 553 elif n in heads: # We've seen it before, is it a fake head?
554 554 # So it is, real heads should not be the ancestors of
555 555 # any other heads.
556 556 heads.pop(n)
557 557 if not ancestors:
558 558 return nonodes
559 559 # Now that we have our set of ancestors, we want to remove any
560 560 # roots that are not ancestors.
561 561
562 562 # If one of the roots was nullid, everything is included anyway.
563 563 if lowestrev > nullrev:
564 564 # But, since it wasn't, let's recompute the lowest rev to not
565 565 # include roots that aren't ancestors.
566 566
567 567 # Filter out roots that aren't ancestors of heads
568 568 roots = [n for n in roots if n in ancestors]
569 569 # Recompute the lowest revision
570 570 if roots:
571 571 lowestrev = min([self.rev(n) for n in roots])
572 572 else:
573 573 # No more roots? Return empty list
574 574 return nonodes
575 575 else:
576 576 # We are descending from nullid, and don't need to care about
577 577 # any other roots.
578 578 lowestrev = nullrev
579 579 roots = [nullid]
580 580 # Transform our roots list into a set.
581 581 descendants = set(roots)
582 582 # Also, keep the original roots so we can filter out roots that aren't
583 583 # 'real' roots (i.e. are descended from other roots).
584 584 roots = descendants.copy()
585 585 # Our topologically sorted list of output nodes.
586 586 orderedout = []
587 587 # Don't start at nullid since we don't want nullid in our output list,
588 588 # and if nullid shows up in descendants, empty parents will look like
589 589 # they're descendants.
590 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 591 n = self.node(r)
592 592 isdescendant = False
593 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 594 isdescendant = True
595 595 elif n in descendants:
596 596 # n is already a descendant
597 597 isdescendant = True
598 598 # This check only needs to be done here because all the roots
599 599 # were already marked as descendants before the loop.
600 600 if n in roots:
601 601 # If n was a root, check if it's a 'real' root.
602 602 p = tuple(self.parents(n))
603 603 # If any of its parents are descendants, it's not a root.
604 604 if (p[0] in descendants) or (p[1] in descendants):
605 605 roots.remove(n)
606 606 else:
607 607 p = tuple(self.parents(n))
608 608 # A node is a descendant if either of its parents is a
609 609 # descendant. (We seeded the descendants set with the roots
610 610 # up there, remember?)
611 611 if (p[0] in descendants) or (p[1] in descendants):
612 612 descendants.add(n)
613 613 isdescendant = True
614 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 615 # Only include nodes that are both descendants and ancestors.
616 616 orderedout.append(n)
617 617 if (ancestors is not None) and (n in heads):
618 618 # We're trying to figure out which heads are reachable
619 619 # from roots.
620 620 # Mark this head as having been reached
621 621 heads[n] = True
622 622 elif ancestors is None:
623 623 # Otherwise, we're trying to discover the heads.
624 624 # Assume this is a head because if it isn't, the next step
625 625 # will eventually remove it.
626 626 heads[n] = True
627 627 # But, obviously its parents aren't.
628 628 for p in self.parents(n):
629 629 heads.pop(p, None)
630 630 heads = [n for n, flag in heads.iteritems() if flag]
631 631 roots = list(roots)
632 632 assert orderedout
633 633 assert roots
634 634 assert heads
635 635 return (orderedout, roots, heads)
636 636
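The tagging loop above walks from the heads back toward the roots, marking ancestors until it drops below the lowest root. A minimal standalone sketch of just that walk, assuming hypothetical rev(n) and parents(n) callbacks in place of the revlog methods (the real loop also prunes 'fake' heads as it goes):

def tag_ancestors(heads, roots, rev, parents, nullid):
    # candidate ancestors: nodes reachable from 'heads' whose rev is
    # not below the lowest root
    lowestrev = min(rev(n) for n in roots)
    ancestors = set()
    nodestotag = set(heads)
    while nodestotag:
        n = nodestotag.pop()
        if n == nullid or n in ancestors:
            continue
        if rev(n) >= lowestrev:
            ancestors.add(n)
            nodestotag.update(p for p in parents(n) if p != nullid)
    return ancestors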
637 637 def headrevs(self):
638 638 count = len(self)
639 639 if not count:
640 640 return [nullrev]
641 641 ishead = [1] * (count + 1)
642 642 index = self.index
643 643 for r in xrange(count):
644 644 e = index[r]
645 645 ishead[e[5]] = ishead[e[6]] = 0
646 646 return [r for r in xrange(count) if ishead[r]]
647 647
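headrevs() leans on the index layout: fields 5 and 6 of each entry are the parent revisions, and a rev is a head exactly when no entry names it as a parent. Allocating count + 1 slots lets nullrev (-1) index the spare slot harmlessly. The same idea over a plain list of (p1, p2) pairs, a hypothetical stand-in for the index:

def headrevs(parentpairs, nullrev=-1):
    # every rev starts out a head; being named as a parent demotes it
    ishead = [True] * len(parentpairs)
    for p1, p2 in parentpairs:
        for p in (p1, p2):
            if p != nullrev:
                ishead[p] = False
    return [r for r in range(len(parentpairs)) if ishead[r]]

For example, headrevs([(-1, -1), (0, -1), (0, -1)]) returns [1, 2]: rev 0 has two children, so only they remain heads.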
648 648 def heads(self, start=None, stop=None):
649 649 """return the list of all nodes that have no children
650 650
651 651 if start is specified, only heads that are descendants of
652 652 start will be returned
653 653 if stop is specified, it will consider all the revs from stop
654 654 as if they had no children
655 655 """
656 656 if start is None and stop is None:
657 657 if not len(self):
658 658 return [nullid]
659 659 return [self.node(r) for r in self.headrevs()]
660 660
661 661 if start is None:
662 662 start = nullid
663 663 if stop is None:
664 664 stop = []
665 665 stoprevs = set([self.rev(n) for n in stop])
666 666 startrev = self.rev(start)
667 667 reachable = set((startrev,))
668 668 heads = set((startrev,))
669 669
670 670 parentrevs = self.parentrevs
671 671 for r in xrange(startrev + 1, len(self)):
672 672 for p in parentrevs(r):
673 673 if p in reachable:
674 674 if r not in stoprevs:
675 675 reachable.add(r)
676 676 heads.add(r)
677 677 if p in heads and p not in stoprevs:
678 678 heads.remove(p)
679 679
680 680 return [self.node(r) for r in heads]
681 681
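The start/stop path is a single forward sweep: a rev becomes reachable when one of its parents is reachable, and that parent then stops being a head. As a standalone sketch with a hypothetical parentrevs(r) callback:

def heads_from(startrev, nrevs, parentrevs, stoprevs=frozenset()):
    reachable = set([startrev])
    heads = set([startrev])
    for r in range(startrev + 1, nrevs):
        for p in parentrevs(r):
            if p in reachable:
                if r not in stoprevs:
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)  # p gained a child; no longer a head
    return heads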
682 682 def children(self, node):
683 683 """find the children of a given node"""
684 684 c = []
685 685 p = self.rev(node)
686 686 for r in range(p + 1, len(self)):
687 687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
688 688 if prevs:
689 689 for pr in prevs:
690 690 if pr == p:
691 691 c.append(self.node(r))
692 692 elif p == nullrev:
693 693 c.append(self.node(r))
694 694 return c
695 695
696 696 def descendant(self, start, end):
697 697 if start == nullrev:
698 698 return True
699 699 for i in self.descendants(start):
700 700 if i == end:
701 701 return True
702 702 elif i > end:
703 703 break
704 704 return False
705 705
706 706 def ancestor(self, a, b):
707 707 """calculate the least common ancestor of nodes a and b"""
708 708
709 709 # fast path, check if it is a descendant
710 710 a, b = self.rev(a), self.rev(b)
711 711 start, end = sorted((a, b))
712 712 if self.descendant(start, end):
713 713 return self.node(start)
714 714
715 715 def parents(rev):
716 716 return [p for p in self.parentrevs(rev) if p != nullrev]
717 717
718 718 c = ancestor.ancestor(a, b, parents)
719 719 if c is None:
720 720 return nullid
721 721
722 722 return self.node(c)
723 723
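When the descendant fast path misses, ancestor.ancestor() finds a least common ancestor over rev numbers. A deliberately naive sketch of the same contract, intersecting full ancestor sets and taking the highest rev (valid because revs are topologically ordered); the real algorithm avoids materializing both sets:

def naive_ancestor(a, b, parents):
    # parents(rev) -> parent revs with nullrev already filtered out,
    # matching the local parents() helper above
    def ancestorset(r):
        seen, stack = set(), [r]
        while stack:
            x = stack.pop()
            if x not in seen:
                seen.add(x)
                stack.extend(parents(x))
        return seen
    common = ancestorset(a) & ancestorset(b)
    return max(common) if common else None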
724 724 def _match(self, id):
725 725 if isinstance(id, (long, int)):
726 726 # rev
727 727 return self.node(id)
728 728 if len(id) == 20:
729 729 # possibly a binary node
730 730 # odds of a binary node being all hex in ASCII are 1 in 10**25
731 731 try:
732 732 node = id
733 733 self.rev(node) # quick search the index
734 734 return node
735 735 except LookupError:
736 736 pass # may be partial hex id
737 737 try:
738 738 # str(rev)
739 739 rev = int(id)
740 740 if str(rev) != id:
741 741 raise ValueError
742 742 if rev < 0:
743 743 rev = len(self) + rev
744 744 if rev < 0 or rev >= len(self):
745 745 raise ValueError
746 746 return self.node(rev)
747 747 except (ValueError, OverflowError):
748 748 pass
749 749 if len(id) == 40:
750 750 try:
751 751 # a full hex nodeid?
752 752 node = bin(id)
753 753 self.rev(node)
754 754 return node
755 755 except (TypeError, LookupError):
756 756 pass
757 757
758 758 def _partialmatch(self, id):
759 try:
760 return self.index.partialmatch(id)
761 except RevlogError:
762 # parsers.c radix tree lookup gave multiple matches
763 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
764 except (AttributeError, ValueError):
765 # we are pure python, or key was too short to search radix tree
766 pass
767
759 768 if id in self._pcache:
760 769 return self._pcache[id]
761 770
762 771 if len(id) < 40:
763 772 try:
764 773 # hex(node)[:...]
765 774 l = len(id) // 2 # grab an even number of digits
766 775 prefix = bin(id[:l * 2])
767 776 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
768 777 nl = [n for n in nl if hex(n).startswith(id)]
769 778 if len(nl) > 0:
770 779 if len(nl) == 1:
771 780 self._pcache[id] = nl[0]
772 781 return nl[0]
773 782 raise LookupError(id, self.indexfile,
774 783 _('ambiguous identifier'))
775 784 return None
776 785 except TypeError:
777 786 pass
778 787
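This is the heart of this changeset: the C index now exposes a radix-tree partialmatch(), tried first. RevlogError from the tree means the prefix was ambiguous; AttributeError or ValueError (pure-Python index, or a key too short for the tree) drops through to the scan below. The fallback's core as a standalone sketch over a list of 20-byte binary nodes, with binascii standing in for the hex()/bin() helpers:

from binascii import hexlify, unhexlify

def partialmatch(hexprefix, nodes):
    # narrow by a binary prefix built from an even number of hex
    # digits, then confirm against the full, possibly odd-length prefix
    l = len(hexprefix) // 2
    binprefix = unhexlify(hexprefix[:l * 2])
    matches = [n for n in nodes
               if n.startswith(binprefix)
               and hexlify(n).decode('ascii').startswith(hexprefix)]
    if len(matches) == 1:
        return matches[0]
    if matches:
        raise LookupError('ambiguous identifier: ' + hexprefix)
    return None

The radix tree wins because it answers a prefix query without touching every index entry; the scan above is O(len(index)) per lookup, which is what made short-prefix lookups slow before this change.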
779 788 def lookup(self, id):
780 789 """locate a node based on:
781 790 - revision number or str(revision number)
782 791 - nodeid or subset of hex nodeid
783 792 """
784 793 n = self._match(id)
785 794 if n is not None:
786 795 return n
787 796 n = self._partialmatch(id)
788 797 if n:
789 798 return n
790 799
791 800 raise LookupError(id, self.indexfile, _('no match found'))
792 801
793 802 def cmp(self, node, text):
794 803 """compare text with a given file revision
795 804
796 805 returns True if text is different from what is stored.
797 806 """
798 807 p1, p2 = self.parents(node)
799 808 return hash(text, p1, p2) != node
800 809
801 810 def _addchunk(self, offset, data):
802 811 o, d = self._chunkcache
803 812 # try to add to existing cache
804 813 if o + len(d) == offset and len(d) + len(data) < _chunksize:
805 814 self._chunkcache = o, d + data
806 815 else:
807 816 self._chunkcache = offset, data
808 817
809 818 def _loadchunk(self, offset, length):
810 819 if self._inline:
811 820 df = self.opener(self.indexfile)
812 821 else:
813 822 df = self.opener(self.datafile)
814 823
815 824 readahead = max(65536, length)
816 825 df.seek(offset)
817 826 d = df.read(readahead)
818 827 df.close()
819 828 self._addchunk(offset, d)
820 829 if readahead > length:
821 830 return util.buffer(d, 0, length)
822 831 return d
823 832
824 833 def _getchunk(self, offset, length):
825 834 o, d = self._chunkcache
826 835 l = len(d)
827 836
828 837 # is it in the cache?
829 838 cachestart = offset - o
830 839 cacheend = cachestart + length
831 840 if cachestart >= 0 and cacheend <= l:
832 841 if cachestart == 0 and cacheend == l:
833 842 return d # avoid a copy
834 843 return util.buffer(d, cachestart, cacheend - cachestart)
835 844
836 845 return self._loadchunk(offset, length)
837 846
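_addchunk, _getchunk and _loadchunk together implement a one-window read cache: a single contiguous (offset, data) span that grows while reads stay sequential and is replaced otherwise, with at least 64KB of read-ahead. A toy equivalent, assuming a hypothetical read(offset, length) primitive; the real code additionally caps growth at _chunksize and returns util.buffer views to avoid copies:

class ChunkCache(object):
    def __init__(self, read):
        self._read = read
        self._cache = (0, b'')  # (file offset, cached bytes)

    def get(self, offset, length):
        o, d = self._cache
        start = offset - o
        end = start + length
        if 0 <= start and end <= len(d):
            return d[start:end]  # served from the cached window
        data = self._read(offset, max(65536, length))  # read ahead
        self._cache = (offset, data)
        return data[:length]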
838 847 def _chunkraw(self, startrev, endrev):
839 848 start = self.start(startrev)
840 849 length = self.end(endrev) - start
841 850 if self._inline:
842 851 start += (startrev + 1) * self._io.size
843 852 return self._getchunk(start, length)
844 853
845 854 def _chunk(self, rev):
846 855 return decompress(self._chunkraw(rev, rev))
847 856
848 857 def _chunkbase(self, rev):
849 858 return self._chunk(rev)
850 859
851 860 def _chunkclear(self):
852 861 self._chunkcache = (0, '')
853 862
854 863 def deltaparent(self, rev):
855 864 """return deltaparent of the given revision"""
856 865 base = self.index[rev][3]
857 866 if base == rev:
858 867 return nullrev
859 868 elif self._generaldelta:
860 869 return base
861 870 else:
862 871 return rev - 1
863 872
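Index field 3 is the delta base: stored explicitly under generaldelta, implicitly rev - 1 otherwise, and equal to rev itself at a full snapshot (which deltaparent() reports as nullrev). Iterating deltaparent() therefore recovers the whole delta chain; a sketch assuming a hypothetical deltaparent(rev) with exactly that contract:

def deltachain(rev, deltaparent, nullrev=-1):
    # walk back to the snapshot, then return the chain base-first
    chain = []
    while rev != nullrev:
        chain.append(rev)
        rev = deltaparent(rev)
    chain.reverse()
    return chain  # chain[0] holds the full text, chain[-1] is the target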
864 873 def revdiff(self, rev1, rev2):
865 874 """return or calculate a delta between two revisions"""
866 875 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
867 876 return str(self._chunk(rev2))
868 877
869 878 return mdiff.textdiff(self.revision(rev1),
870 879 self.revision(rev2))
871 880
872 881 def revision(self, nodeorrev):
873 882 """return an uncompressed revision of a given node or revision
874 883 number.
875 884 """
876 885 if isinstance(nodeorrev, int):
877 886 rev = nodeorrev
878 887 node = self.node(rev)
879 888 else:
880 889 node = nodeorrev
881 890 rev = None
882 891
883 892 cachedrev = None
884 893 if node == nullid:
885 894 return ""
886 895 if self._cache:
887 896 if self._cache[0] == node:
888 897 return self._cache[2]
889 898 cachedrev = self._cache[1]
890 899
891 900 # look up what we need to read
892 901 text = None
893 902 if rev is None:
894 903 rev = self.rev(node)
895 904
896 905 # check rev flags
897 906 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
898 907 raise RevlogError(_('incompatible revision flag %x') %
899 908 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
900 909
901 910 # build delta chain
902 911 chain = []
903 912 index = self.index # for performance
904 913 generaldelta = self._generaldelta
905 914 iterrev = rev
906 915 e = index[iterrev]
907 916 while iterrev != e[3] and iterrev != cachedrev:
908 917 chain.append(iterrev)
909 918 if generaldelta:
910 919 iterrev = e[3]
911 920 else:
912 921 iterrev -= 1
913 922 e = index[iterrev]
914 923 chain.reverse()
915 924 base = iterrev
916 925
917 926 if iterrev == cachedrev:
918 927 # cache hit
919 928 text = self._cache[2]
920 929
921 930 # drop cache to save memory
922 931 self._cache = None
923 932
924 933 self._chunkraw(base, rev)
925 934 if text is None:
926 935 text = str(self._chunkbase(base))
927 936
928 937 bins = [self._chunk(r) for r in chain]
929 938 text = mdiff.patches(text, bins)
930 939
931 940 text = self._checkhash(text, node, rev)
932 941
933 942 self._cache = (node, rev, text)
934 943 return text
935 944
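The chain walk in revision() stops early at either the chain base or the cached revision, so a warm _cache skips both the base-text read and part of the patching. That walk in isolation, assuming a hypothetical base(rev) accessor over index field 3:

def buildchain(rev, base, generaldelta, cachedrev=None):
    chain = []
    iterrev = rev
    while iterrev != base(iterrev) and iterrev != cachedrev:
        chain.append(iterrev)
        iterrev = base(iterrev) if generaldelta else iterrev - 1
    chain.reverse()
    return iterrev, chain  # start text at iterrev, then patch in order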
936 945 def _checkhash(self, text, node, rev):
937 946 p1, p2 = self.parents(node)
938 947 if node != hash(text, p1, p2):
939 948 raise RevlogError(_("integrity check failed on %s:%d")
940 949 % (self.indexfile, rev))
941 950 return text
942 951
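The hash() recomputed here is the standard revlog node: SHA-1 over the two parent nodes in sorted order, followed by the text. Spelled out with hashlib directly:

import hashlib

def nodehash(text, p1, p2):
    s = hashlib.sha1()
    if p2 < p1:
        p1, p2 = p2, p1  # parents participate in sorted order
    s.update(p1)
    s.update(p2)
    s.update(text)
    return s.digest()  # the 20-byte binary node

Sorting the parents makes the node independent of parent order.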
943 952 def checkinlinesize(self, tr, fp=None):
944 953 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
945 954 return
946 955
947 956 trinfo = tr.find(self.indexfile)
948 957 if trinfo is None:
949 958 raise RevlogError(_("%s not found in the transaction")
950 959 % self.indexfile)
951 960
952 961 trindex = trinfo[2]
953 962 dataoff = self.start(trindex)
954 963
955 964 tr.add(self.datafile, dataoff)
956 965
957 966 if fp:
958 967 fp.flush()
959 968 fp.close()
960 969
961 970 df = self.opener(self.datafile, 'w')
962 971 try:
963 972 for r in self:
964 973 df.write(self._chunkraw(r, r))
965 974 finally:
966 975 df.close()
967 976
968 977 fp = self.opener(self.indexfile, 'w', atomictemp=True)
969 978 self.version &= ~(REVLOGNGINLINEDATA)
970 979 self._inline = False
971 980 for i in self:
972 981 e = self._io.packentry(self.index[i], self.node, self.version, i)
973 982 fp.write(e)
974 983
975 984 # if we don't call close, the temp file will never replace the
976 985 # real index
977 986 fp.close()
978 987
979 988 tr.replace(self.indexfile, trindex * self._io.size)
980 989 self._chunkclear()
981 990
982 991 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
983 992 """add a revision to the log
984 993
985 994 text - the revision data to add
986 995 transaction - the transaction object used for rollback
987 996 link - the linkrev data to add
988 997 p1, p2 - the parent nodeids of the revision
989 998 cachedelta - an optional precomputed delta
990 999 """
991 1000 node = hash(text, p1, p2)
992 1001 if node in self.nodemap:
993 1002 return node
994 1003
995 1004 dfh = None
996 1005 if not self._inline:
997 1006 dfh = self.opener(self.datafile, "a")
998 1007 ifh = self.opener(self.indexfile, "a+")
999 1008 try:
1000 1009 return self._addrevision(node, text, transaction, link, p1, p2,
1001 1010 cachedelta, ifh, dfh)
1002 1011 finally:
1003 1012 if dfh:
1004 1013 dfh.close()
1005 1014 ifh.close()
1006 1015
1007 1016 def _addrevision(self, node, text, transaction, link, p1, p2,
1008 1017 cachedelta, ifh, dfh):
1009 1018 """internal function to add revisions to the log
1010 1019
1011 1020 see addrevision for argument descriptions.
1012 1021 invariants:
1013 1022 - text is optional (can be None); if not set, cachedelta must be set.
1014 1023 if both are set, they must correspond to each other.
1015 1024 """
1016 1025 btext = [text]
1017 1026 def buildtext():
1018 1027 if btext[0] is not None:
1019 1028 return btext[0]
1020 1029 # flush any pending writes so revision() below can read the base text
1021 1030 if dfh:
1022 1031 dfh.flush()
1023 1032 ifh.flush()
1024 1033 basetext = self.revision(self.node(cachedelta[0]))
1025 1034 btext[0] = mdiff.patch(basetext, cachedelta[1])
1026 1035 chk = hash(btext[0], p1, p2)
1027 1036 if chk != node:
1028 1037 raise RevlogError(_("consistency error in delta"))
1029 1038 return btext[0]
1030 1039
1031 1040 def builddelta(rev):
1032 1041 # can we use the cached delta?
1033 1042 if cachedelta and cachedelta[0] == rev:
1034 1043 delta = cachedelta[1]
1035 1044 else:
1036 1045 t = buildtext()
1037 1046 ptext = self.revision(self.node(rev))
1038 1047 delta = mdiff.textdiff(ptext, t)
1039 1048 data = compress(delta)
1040 1049 l = len(data[1]) + len(data[0])
1041 1050 if basecache[0] == rev:
1042 1051 chainbase = basecache[1]
1043 1052 else:
1044 1053 chainbase = self.chainbase(rev)
1045 1054 dist = l + offset - self.start(chainbase)
1046 1055 if self._generaldelta:
1047 1056 base = rev
1048 1057 else:
1049 1058 base = chainbase
1050 1059 return dist, l, data, base, chainbase
1051 1060
1052 1061 curr = len(self)
1053 1062 prev = curr - 1
1054 1063 base = chainbase = curr
1055 1064 offset = self.end(prev)
1056 1065 flags = 0
1057 1066 d = None
1058 1067 basecache = self._basecache
1059 1068 p1r, p2r = self.rev(p1), self.rev(p2)
1060 1069
1061 1070 # should we try to build a delta?
1062 1071 if prev != nullrev:
1063 1072 if self._generaldelta:
1064 1073 if p1r >= basecache[1]:
1065 1074 d = builddelta(p1r)
1066 1075 elif p2r >= basecache[1]:
1067 1076 d = builddelta(p2r)
1068 1077 else:
1069 1078 d = builddelta(prev)
1070 1079 else:
1071 1080 d = builddelta(prev)
1072 1081 dist, l, data, base, chainbase = d
1073 1082
1074 1083 # full versions are inserted when the needed deltas
1075 1084 # become comparable to the uncompressed text
1076 1085 if text is None:
1077 1086 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1078 1087 cachedelta[1])
1079 1088 else:
1080 1089 textlen = len(text)
1081 1090 if d is None or dist > textlen * 2:
1082 1091 text = buildtext()
1083 1092 data = compress(text)
1084 1093 l = len(data[1]) + len(data[0])
1085 1094 base = chainbase = curr
1086 1095
1087 1096 e = (offset_type(offset, flags), l, textlen,
1088 1097 base, link, p1r, p2r, node)
1089 1098 self.index.insert(-1, e)
1090 1099 self.nodemap[node] = curr
1091 1100
1092 1101 entry = self._io.packentry(e, self.node, self.version, curr)
1093 1102 if not self._inline:
1094 1103 transaction.add(self.datafile, offset)
1095 1104 transaction.add(self.indexfile, curr * len(entry))
1096 1105 if data[0]:
1097 1106 dfh.write(data[0])
1098 1107 dfh.write(data[1])
1099 1108 dfh.flush()
1100 1109 ifh.write(entry)
1101 1110 else:
1102 1111 offset += curr * self._io.size
1103 1112 transaction.add(self.indexfile, offset, curr)
1104 1113 ifh.write(entry)
1105 1114 ifh.write(data[0])
1106 1115 ifh.write(data[1])
1107 1116 self.checkinlinesize(transaction, ifh)
1108 1117
1109 1118 if type(text) == str: # only accept immutable objects
1110 1119 self._cache = (node, curr, text)
1111 1120 self._basecache = (curr, chainbase)
1112 1121 return node
1113 1122
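The dist > textlen * 2 test above is the classic revlog space/time trade-off: keep appending deltas while the byte distance from the chain base stays within twice the uncompressed text length, and otherwise store a fresh full text so no read ever has to patch through much more data than it returns. As a standalone predicate (names hypothetical):

def storefulltext(have_delta, dist, textlen):
    # mirrors 'if d is None or dist > textlen * 2' above
    return not have_delta or dist > textlen * 2

So a 1,000-byte revision tolerates a chain span of up to 2,000 bytes before a snapshot is forced.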
1114 1123 def group(self, nodelist, bundler, reorder=None):
1115 1124 """Calculate a delta group, yielding a sequence of changegroup chunks
1116 1125 (strings).
1117 1126
1118 1127 Given a list of changeset revs, return a set of deltas and
1119 1128 metadata corresponding to nodes. The first delta is
1120 1129 first parent(nodelist[0]) -> nodelist[0]; the receiver is
1121 1130 guaranteed to have this parent as it has all history before
1122 1131 these changesets. If the first parent is nullrev, the
1123 1132 changegroup starts with a full revision.
1124 1133 """
1125 1134
1126 1135 # if we don't have any revisions touched by these changesets, bail
1127 1136 if len(nodelist) == 0:
1128 1137 yield bundler.close()
1129 1138 return
1130 1139
1131 1140 # for generaldelta revlogs, we linearize the revs; this will both be
1132 1141 # much quicker and generate a much smaller bundle
1133 1142 if (self._generaldelta and reorder is not False) or reorder:
1134 1143 dag = dagutil.revlogdag(self)
1135 1144 revs = set(self.rev(n) for n in nodelist)
1136 1145 revs = dag.linearize(revs)
1137 1146 else:
1138 1147 revs = sorted([self.rev(n) for n in nodelist])
1139 1148
1140 1149 # add the parent of the first rev
1141 1150 p = self.parentrevs(revs[0])[0]
1142 1151 revs.insert(0, p)
1143 1152
1144 1153 # build deltas
1145 1154 for r in xrange(len(revs) - 1):
1146 1155 prev, curr = revs[r], revs[r + 1]
1147 1156 for c in bundler.revchunk(self, curr, prev):
1148 1157 yield c
1149 1158
1150 1159 yield bundler.close()
1151 1160
1152 1161 def addgroup(self, bundle, linkmapper, transaction):
1153 1162 """
1154 1163 add a delta group
1155 1164
1156 1165 given a set of deltas, add them to the revision log. the
1157 1166 first delta is against its parent, which should be in our
1158 1167 log, the rest are against the previous delta.
1159 1168 """
1160 1169
1161 1170 # track the base of the current delta log
1162 1171 content = []
1163 1172 node = None
1164 1173
1165 1174 r = len(self)
1166 1175 end = 0
1167 1176 if r:
1168 1177 end = self.end(r - 1)
1169 1178 ifh = self.opener(self.indexfile, "a+")
1170 1179 isize = r * self._io.size
1171 1180 if self._inline:
1172 1181 transaction.add(self.indexfile, end + isize, r)
1173 1182 dfh = None
1174 1183 else:
1175 1184 transaction.add(self.indexfile, isize, r)
1176 1185 transaction.add(self.datafile, end)
1177 1186 dfh = self.opener(self.datafile, "a")
1178 1187
1179 1188 try:
1180 1189 # loop through our set of deltas
1181 1190 chain = None
1182 1191 while True:
1183 1192 chunkdata = bundle.deltachunk(chain)
1184 1193 if not chunkdata:
1185 1194 break
1186 1195 node = chunkdata['node']
1187 1196 p1 = chunkdata['p1']
1188 1197 p2 = chunkdata['p2']
1189 1198 cs = chunkdata['cs']
1190 1199 deltabase = chunkdata['deltabase']
1191 1200 delta = chunkdata['delta']
1192 1201
1193 1202 content.append(node)
1194 1203
1195 1204 link = linkmapper(cs)
1196 1205 if node in self.nodemap:
1197 1206 # this can happen if two branches make the same change
1198 1207 chain = node
1199 1208 continue
1200 1209
1201 1210 for p in (p1, p2):
1202 1211 if not p in self.nodemap:
1203 1212 raise LookupError(p, self.indexfile,
1204 1213 _('unknown parent'))
1205 1214
1206 1215 if deltabase not in self.nodemap:
1207 1216 raise LookupError(deltabase, self.indexfile,
1208 1217 _('unknown delta base'))
1209 1218
1210 1219 baserev = self.rev(deltabase)
1211 1220 chain = self._addrevision(node, None, transaction, link,
1212 1221 p1, p2, (baserev, delta), ifh, dfh)
1213 1222 if not dfh and not self._inline:
1214 1223 # addrevision switched from inline to conventional
1215 1224 # reopen the index
1216 1225 ifh.close()
1217 1226 dfh = self.opener(self.datafile, "a")
1218 1227 ifh = self.opener(self.indexfile, "a")
1219 1228 finally:
1220 1229 if dfh:
1221 1230 dfh.close()
1222 1231 ifh.close()
1223 1232
1224 1233 return content
1225 1234
1226 1235 def strip(self, minlink, transaction):
1227 1236 """truncate the revlog on the first revision with a linkrev >= minlink
1228 1237
1229 1238 This function is called when we're stripping revision minlink and
1230 1239 its descendants from the repository.
1231 1240
1232 1241 We have to remove all revisions with linkrev >= minlink, because
1233 1242 the equivalent changelog revisions will be renumbered after the
1234 1243 strip.
1235 1244
1236 1245 So we truncate the revlog on the first of these revisions, and
1237 1246 trust that the caller has saved the revisions that shouldn't be
1238 1247 removed and that it'll re-add them after this truncation.
1239 1248 """
1240 1249 if len(self) == 0:
1241 1250 return
1242 1251
1243 1252 for rev in self:
1244 1253 if self.index[rev][4] >= minlink:
1245 1254 break
1246 1255 else:
1247 1256 return
1248 1257
1249 1258 # first truncate the files on disk
1250 1259 end = self.start(rev)
1251 1260 if not self._inline:
1252 1261 transaction.add(self.datafile, end)
1253 1262 end = rev * self._io.size
1254 1263 else:
1255 1264 end += rev * self._io.size
1256 1265
1257 1266 transaction.add(self.indexfile, end)
1258 1267
1259 1268 # then reset internal state in memory to forget those revisions
1260 1269 self._cache = None
1261 1270 self._chunkclear()
1262 1271 for x in xrange(rev, len(self)):
1263 1272 del self.nodemap[self.node(x)]
1264 1273
1265 1274 del self.index[rev:-1]
1266 1275
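The truncation points computed above, as a pure function: the data file is cut at the first byte of rev's data and the index at rev fixed-size entries; inline revlogs keep data inside the index file, so the two offsets add together. A sketch with hypothetical start(rev) and entry-size inputs:

def truncation_offsets(rev, start, entrysize, inline):
    dataend = start(rev)  # first byte of rev's data
    if inline:
        return (None, dataend + rev * entrysize)
    return (dataend, rev * entrysize)  # (datafile end, indexfile end)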
1267 1276 def checksize(self):
1268 1277 expected = 0
1269 1278 if len(self):
1270 1279 expected = max(0, self.end(len(self) - 1))
1271 1280
1272 1281 try:
1273 1282 f = self.opener(self.datafile)
1274 1283 f.seek(0, 2)
1275 1284 actual = f.tell()
1276 1285 f.close()
1277 1286 dd = actual - expected
1278 1287 except IOError, inst:
1279 1288 if inst.errno != errno.ENOENT:
1280 1289 raise
1281 1290 dd = 0
1282 1291
1283 1292 try:
1284 1293 f = self.opener(self.indexfile)
1285 1294 f.seek(0, 2)
1286 1295 actual = f.tell()
1287 1296 f.close()
1288 1297 s = self._io.size
1289 1298 i = max(0, actual // s)
1290 1299 di = actual - (i * s)
1291 1300 if self._inline:
1292 1301 databytes = 0
1293 1302 for r in self:
1294 1303 databytes += max(0, self.length(r))
1295 1304 dd = 0
1296 1305 di = actual - len(self) * s - databytes
1297 1306 except IOError, inst:
1298 1307 if inst.errno != errno.ENOENT:
1299 1308 raise
1300 1309 di = 0
1301 1310
1302 1311 return (dd, di)
1303 1312
1304 1313 def files(self):
1305 1314 res = [self.indexfile]
1306 1315 if not self._inline:
1307 1316 res.append(self.datafile)
1308 1317 return res