revlog: switch to a C version of headrevs...
Bryan O'Sullivan - r16786:2631cd5d default
@@ -1,1291 +1,1368 @@ parsers.c
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 17 {
18 18 char c = p[off];
19 19
20 20 if (c >= '0' && c <= '9')
21 21 return c - '0';
22 22 if (c >= 'a' && c <= 'f')
23 23 return c - 'a' + 10;
24 24 if (c >= 'A' && c <= 'F')
25 25 return c - 'A' + 10;
26 26
27 27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 28 return 0;
29 29 }
30 30
31 31 /*
32 32 * Turn a hex-encoded string into binary.
33 33 */
34 34 static PyObject *unhexlify(const char *str, int len)
35 35 {
36 36 PyObject *ret;
37 37 char *d;
38 38 int i;
39 39
40 40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 41
42 42 if (!ret)
43 43 return NULL;
44 44
45 45 d = PyBytes_AsString(ret);
46 46
47 47 for (i = 0; i < len;) {
48 48 int hi = hexdigit(str, i++);
49 49 int lo = hexdigit(str, i++);
50 50 *d++ = (hi << 4) | lo;
51 51 }
52 52
53 53 return ret;
54 54 }
55 55
56 56 /*
57 57 * This code assumes that a manifest is stitched together with newline
58 58 * ('\n') characters.
59 59 */
60 60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 61 {
62 62 PyObject *mfdict, *fdict;
63 63 char *str, *cur, *start, *zero;
64 64 int len;
65 65
66 66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 67 &PyDict_Type, &mfdict,
68 68 &PyDict_Type, &fdict,
69 69 &str, &len))
70 70 goto quit;
71 71
72 72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 73 PyObject *file = NULL, *node = NULL;
74 74 PyObject *flags = NULL;
75 75 int nlen;
76 76
77 77 if (!*cur) {
78 78 zero = cur;
79 79 continue;
80 80 }
81 81 else if (*cur != '\n')
82 82 continue;
83 83
84 84 if (!zero) {
85 85 PyErr_SetString(PyExc_ValueError,
86 86 "manifest entry has no separator");
87 87 goto quit;
88 88 }
89 89
90 90 file = PyBytes_FromStringAndSize(start, zero - start);
91 91
92 92 if (!file)
93 93 goto bail;
94 94
95 95 nlen = cur - zero - 1;
96 96
97 97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 98 if (!node)
99 99 goto bail;
100 100
101 101 if (nlen > 40) {
102 102 flags = PyBytes_FromStringAndSize(zero + 41,
103 103 nlen - 40);
104 104 if (!flags)
105 105 goto bail;
106 106
107 107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 108 goto bail;
109 109 }
110 110
111 111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 112 goto bail;
113 113
114 114 start = cur + 1;
115 115 zero = NULL;
116 116
117 117 Py_XDECREF(flags);
118 118 Py_XDECREF(node);
119 119 Py_XDECREF(file);
120 120 continue;
121 121 bail:
122 122 Py_XDECREF(flags);
123 123 Py_XDECREF(node);
124 124 Py_XDECREF(file);
125 125 goto quit;
126 126 }
127 127
128 128 if (len > 0 && *(cur - 1) != '\n') {
129 129 PyErr_SetString(PyExc_ValueError,
130 130 "manifest contains trailing garbage");
131 131 goto quit;
132 132 }
133 133
134 134 Py_INCREF(Py_None);
135 135 return Py_None;
136 136 quit:
137 137 return NULL;
138 138 }
139 139
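/*
 * Editor's sketch (not part of this commit): the entry layout that
 * parse_manifest() above consumes.  Each entry is
 *
 *     <path> '\0' <40 hex digits> [flag chars] '\n'
 *
 * so a two-entry manifest with one flagged file looks like the constant
 * below ("x" is an example flag byte; the hash is arbitrary).
 */
static const char example_manifest[] =
	"bar/baz.c\0" "2ed2a3912a0b24502043eae84ee4b279c18b90dd\n"
	"foo.c\0" "2ed2a3912a0b24502043eae84ee4b279c18b90ddx\n";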
140 140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 141 {
142 142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 144 char *str, *cur, *end, *cpos;
145 145 int state, mode, size, mtime;
146 146 unsigned int flen;
147 147 int len;
148 148
149 149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 150 &PyDict_Type, &dmap,
151 151 &PyDict_Type, &cmap,
152 152 &str, &len))
153 153 goto quit;
154 154
155 155 /* read parents */
156 156 if (len < 40)
157 157 goto quit;
158 158
159 159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 160 if (!parents)
161 161 goto quit;
162 162
163 163 /* read filenames */
164 164 cur = str + 40;
165 165 end = str + len;
166 166
167 167 while (cur < end - 17) {
168 168 /* unpack header */
169 169 state = *cur;
170 170 mode = getbe32(cur + 1);
171 171 size = getbe32(cur + 5);
172 172 mtime = getbe32(cur + 9);
173 173 flen = getbe32(cur + 13);
174 174 cur += 17;
175 175 if (cur + flen > end || cur + flen < cur) {
176 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 177 goto quit;
178 178 }
179 179
180 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 181 if (!entry)
182 182 goto quit;
183 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 184
185 185 cpos = memchr(cur, 0, flen);
186 186 if (cpos) {
187 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 189 flen - (cpos - cur) - 1);
190 190 if (!fname || !cname ||
191 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 192 PyDict_SetItem(dmap, fname, entry) == -1)
193 193 goto quit;
194 194 Py_DECREF(cname);
195 195 } else {
196 196 fname = PyBytes_FromStringAndSize(cur, flen);
197 197 if (!fname ||
198 198 PyDict_SetItem(dmap, fname, entry) == -1)
199 199 goto quit;
200 200 }
201 201 cur += flen;
202 202 Py_DECREF(fname);
203 203 Py_DECREF(entry);
204 204 fname = cname = entry = NULL;
205 205 }
206 206
207 207 ret = parents;
208 208 Py_INCREF(ret);
209 209 quit:
210 210 Py_XDECREF(fname);
211 211 Py_XDECREF(cname);
212 212 Py_XDECREF(entry);
213 213 Py_XDECREF(parents);
214 214 return ret;
215 215 }
216 216
217 217 /*
218 218 * A base-16 trie for fast node->rev mapping.
219 219 *
220 220 * Positive value is index of the next node in the trie
221 221 * Negative value is a leaf: -(rev + 1)
222 222 * Zero is empty
223 223 */
224 224 typedef struct {
225 225 int children[16];
226 226 } nodetree;
227 227
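/*
 * Editor's sketch (not part of this commit): the child encoding above,
 * in isolation.  Revs are biased by one so that rev 0 maps to -1 and can
 * never collide with "empty" (0) or an interior-node index (> 0).  The
 * helper names are hypothetical.
 */
static inline int nt_encode_leaf(int rev) { return -rev - 1; }	/* 0 -> -1 */
static inline int nt_decode_leaf(int v)   { return -v - 1; }	/* -1 -> 0 */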
228 228 /*
229 229 * This class has two behaviours.
230 230 *
231 231 * When used in a list-like way (with integer keys), we decode an
232 232 * entry in a RevlogNG index file on demand. Our last entry is a
233 233 * sentinel, always a nullid. We have limited support for
234 234 * integer-keyed insert and delete, only at elements right before the
235 235 * sentinel.
236 236 *
237 237 * With string keys, we lazily perform a reverse mapping from node to
238 238 * rev, using a base-16 trie.
239 239 */
240 240 typedef struct {
241 241 PyObject_HEAD
242 242 /* Type-specific fields go here. */
243 243 PyObject *data; /* raw bytes of index */
244 244 PyObject **cache; /* cached tuples */
245 245 const char **offsets; /* populated on demand */
246 246 Py_ssize_t raw_length; /* original number of elements */
247 247 Py_ssize_t length; /* current number of elements */
248 248 PyObject *added; /* populated on demand */
249 249 nodetree *nt; /* base-16 trie */
250 250 int ntlength; /* # nodes in use */
251 251 int ntcapacity; /* # nodes allocated */
252 252 int ntdepth; /* maximum depth of tree */
253 253 int ntsplits; /* # splits performed */
254 254 int ntrev; /* last rev scanned */
255 255 int ntlookups; /* # lookups */
256 256 int ntmisses; /* # lookups that miss the cache */
257 257 int inlined;
258 258 } indexObject;
259 259
260 260 static Py_ssize_t index_length(const indexObject *self)
261 261 {
262 262 if (self->added == NULL)
263 263 return self->length;
264 264 return self->length + PyList_GET_SIZE(self->added);
265 265 }
266 266
267 267 static PyObject *nullentry;
268 268 static const char nullid[20];
269 269
270 270 static long inline_scan(indexObject *self, const char **offsets);
271 271
272 272 #if LONG_MAX == 0x7fffffffL
273 273 static char *tuple_format = "Kiiiiiis#";
274 274 #else
275 275 static char *tuple_format = "kiiiiiis#";
276 276 #endif
277 277
278 278 /*
279 279 * Return a pointer to the beginning of a RevlogNG record.
280 280 */
281 281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 282 {
283 283 if (self->inlined && pos > 0) {
284 284 if (self->offsets == NULL) {
285 285 self->offsets = malloc(self->raw_length *
286 286 sizeof(*self->offsets));
287 287 if (self->offsets == NULL)
288 288 return (const char *)PyErr_NoMemory();
289 289 inline_scan(self, self->offsets);
290 290 }
291 291 return self->offsets[pos];
292 292 }
293 293
294 294 return PyString_AS_STRING(self->data) + pos * 64;
295 295 }
296 296
297 297 /*
298 298 * RevlogNG format (all in big endian, data may be inlined):
299 299 * 6 bytes: offset
300 300 * 2 bytes: flags
301 301 * 4 bytes: compressed length
302 302 * 4 bytes: uncompressed length
303 303 * 4 bytes: base revision
304 304 * 4 bytes: link revision
305 305 * 4 bytes: parent 1 revision
306 306 * 4 bytes: parent 2 revision
307 307 * 32 bytes: nodeid (only 20 bytes used)
308 308 */
309 309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 310 {
311 311 uint64_t offset_flags;
312 312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 313 const char *c_node_id;
314 314 const char *data;
315 315 Py_ssize_t length = index_length(self);
316 316 PyObject *entry;
317 317
318 318 if (pos < 0)
319 319 pos += length;
320 320
321 321 if (pos < 0 || pos >= length) {
322 322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 323 return NULL;
324 324 }
325 325
326 326 if (pos == length - 1) {
327 327 Py_INCREF(nullentry);
328 328 return nullentry;
329 329 }
330 330
331 331 if (pos >= self->length - 1) {
332 332 PyObject *obj;
333 333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 334 Py_INCREF(obj);
335 335 return obj;
336 336 }
337 337
338 338 if (self->cache) {
339 339 if (self->cache[pos]) {
340 340 Py_INCREF(self->cache[pos]);
341 341 return self->cache[pos];
342 342 }
343 343 } else {
344 344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 345 if (self->cache == NULL)
346 346 return PyErr_NoMemory();
347 347 }
348 348
349 349 data = index_deref(self, pos);
350 350 if (data == NULL)
351 351 return NULL;
352 352
353 353 offset_flags = getbe32(data + 4);
354 354 if (pos == 0) /* mask out version number for the first entry */
355 355 offset_flags &= 0xFFFF;
356 356 else {
357 357 uint32_t offset_high = getbe32(data);
358 358 offset_flags |= ((uint64_t)offset_high) << 32;
359 359 }
360 360
361 361 comp_len = getbe32(data + 8);
362 362 uncomp_len = getbe32(data + 12);
363 363 base_rev = getbe32(data + 16);
364 364 link_rev = getbe32(data + 20);
365 365 parent_1 = getbe32(data + 24);
366 366 parent_2 = getbe32(data + 28);
367 367 c_node_id = data + 32;
368 368
369 369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 370 uncomp_len, base_rev, link_rev,
371 371 parent_1, parent_2, c_node_id, 20);
372 372
373 373 if (entry)
374 374 PyObject_GC_UnTrack(entry);
375 375
376 376 self->cache[pos] = entry;
377 377 Py_XINCREF(entry); /* entry may be NULL if Py_BuildValue failed */
378 378
379 379 return entry;
380 380 }
381 381
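/*
 * Editor's sketch (not part of this commit): the raw 64-byte record
 * decode that index_get() performs, separated from the Python object
 * plumbing.  `revlogng_rec` and `decode_record` are hypothetical names
 * used only for illustration.
 */
typedef struct {
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, p1, p2;
	const char *node;	/* only the first 20 of 32 bytes are used */
} revlogng_rec;

static void decode_record(const char *data, Py_ssize_t pos, revlogng_rec *r)
{
	r->offset_flags = getbe32(data + 4);
	if (pos == 0)	/* record 0 doubles as the version header */
		r->offset_flags &= 0xFFFF;
	else
		r->offset_flags |= ((uint64_t)getbe32(data)) << 32;
	r->comp_len = getbe32(data + 8);
	r->uncomp_len = getbe32(data + 12);
	r->base_rev = getbe32(data + 16);
	r->link_rev = getbe32(data + 20);
	r->p1 = getbe32(data + 24);
	r->p2 = getbe32(data + 28);
	r->node = data + 32;
}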
382 382 /*
383 383 * Return the 20-byte SHA of the node corresponding to the given rev.
384 384 */
385 385 static const char *index_node(indexObject *self, Py_ssize_t pos)
386 386 {
387 387 Py_ssize_t length = index_length(self);
388 388 const char *data;
389 389
390 390 if (pos == length - 1 || pos == INT_MAX)
391 391 return nullid;
392 392
393 393 if (pos >= length)
394 394 return NULL;
395 395
396 396 if (pos >= self->length - 1) {
397 397 PyObject *tuple, *str;
398 398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
399 399 str = PyTuple_GetItem(tuple, 7);
400 400 return str ? PyString_AS_STRING(str) : NULL;
401 401 }
402 402
403 403 data = index_deref(self, pos);
404 404 return data ? data + 32 : NULL;
405 405 }
406 406
407 407 static int nt_insert(indexObject *self, const char *node, int rev);
408 408
409 409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 410 {
411 411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 412 return -1;
413 413 if (*nodelen == 20)
414 414 return 0;
415 415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 416 return -1;
417 417 }
418 418
419 419 static PyObject *index_insert(indexObject *self, PyObject *args)
420 420 {
421 421 PyObject *obj;
422 422 char *node;
423 423 long offset;
424 424 Py_ssize_t len, nodelen;
425 425
426 426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
427 427 return NULL;
428 428
429 429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
430 430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 431 return NULL;
432 432 }
433 433
434 434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
435 435 return NULL;
436 436
437 437 len = index_length(self);
438 438
439 439 if (offset < 0)
440 440 offset += len;
441 441
442 442 if (offset != len - 1) {
443 443 PyErr_SetString(PyExc_IndexError,
444 444 "insert only supported at index -1");
445 445 return NULL;
446 446 }
447 447
448 448 if (offset > INT_MAX) {
449 449 PyErr_SetString(PyExc_ValueError,
450 450 "currently only 2**31 revs supported");
451 451 return NULL;
452 452 }
453 453
454 454 if (self->added == NULL) {
455 455 self->added = PyList_New(0);
456 456 if (self->added == NULL)
457 457 return NULL;
458 458 }
459 459
460 460 if (PyList_Append(self->added, obj) == -1)
461 461 return NULL;
462 462
463 463 if (self->nt)
464 464 nt_insert(self, node, (int)offset);
465 465
466 466 Py_RETURN_NONE;
467 467 }
468 468
469 469 static void _index_clearcaches(indexObject *self)
470 470 {
471 471 if (self->cache) {
472 472 Py_ssize_t i;
473 473
474 474 for (i = 0; i < self->raw_length; i++)
475 475 Py_CLEAR(self->cache[i]);
476 476 free(self->cache);
477 477 self->cache = NULL;
478 478 }
479 479 if (self->offsets) {
480 480 free(self->offsets);
481 481 self->offsets = NULL;
482 482 }
483 483 if (self->nt) {
484 484 free(self->nt);
485 485 self->nt = NULL;
486 486 }
487 487 }
488 488
489 489 static PyObject *index_clearcaches(indexObject *self)
490 490 {
491 491 _index_clearcaches(self);
492 492 self->ntlength = self->ntcapacity = 0;
493 493 self->ntdepth = self->ntsplits = 0;
494 494 self->ntrev = -1;
495 495 self->ntlookups = self->ntmisses = 0;
496 496 Py_RETURN_NONE;
497 497 }
498 498
499 499 static PyObject *index_stats(indexObject *self)
500 500 {
501 501 PyObject *obj = PyDict_New();
502 502
503 503 if (obj == NULL)
504 504 return NULL;
505 505
506 506 #define istat(__n, __d) \
507 507 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
508 508 goto bail;
509 509
510 510 if (self->added) {
511 511 Py_ssize_t len = PyList_GET_SIZE(self->added);
512 512 if (PyDict_SetItemString(obj, "index entries added",
513 513 PyInt_FromSsize_t(len)) == -1)
514 514 goto bail;
515 515 }
516 516
517 517 if (self->raw_length != self->length - 1)
518 518 istat(raw_length, "revs on disk");
519 519 istat(length, "revs in memory");
520 520 istat(ntcapacity, "node trie capacity");
521 521 istat(ntdepth, "node trie depth");
522 522 istat(ntlength, "node trie count");
523 523 istat(ntlookups, "node trie lookups");
524 524 istat(ntmisses, "node trie misses");
525 525 istat(ntrev, "node trie last rev scanned");
526 526 istat(ntsplits, "node trie splits");
527 527
528 528 #undef istat
529 529
530 530 return obj;
531 531
532 532 bail:
533 533 Py_XDECREF(obj);
534 534 return NULL;
535 535 }
536 536
537 static PyObject *index_headrevs(indexObject *self)
538 {
539 Py_ssize_t i, len, addlen;
540 char *nothead = NULL;
541 PyObject *heads;
542
543 len = index_length(self) - 1;
544 heads = PyList_New(0);
545 if (heads == NULL)
546 goto bail;
547 if (len == 0) {
548 		PyObject *nullrev = PyInt_FromLong(-1); /* renamed: avoid shadowing the file-level nullid */
549 		if (nullrev == NULL || PyList_Append(heads, nullrev) == -1) {
550 			Py_XDECREF(nullrev);
551 goto bail;
552 }
553 goto done;
554 }
555
556 nothead = calloc(len, 1);
557 if (nothead == NULL)
558 goto bail;
559
560 for (i = 0; i < self->raw_length; i++) {
561 		const char *data = index_deref(self, i);
562 		int parent_1, parent_2;
563 		if (data == NULL)
			goto bail; /* index_deref failed; the exception is already set */
		parent_1 = getbe32(data + 24);
		parent_2 = getbe32(data + 28);
564 if (parent_1 >= 0)
565 nothead[parent_1] = 1;
566 if (parent_2 >= 0)
567 nothead[parent_2] = 1;
568 }
569
570 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
571
572 for (i = 0; i < addlen; i++) {
573 PyObject *rev = PyList_GET_ITEM(self->added, i);
574 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
575 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
576 long parent_1, parent_2;
577
578 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
579 PyErr_SetString(PyExc_TypeError,
580 "revlog parents are invalid");
581 goto bail;
582 }
583 parent_1 = PyInt_AS_LONG(p1);
584 parent_2 = PyInt_AS_LONG(p2);
585 if (parent_1 >= 0)
586 nothead[parent_1] = 1;
587 if (parent_2 >= 0)
588 nothead[parent_2] = 1;
589 }
590
591 for (i = 0; i < len; i++) {
592 PyObject *head;
593
594 if (nothead[i])
595 continue;
596 head = PyInt_FromLong(i);
597 if (head == NULL || PyList_Append(heads, head) == -1) {
598 Py_XDECREF(head);
599 goto bail;
600 }
601 }
602
603 done:
604 free(nothead);
605 return heads;
606 bail:
607 Py_XDECREF(heads);
608 free(nothead);
609 return NULL;
610 }
611
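/*
 * Editor's sketch (not part of this commit): the core of the
 * index_headrevs() algorithm above, applied to a plain parents array.
 * Any rev that appears as a parent cannot be a head; every remaining
 * rev is one.  `count_heads` is a hypothetical helper.
 */
static int count_heads(const int (*parents)[2], int nrevs, char *nothead)
{
	int i, nheads = 0;

	memset(nothead, 0, nrevs);
	for (i = 0; i < nrevs; i++) {
		if (parents[i][0] >= 0)
			nothead[parents[i][0]] = 1;
		if (parents[i][1] >= 0)
			nothead[parents[i][1]] = 1;
	}
	for (i = 0; i < nrevs; i++)
		nheads += !nothead[i];
	return nheads;
}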
537 612 static inline int nt_level(const char *node, Py_ssize_t level)
538 613 {
539 614 int v = node[level>>1];
540 615 if (!(level & 1))
541 616 v >>= 4;
542 617 return v & 0xf;
543 618 }
544 619
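/*
 * Editor's note (not part of this commit): nt_level() above walks a
 * binary node two trie levels per byte, high nybble first, which is
 * what makes the trie base-16.  A small demonstration:
 */
static void nt_level_demo(void)
{
	static const char node[1] = { (char)0xab };
	int hi = nt_level(node, 0);	/* 0xa: high nybble of byte 0 */
	int lo = nt_level(node, 1);	/* 0xb: low nybble of byte 0 */
	(void)hi; (void)lo;
}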
545 620 /*
546 621 * Return values:
547 622 *
548 623 * -4: match is ambiguous (multiple candidates)
549 624 * -2: not found
550 625 * rest: valid rev (-1 means the nullid sentinel)
551 626 */
552 627 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
553 628 int hex)
554 629 {
555 630 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
556 631 int level, maxlevel, off;
557 632
558 633 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
559 634 return -1;
560 635
561 636 if (self->nt == NULL)
562 637 return -2;
563 638
564 639 if (hex)
565 640 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
566 641 else
567 642 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
568 643
569 644 for (level = off = 0; level < maxlevel; level++) {
570 645 int k = getnybble(node, level);
571 646 nodetree *n = &self->nt[off];
572 647 int v = n->children[k];
573 648
574 649 if (v < 0) {
575 650 const char *n;
576 651 Py_ssize_t i;
577 652
578 653 v = -v - 1;
579 654 n = index_node(self, v);
580 655 if (n == NULL)
581 656 return -2;
582 657 for (i = level; i < maxlevel; i++)
583 658 if (getnybble(node, i) != nt_level(n, i))
584 659 return -2;
585 660 return v;
586 661 }
587 662 if (v == 0)
588 663 return -2;
589 664 off = v;
590 665 }
591 666 /* multiple matches against an ambiguous prefix */
592 667 return -4;
593 668 }
594 669
595 670 static int nt_new(indexObject *self)
596 671 {
597 672 if (self->ntlength == self->ntcapacity) {
598 673 self->ntcapacity *= 2;
599 674 self->nt = realloc(self->nt,
600 675 self->ntcapacity * sizeof(nodetree));
601 676 if (self->nt == NULL) {
602 677 PyErr_SetString(PyExc_MemoryError, "out of memory");
603 678 return -1;
604 679 }
605 680 memset(&self->nt[self->ntlength], 0,
606 681 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
607 682 }
608 683 return self->ntlength++;
609 684 }
610 685
611 686 static int nt_insert(indexObject *self, const char *node, int rev)
612 687 {
613 688 int level = 0;
614 689 int off = 0;
615 690
616 691 while (level < 40) {
617 692 int k = nt_level(node, level);
618 693 nodetree *n;
619 694 int v;
620 695
621 696 n = &self->nt[off];
622 697 v = n->children[k];
623 698
624 699 if (v == 0) {
625 700 n->children[k] = -rev - 1;
626 701 return 0;
627 702 }
628 703 if (v < 0) {
629 704 const char *oldnode = index_node(self, -v - 1);
630 705 int noff;
631 706
632 707 if (!oldnode || !memcmp(oldnode, node, 20)) {
633 708 n->children[k] = -rev - 1;
634 709 return 0;
635 710 }
636 711 noff = nt_new(self);
637 712 if (noff == -1)
638 713 return -1;
639 714 /* self->nt may have been changed by realloc */
640 715 self->nt[off].children[k] = noff;
641 716 off = noff;
642 717 n = &self->nt[off];
643 718 n->children[nt_level(oldnode, ++level)] = v;
644 719 if (level > self->ntdepth)
645 720 self->ntdepth = level;
646 721 self->ntsplits += 1;
647 722 } else {
648 723 level += 1;
649 724 off = v;
650 725 }
651 726 }
652 727
653 728 return -1;
654 729 }
655 730
656 731 static int nt_init(indexObject *self)
657 732 {
658 733 if (self->nt == NULL) {
659 734 self->ntcapacity = self->raw_length < 4
660 735 ? 4 : self->raw_length / 2;
661 736 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
662 737 if (self->nt == NULL) {
663 738 PyErr_NoMemory();
664 739 return -1;
665 740 }
666 741 self->ntlength = 1;
667 742 self->ntrev = (int)index_length(self) - 1;
668 743 self->ntlookups = 1;
669 744 self->ntmisses = 0;
670 745 if (nt_insert(self, nullid, INT_MAX) == -1)
671 746 return -1;
672 747 }
673 748 return 0;
674 749 }
675 750
676 751 /*
677 752 * Return values:
678 753 *
679 754 * -3: error (exception set)
680 755 * -2: not found (no exception set)
681 756 * rest: valid rev
682 757 */
683 758 static int index_find_node(indexObject *self,
684 759 const char *node, Py_ssize_t nodelen)
685 760 {
686 761 int rev;
687 762
688 763 self->ntlookups++;
689 764 rev = nt_find(self, node, nodelen, 0);
690 765 if (rev >= -1)
691 766 return rev;
692 767
693 768 if (nt_init(self) == -1)
694 769 return -3;
695 770
696 771 /*
697 772 * For the first handful of lookups, we scan the entire index,
698 773 * and cache only the matching nodes. This optimizes for cases
699 774 * like "hg tip", where only a few nodes are accessed.
700 775 *
701 776 * After that, we cache every node we visit, using a single
702 777 * scan amortized over multiple lookups. This gives the best
703 778 * bulk performance, e.g. for "hg log".
704 779 */
705 780 if (self->ntmisses++ < 4) {
706 781 for (rev = self->ntrev - 1; rev >= 0; rev--) {
707 782 const char *n = index_node(self, rev);
708 783 if (n == NULL)
709 784 return -2;
710 785 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
711 786 if (nt_insert(self, n, rev) == -1)
712 787 return -3;
713 788 break;
714 789 }
715 790 }
716 791 } else {
717 792 for (rev = self->ntrev - 1; rev >= 0; rev--) {
718 793 const char *n = index_node(self, rev);
719 794 if (n == NULL) {
720 795 self->ntrev = rev + 1;
721 796 return -2;
722 797 }
723 798 if (nt_insert(self, n, rev) == -1) {
724 799 self->ntrev = rev + 1;
725 800 return -3;
726 801 }
727 802 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
728 803 break;
729 804 }
730 805 }
731 806 self->ntrev = rev;
732 807 }
733 808
734 809 if (rev >= 0)
735 810 return rev;
736 811 return -2;
737 812 }
738 813
739 814 static PyObject *raise_revlog_error(void)
740 815 {
741 816 static PyObject *errclass;
742 817 PyObject *mod = NULL, *errobj;
743 818
744 819 if (errclass == NULL) {
745 820 PyObject *dict;
746 821
747 822 mod = PyImport_ImportModule("mercurial.error");
748 823 if (mod == NULL)
749 824 goto classfail;
750 825
751 826 dict = PyModule_GetDict(mod);
752 827 if (dict == NULL)
753 828 goto classfail;
754 829
755 830 errclass = PyDict_GetItemString(dict, "RevlogError");
756 831 if (errclass == NULL) {
757 832 PyErr_SetString(PyExc_SystemError,
758 833 "could not find RevlogError");
759 834 goto classfail;
760 835 }
761 836 Py_INCREF(errclass);
762 837 }
763 838
764 839 errobj = PyObject_CallFunction(errclass, NULL);
765 840 if (errobj == NULL)
766 841 return NULL;
767 842 PyErr_SetObject(errclass, errobj);
768 843 return errobj;
769 844
770 845 classfail:
771 846 Py_XDECREF(mod);
772 847 return NULL;
773 848 }
774 849
775 850 static PyObject *index_getitem(indexObject *self, PyObject *value)
776 851 {
777 852 char *node;
778 853 Py_ssize_t nodelen;
779 854 int rev;
780 855
781 856 if (PyInt_Check(value))
782 857 return index_get(self, PyInt_AS_LONG(value));
783 858
784 859 if (node_check(value, &node, &nodelen) == -1)
785 860 return NULL;
786 861 rev = index_find_node(self, node, nodelen);
787 862 if (rev >= -1)
788 863 return PyInt_FromLong(rev);
789 864 if (rev == -2)
790 865 raise_revlog_error();
791 866 return NULL;
792 867 }
793 868
794 869 static int nt_partialmatch(indexObject *self, const char *node,
795 870 Py_ssize_t nodelen)
796 871 {
797 872 int rev;
798 873
799 874 if (nt_init(self) == -1)
800 875 return -3;
801 876
802 877 if (self->ntrev > 0) {
803 878 /* ensure that the radix tree is fully populated */
804 879 for (rev = self->ntrev - 1; rev >= 0; rev--) {
805 880 const char *n = index_node(self, rev);
806 881 if (n == NULL)
807 882 return -2;
808 883 if (nt_insert(self, n, rev) == -1)
809 884 return -3;
810 885 }
811 886 self->ntrev = rev;
812 887 }
813 888
814 889 return nt_find(self, node, nodelen, 1);
815 890 }
816 891
817 892 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
818 893 {
819 894 const char *fullnode;
820 895 int nodelen;
821 896 char *node;
822 897 int rev, i;
823 898
824 899 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
825 900 return NULL;
826 901
827 902 if (nodelen < 4) {
828 903 PyErr_SetString(PyExc_ValueError, "key too short");
829 904 return NULL;
830 905 }
831 906
832 907 if (nodelen > 40)
833 908 nodelen = 40;
834 909
835 910 for (i = 0; i < nodelen; i++)
836 911 hexdigit(node, i);
837 912 if (PyErr_Occurred()) {
838 913 /* input contains non-hex characters */
839 914 PyErr_Clear();
840 915 Py_RETURN_NONE;
841 916 }
842 917
843 918 rev = nt_partialmatch(self, node, nodelen);
844 919
845 920 switch (rev) {
846 921 case -4:
847 922 raise_revlog_error();
848 923 case -3:
849 924 return NULL;
850 925 case -2:
851 926 Py_RETURN_NONE;
852 927 case -1:
853 928 return PyString_FromStringAndSize(nullid, 20);
854 929 }
855 930
856 931 fullnode = index_node(self, rev);
857 932 if (fullnode == NULL) {
858 933 PyErr_Format(PyExc_IndexError,
859 934 "could not access rev %d", rev);
860 935 return NULL;
861 936 }
862 937 return PyString_FromStringAndSize(fullnode, 20);
863 938 }
864 939
865 940 static PyObject *index_m_get(indexObject *self, PyObject *args)
866 941 {
867 942 Py_ssize_t nodelen;
868 943 PyObject *val;
869 944 char *node;
870 945 int rev;
871 946
872 947 if (!PyArg_ParseTuple(args, "O", &val))
873 948 return NULL;
874 949 if (node_check(val, &node, &nodelen) == -1)
875 950 return NULL;
876 951 rev = index_find_node(self, node, nodelen);
877 952 if (rev == -3)
878 953 return NULL;
879 954 if (rev == -2)
880 955 Py_RETURN_NONE;
881 956 return PyInt_FromLong(rev);
882 957 }
883 958
884 959 static int index_contains(indexObject *self, PyObject *value)
885 960 {
886 961 char *node;
887 962 Py_ssize_t nodelen;
888 963
889 964 if (PyInt_Check(value)) {
890 965 long rev = PyInt_AS_LONG(value);
891 966 return rev >= -1 && rev < index_length(self);
892 967 }
893 968
894 969 if (node_check(value, &node, &nodelen) == -1)
895 970 return -1;
896 971
897 972 switch (index_find_node(self, node, nodelen)) {
898 973 case -3:
899 974 return -1;
900 975 case -2:
901 976 return 0;
902 977 default:
903 978 return 1;
904 979 }
905 980 }
906 981
907 982 /*
908 983 * Invalidate any trie entries introduced by added revs.
909 984 */
910 985 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
911 986 {
912 987 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
913 988
914 989 for (i = start; i < len; i++) {
915 990 PyObject *tuple = PyList_GET_ITEM(self->added, i);
916 991 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
917 992
918 993 nt_insert(self, PyString_AS_STRING(node), -1);
919 994 }
920 995
921 996 if (start == 0)
922 997 Py_CLEAR(self->added);
923 998 }
924 999
925 1000 /*
926 1001 * Delete a numeric range of revs, which must lie at the end of the
927 1002 * index; the sentinel nullid entry is excluded from deletion.
928 1003 */
929 1004 static int index_slice_del(indexObject *self, PyObject *item)
930 1005 {
931 1006 Py_ssize_t start, stop, step, slicelength;
932 1007 Py_ssize_t length = index_length(self);
933 1008 int ret = 0;
934 1009
935 1010 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
936 1011 &start, &stop, &step, &slicelength) < 0)
937 1012 return -1;
938 1013
939 1014 if (slicelength <= 0)
940 1015 return 0;
941 1016
942 1017 if ((step < 0 && start < stop) || (step > 0 && start > stop))
943 1018 stop = start;
944 1019
945 1020 if (step < 0) {
946 1021 stop = start + 1;
947 1022 start = stop + step*(slicelength - 1) - 1;
948 1023 step = -step;
949 1024 }
950 1025
951 1026 if (step != 1) {
952 1027 PyErr_SetString(PyExc_ValueError,
953 1028 "revlog index delete requires step size of 1");
954 1029 return -1;
955 1030 }
956 1031
957 1032 if (stop != length - 1) {
958 1033 PyErr_SetString(PyExc_IndexError,
959 1034 "revlog index deletion indices are invalid");
960 1035 return -1;
961 1036 }
962 1037
963 1038 if (start < self->length - 1) {
964 1039 if (self->nt) {
965 1040 Py_ssize_t i;
966 1041
967 1042 for (i = start + 1; i < self->length - 1; i++) {
968 1043 const char *node = index_node(self, i);
969 1044
970 1045 if (node)
971 1046 nt_insert(self, node, -1);
972 1047 }
973 1048 if (self->added)
974 1049 nt_invalidate_added(self, 0);
975 1050 if (self->ntrev > start)
976 1051 self->ntrev = (int)start;
977 1052 }
978 1053 self->length = start + 1;
979 1054 if (start < self->raw_length)
980 1055 self->raw_length = start;
981 1056 goto done;
982 1057 }
983 1058
984 1059 if (self->nt) {
985 1060 nt_invalidate_added(self, start - self->length + 1);
986 1061 if (self->ntrev > start)
987 1062 self->ntrev = (int)start;
988 1063 }
989 1064 if (self->added)
990 1065 ret = PyList_SetSlice(self->added, start - self->length + 1,
991 1066 PyList_GET_SIZE(self->added), NULL);
992 1067 done:
993 1068 return ret;
994 1069 }
995 1070
996 1071 /*
997 1072 * Supported ops:
998 1073 *
999 1074 * slice deletion
1000 1075 * string assignment (extend node->rev mapping)
1001 1076 * string deletion (shrink node->rev mapping)
1002 1077 */
1003 1078 static int index_assign_subscript(indexObject *self, PyObject *item,
1004 1079 PyObject *value)
1005 1080 {
1006 1081 char *node;
1007 1082 Py_ssize_t nodelen;
1008 1083 long rev;
1009 1084
1010 1085 if (PySlice_Check(item) && value == NULL)
1011 1086 return index_slice_del(self, item);
1012 1087
1013 1088 if (node_check(item, &node, &nodelen) == -1)
1014 1089 return -1;
1015 1090
1016 1091 if (value == NULL)
1017 1092 return self->nt ? nt_insert(self, node, -1) : 0;
1018 1093 rev = PyInt_AsLong(value);
1019 1094 if (rev > INT_MAX || rev < 0) {
1020 1095 if (!PyErr_Occurred())
1021 1096 PyErr_SetString(PyExc_ValueError, "rev out of range");
1022 1097 return -1;
1023 1098 }
1024 1099 return nt_insert(self, node, (int)rev);
1025 1100 }
1026 1101
1027 1102 /*
1028 1103 * Find all RevlogNG entries in an index that has inline data. Update
1029 1104 * the optional "offsets" table with those entries.
1030 1105 */
1031 1106 static long inline_scan(indexObject *self, const char **offsets)
1032 1107 {
1033 1108 const char *data = PyString_AS_STRING(self->data);
1034 1109 const char *end = data + PyString_GET_SIZE(self->data);
1035 1110 const long hdrsize = 64;
1036 1111 long incr = hdrsize;
1037 1112 Py_ssize_t len = 0;
1038 1113
1039 1114 while (data + hdrsize <= end) {
1040 1115 uint32_t comp_len;
1041 1116 const char *old_data;
1042 1117 /* 3rd element of header is length of compressed inline data */
1043 1118 comp_len = getbe32(data + 8);
1044 1119 incr = hdrsize + comp_len;
1045 1120 if (incr < hdrsize)
1046 1121 break;
1047 1122 if (offsets)
1048 1123 offsets[len] = data;
1049 1124 len++;
1050 1125 old_data = data;
1051 1126 data += incr;
1052 1127 if (data <= old_data)
1053 1128 break;
1054 1129 }
1055 1130
1056 1131 if (data != end && data + hdrsize != end) {
1057 1132 if (!PyErr_Occurred())
1058 1133 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1059 1134 return -1;
1060 1135 }
1061 1136
1062 1137 return len;
1063 1138 }
1064 1139
1065 1140 static int index_init(indexObject *self, PyObject *args)
1066 1141 {
1067 1142 PyObject *data_obj, *inlined_obj;
1068 1143 Py_ssize_t size;
1069 1144
1070 1145 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1071 1146 return -1;
1072 1147 if (!PyString_Check(data_obj)) {
1073 1148 PyErr_SetString(PyExc_TypeError, "data is not a string");
1074 1149 return -1;
1075 1150 }
1076 1151 size = PyString_GET_SIZE(data_obj);
1077 1152
1078 1153 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1079 1154 self->data = data_obj;
1080 1155 self->cache = NULL;
1081 1156
1082 1157 self->added = NULL;
1083 1158 self->offsets = NULL;
1084 1159 self->nt = NULL;
1085 1160 self->ntlength = self->ntcapacity = 0;
1086 1161 self->ntdepth = self->ntsplits = 0;
1087 1162 self->ntlookups = self->ntmisses = 0;
1088 1163 self->ntrev = -1;
1089 1164 Py_INCREF(self->data);
1090 1165
1091 1166 if (self->inlined) {
1092 1167 long len = inline_scan(self, NULL);
1093 1168 if (len == -1)
1094 1169 goto bail;
1095 1170 self->raw_length = len;
1096 1171 self->length = len + 1;
1097 1172 } else {
1098 1173 if (size % 64) {
1099 1174 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1100 1175 goto bail;
1101 1176 }
1102 1177 self->raw_length = size / 64;
1103 1178 self->length = self->raw_length + 1;
1104 1179 }
1105 1180
1106 1181 return 0;
1107 1182 bail:
1108 1183 return -1;
1109 1184 }
1110 1185
1111 1186 static PyObject *index_nodemap(indexObject *self)
1112 1187 {
1113 1188 Py_INCREF(self);
1114 1189 return (PyObject *)self;
1115 1190 }
1116 1191
1117 1192 static void index_dealloc(indexObject *self)
1118 1193 {
1119 1194 _index_clearcaches(self);
1120 1195 Py_DECREF(self->data);
1121 1196 Py_XDECREF(self->added);
1122 1197 PyObject_Del(self);
1123 1198 }
1124 1199
1125 1200 static PySequenceMethods index_sequence_methods = {
1126 1201 (lenfunc)index_length, /* sq_length */
1127 1202 0, /* sq_concat */
1128 1203 0, /* sq_repeat */
1129 1204 (ssizeargfunc)index_get, /* sq_item */
1130 1205 0, /* sq_slice */
1131 1206 0, /* sq_ass_item */
1132 1207 0, /* sq_ass_slice */
1133 1208 (objobjproc)index_contains, /* sq_contains */
1134 1209 };
1135 1210
1136 1211 static PyMappingMethods index_mapping_methods = {
1137 1212 (lenfunc)index_length, /* mp_length */
1138 1213 (binaryfunc)index_getitem, /* mp_subscript */
1139 1214 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1140 1215 };
1141 1216
1142 1217 static PyMethodDef index_methods[] = {
1143 1218 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1144 1219 "clear the index caches"},
1145 1220 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1146 1221 "get an index entry"},
1222 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1223 "get head revisions"},
1147 1224 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1148 1225 "insert an index entry"},
1149 1226 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1150 1227 "match a potentially ambiguous node ID"},
1151 1228 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1152 1229 "stats for the index"},
1153 1230 {NULL} /* Sentinel */
1154 1231 };
1155 1232
1156 1233 static PyGetSetDef index_getset[] = {
1157 1234 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1158 1235 {NULL} /* Sentinel */
1159 1236 };
1160 1237
1161 1238 static PyTypeObject indexType = {
1162 1239 PyObject_HEAD_INIT(NULL)
1163 1240 0, /* ob_size */
1164 1241 "parsers.index", /* tp_name */
1165 1242 sizeof(indexObject), /* tp_basicsize */
1166 1243 0, /* tp_itemsize */
1167 1244 (destructor)index_dealloc, /* tp_dealloc */
1168 1245 0, /* tp_print */
1169 1246 0, /* tp_getattr */
1170 1247 0, /* tp_setattr */
1171 1248 0, /* tp_compare */
1172 1249 0, /* tp_repr */
1173 1250 0, /* tp_as_number */
1174 1251 &index_sequence_methods, /* tp_as_sequence */
1175 1252 &index_mapping_methods, /* tp_as_mapping */
1176 1253 0, /* tp_hash */
1177 1254 0, /* tp_call */
1178 1255 0, /* tp_str */
1179 1256 0, /* tp_getattro */
1180 1257 0, /* tp_setattro */
1181 1258 0, /* tp_as_buffer */
1182 1259 Py_TPFLAGS_DEFAULT, /* tp_flags */
1183 1260 "revlog index", /* tp_doc */
1184 1261 0, /* tp_traverse */
1185 1262 0, /* tp_clear */
1186 1263 0, /* tp_richcompare */
1187 1264 0, /* tp_weaklistoffset */
1188 1265 0, /* tp_iter */
1189 1266 0, /* tp_iternext */
1190 1267 index_methods, /* tp_methods */
1191 1268 0, /* tp_members */
1192 1269 index_getset, /* tp_getset */
1193 1270 0, /* tp_base */
1194 1271 0, /* tp_dict */
1195 1272 0, /* tp_descr_get */
1196 1273 0, /* tp_descr_set */
1197 1274 0, /* tp_dictoffset */
1198 1275 (initproc)index_init, /* tp_init */
1199 1276 0, /* tp_alloc */
1200 1277 };
1201 1278
1202 1279 /*
1203 1280 * returns a tuple of the form (index, cache) with elements as
1204 1281 * follows:
1205 1282 *
1206 1283 * index: an index object that lazily parses RevlogNG records
1207 1284 * cache: if data is inlined, a tuple (0, index_file_content), else None
1208 1285 *
1209 1286 * The added complications are for backwards compatibility.
1210 1287 */
1211 1288 static PyObject *parse_index2(PyObject *self, PyObject *args)
1212 1289 {
1213 1290 PyObject *tuple = NULL, *cache = NULL;
1214 1291 indexObject *idx;
1215 1292 int ret;
1216 1293
1217 1294 idx = PyObject_New(indexObject, &indexType);
1218 1295 if (idx == NULL)
1219 1296 goto bail;
1220 1297
1221 1298 ret = index_init(idx, args);
1222 1299 if (ret == -1)
1223 1300 goto bail;
1224 1301
1225 1302 if (idx->inlined) {
1226 1303 cache = Py_BuildValue("iO", 0, idx->data);
1227 1304 if (cache == NULL)
1228 1305 goto bail;
1229 1306 } else {
1230 1307 cache = Py_None;
1231 1308 Py_INCREF(cache);
1232 1309 }
1233 1310
1234 1311 tuple = Py_BuildValue("NN", idx, cache);
1235 1312 if (!tuple)
1236 1313 goto bail;
1237 1314 return tuple;
1238 1315
1239 1316 bail:
1240 1317 Py_XDECREF(idx);
1241 1318 Py_XDECREF(cache);
1242 1319 Py_XDECREF(tuple);
1243 1320 return NULL;
1244 1321 }
1245 1322
1246 1323 static char parsers_doc[] = "Efficient content parsing.";
1247 1324
1248 1325 static PyMethodDef methods[] = {
1249 1326 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1250 1327 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1251 1328 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1252 1329 {NULL, NULL}
1253 1330 };
1254 1331
1255 1332 static void module_init(PyObject *mod)
1256 1333 {
1257 1334 indexType.tp_new = PyType_GenericNew;
1258 1335 if (PyType_Ready(&indexType) < 0)
1259 1336 return;
1260 1337 Py_INCREF(&indexType);
1261 1338
1262 1339 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1263 1340
1264 1341 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1265 1342 -1, -1, -1, -1, nullid, 20);
1266 1343 if (nullentry)
1267 1344 PyObject_GC_UnTrack(nullentry);
1268 1345 }
1269 1346
1270 1347 #ifdef IS_PY3K
1271 1348 static struct PyModuleDef parsers_module = {
1272 1349 PyModuleDef_HEAD_INIT,
1273 1350 "parsers",
1274 1351 parsers_doc,
1275 1352 -1,
1276 1353 methods
1277 1354 };
1278 1355
1279 1356 PyMODINIT_FUNC PyInit_parsers(void)
1280 1357 {
1281 1358 PyObject *mod = PyModule_Create(&parsers_module);
1282 1359 module_init(mod);
1283 1360 return mod;
1284 1361 }
1285 1362 #else
1286 1363 PyMODINIT_FUNC initparsers(void)
1287 1364 {
1288 1365 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1289 1366 module_init(mod);
1290 1367 }
1291 1368 #endif
@@ -1,1317 +1,1321 @@ revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
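# Editor's sketch (not part of this change): the three helpers above
# pack a 48-bit file offset and a 16-bit type into one integer.
# `_offset_type_demo` is a hypothetical name used only for illustration.
def _offset_type_demo():
    q = offset_type(4096, 1)        # == (4096 << 16) | 1
    assert getoffset(q) == 4096
    assert gettype(q) == 1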
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, it is always p2
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
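# Editor's sketch (not part of this change): both branches of hash()
# above produce a plain 20-byte SHA-1 digest; `_hash_demo` is an
# illustrative name.
def _hash_demo():
    p1 = "p1-node-20-bytes----"
    assert len(hash("text", p1, nullid)) == 20                   # one parent
    assert len(hash("text", p1, "p2-node-20-bytes----")) == 20   # merge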
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
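# Editor's sketch (not part of this change): compress() tags each chunk
# so decompress() can pick the right branch from its first byte -- 'x'
# for zlib data, 'u' for stored-uncompressed text, '\0' for passthrough.
def _compress_demo():
    text = "some revision text " * 10
    head, body = compress(text)     # head is '' (zlib) or 'u' (stored)
    assert decompress(head + body) == text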
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
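# Editor's sketch (not part of this change): unpacking one 64-byte
# RevlogNG entry with the format string above (non-inline data only;
# for rev 0 the first four bytes hold the version, as packentry() below
# shows).  `_unpack_entry_demo` is an illustrative name.
def _unpack_entry_demo(data, rev):
    s = struct.calcsize(indexformatng)          # 64 bytes per entry
    (offset_flags, comp_len, uncomp_len, base, link,
     p1, p2, node) = _unpack(indexformatng, data[rev * s:(rev + 1) * s])
    return getoffset(offset_flags), gettype(offset_flags), node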
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 177 return index, getattr(index, 'nodemap', None), cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 opts = getattr(opener, 'options', None)
230 230 if opts is not None:
231 231 if 'revlogv1' in opts:
232 232 if 'generaldelta' in opts:
233 233 v |= REVLOGGENERALDELTA
234 234 else:
235 235 v = 0
236 236
237 237 i = ''
238 238 self._initempty = True
239 239 try:
240 240 f = self.opener(self.indexfile)
241 241 i = f.read()
242 242 f.close()
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 self._initempty = False
246 246 except IOError, inst:
247 247 if inst.errno != errno.ENOENT:
248 248 raise
249 249
250 250 self.version = v
251 251 self._inline = v & REVLOGNGINLINEDATA
252 252 self._generaldelta = v & REVLOGGENERALDELTA
253 253 flags = v & ~0xFFFF
254 254 fmt = v & 0xFFFF
255 255 if fmt == REVLOGV0 and flags:
256 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 257 % (self.indexfile, flags >> 16))
258 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt > REVLOGNG:
262 262 raise RevlogError(_("index %s unknown format %d")
263 263 % (self.indexfile, fmt))
264 264
265 265 self._io = revlogio()
266 266 if self.version == REVLOGV0:
267 267 self._io = revlogoldio()
268 268 try:
269 269 d = self._io.parseindex(i, self._inline)
270 270 except (ValueError, IndexError):
271 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 272 self.index, nodemap, self._chunkcache = d
273 273 if nodemap is not None:
274 274 self.nodemap = self._nodecache = nodemap
275 275 if not self._chunkcache:
276 276 self._chunkclear()
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 for i in xrange(len(self)):
284 284 yield i
285 285
286 286 @util.propertycache
287 287 def nodemap(self):
288 288 self.rev(self.node(0))
289 289 return self._nodecache
290 290
291 291 def hasnode(self, node):
292 292 try:
293 293 self.rev(node)
294 294 return True
295 295 except KeyError:
296 296 return False
297 297
298 298 def clearcaches(self):
299 299 try:
300 300 self._nodecache.clearcaches()
301 301 except AttributeError:
302 302 self._nodecache = {nullid: nullrev}
303 303 self._nodepos = None
304 304
305 305 def rev(self, node):
306 306 try:
307 307 return self._nodecache[node]
308 308 except RevlogError:
309 309 # parsers.c radix tree lookup failed
310 310 raise LookupError(node, self.indexfile, _('no node'))
311 311 except KeyError:
312 312 # pure python cache lookup failed
313 313 n = self._nodecache
314 314 i = self.index
315 315 p = self._nodepos
316 316 if p is None:
317 317 p = len(i) - 2
318 318 for r in xrange(p, -1, -1):
319 319 v = i[r][7]
320 320 n[v] = r
321 321 if v == node:
322 322 self._nodepos = r - 1
323 323 return r
324 324 raise LookupError(node, self.indexfile, _('no node'))
325 325
326 326 def node(self, rev):
327 327 return self.index[rev][7]
328 328 def linkrev(self, rev):
329 329 return self.index[rev][4]
330 330 def parents(self, node):
331 331 i = self.index
332 332 d = i[self.rev(node)]
333 333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 334 def parentrevs(self, rev):
335 335 return self.index[rev][5:7]
336 336 def start(self, rev):
337 337 return int(self.index[rev][0] >> 16)
338 338 def end(self, rev):
339 339 return self.start(rev) + self.length(rev)
340 340 def length(self, rev):
341 341 return self.index[rev][1]
342 342 def chainbase(self, rev):
343 343 index = self.index
344 344 base = index[rev][3]
345 345 while base != rev:
346 346 rev = base
347 347 base = index[rev][3]
348 348 return base
349 349 def flags(self, rev):
350 350 return self.index[rev][0] & 0xFFFF
351 351 def rawsize(self, rev):
352 352 """return the length of the uncompressed text for a given revision"""
353 353 l = self.index[rev][2]
354 354 if l >= 0:
355 355 return l
356 356
357 357 t = self.revision(self.node(rev))
358 358 return len(t)
359 359 size = rawsize
360 360
361 361 def reachable(self, node, stop=None):
362 362 """return the set of all nodes ancestral to a given node, including
363 363 the node itself, stopping when stop is matched"""
364 364 reachable = set((node,))
365 365 visit = [node]
366 366 if stop:
367 367 stopn = self.rev(stop)
368 368 else:
369 369 stopn = 0
370 370 while visit:
371 371 n = visit.pop(0)
372 372 if n == stop:
373 373 continue
374 374 if n == nullid:
375 375 continue
376 376 for p in self.parents(n):
377 377 if self.rev(p) < stopn:
378 378 continue
379 379 if p not in reachable:
380 380 reachable.add(p)
381 381 visit.append(p)
382 382 return reachable
383 383
384 384 def ancestors(self, *revs):
385 385 """Generate the ancestors of 'revs' in reverse topological order.
386 386
387 387 Yield a sequence of revision numbers starting with the parents
388 388 of each revision in revs, i.e., each revision is *not* considered
389 389 an ancestor of itself. Results are in breadth-first order:
390 390 parents of each rev in revs, then parents of those, etc. Result
391 391 does not include the null revision."""
392 392 visit = list(revs)
393 393 seen = set([nullrev])
394 394 while visit:
395 395 for parent in self.parentrevs(visit.pop(0)):
396 396 if parent not in seen:
397 397 visit.append(parent)
398 398 seen.add(parent)
399 399 yield parent
400 400
401 401 def descendants(self, *revs):
402 402 """Generate the descendants of 'revs' in revision order.
403 403
404 404 Yield a sequence of revision numbers starting with a child of
405 405 some rev in revs, i.e., each revision is *not* considered a
406 406 descendant of itself. Results are ordered by revision number (a
407 407 topological sort)."""
408 408 first = min(revs)
409 409 if first == nullrev:
410 410 for i in self:
411 411 yield i
412 412 return
413 413
414 414 seen = set(revs)
415 415 for i in xrange(first + 1, len(self)):
416 416 for x in self.parentrevs(i):
417 417 if x != nullrev and x in seen:
418 418 seen.add(i)
419 419 yield i
420 420 break
421 421
422 422 def findcommonmissing(self, common=None, heads=None):
423 423 """Return a tuple of the ancestors of common and the ancestors of heads
424 424 that are not ancestors of common. In revset terminology, we return the
425 425 tuple:
426 426
427 427 ::common, (::heads) - (::common)
428 428
429 429 The list is sorted by revision number, meaning it is
430 430 topologically sorted.
431 431
432 432 'heads' and 'common' are both lists of node IDs. If heads is
433 433 not supplied, uses all of the revlog's heads. If common is not
434 434 supplied, uses nullid."""
435 435 if common is None:
436 436 common = [nullid]
437 437 if heads is None:
438 438 heads = self.heads()
439 439
440 440 common = [self.rev(n) for n in common]
441 441 heads = [self.rev(n) for n in heads]
442 442
443 443 # we want the ancestors, but inclusive
444 444 has = set(self.ancestors(*common))
445 445 has.add(nullrev)
446 446 has.update(common)
447 447
448 448 # take all ancestors from heads that aren't in has
449 449 missing = set()
450 450 visit = [r for r in heads if r not in has]
451 451 while visit:
452 452 r = visit.pop(0)
453 453 if r in missing:
454 454 continue
455 455 else:
456 456 missing.add(r)
457 457 for p in self.parentrevs(r):
458 458 if p not in has:
459 459 visit.append(p)
460 460 missing = list(missing)
461 461 missing.sort()
462 462 return has, [self.node(r) for r in missing]
463 463
464 464 def findmissing(self, common=None, heads=None):
465 465 """Return the ancestors of heads that are not ancestors of common.
466 466
467 467 More specifically, return a list of nodes N such that every N
468 468 satisfies the following constraints:
469 469
470 470 1. N is an ancestor of some node in 'heads'
471 471 2. N is not an ancestor of any node in 'common'
472 472
473 473 The list is sorted by revision number, meaning it is
474 474 topologically sorted.
475 475
476 476 'heads' and 'common' are both lists of node IDs. If heads is
477 477 not supplied, uses all of the revlog's heads. If common is not
478 478 supplied, uses nullid."""
479 479 _common, missing = self.findcommonmissing(common, heads)
480 480 return missing
481 481
482 482 def nodesbetween(self, roots=None, heads=None):
483 483 """Return a topological path from 'roots' to 'heads'.
484 484
485 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 486 topologically sorted list of all nodes N that satisfy both of
487 487 these constraints:
488 488
489 489 1. N is a descendant of some node in 'roots'
490 490 2. N is an ancestor of some node in 'heads'
491 491
492 492 Every node is considered to be both a descendant and an ancestor
493 493 of itself, so every reachable node in 'roots' and 'heads' will be
494 494 included in 'nodes'.
495 495
496 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499 499
500 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 501 unspecified, uses nullid as the only root. If 'heads' is
502 502 unspecified, uses list of all of the revlog's heads."""
503 503 nonodes = ([], [], [])
504 504 if roots is not None:
505 505 roots = list(roots)
506 506 if not roots:
507 507 return nonodes
508 508 lowestrev = min([self.rev(n) for n in roots])
509 509 else:
510 510 roots = [nullid] # Everybody's a descendant of nullid
511 511 lowestrev = nullrev
512 512 if (lowestrev == nullrev) and (heads is None):
513 513 # We want _all_ the nodes!
514 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 515 if heads is None:
516 516 # All nodes are ancestors, so the latest ancestor is the last
517 517 # node.
518 518 highestrev = len(self) - 1
519 519 # Set ancestors to None to signal that every node is an ancestor.
520 520 ancestors = None
521 521 # Set heads to an empty dictionary for later discovery of heads
522 522 heads = {}
523 523 else:
524 524 heads = list(heads)
525 525 if not heads:
526 526 return nonodes
527 527 ancestors = set()
528 528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 529 # Also, later we will be using it to filter out the heads we can't
530 530 # find from roots.
531 531 heads = dict.fromkeys(heads, False)
532 532 # Start at the top and keep marking parents until we're done.
533 533 nodestotag = set(heads)
534 534 # Remember where the top was so we can use it as a limit later.
535 535 highestrev = max([self.rev(n) for n in nodestotag])
536 536 while nodestotag:
537 537 # grab a node to tag
538 538 n = nodestotag.pop()
539 539 # Never tag nullid
540 540 if n == nullid:
541 541 continue
542 542 # A node's revision number represents its place in a
543 543 # topologically sorted list of nodes.
544 544 r = self.rev(n)
545 545 if r >= lowestrev:
546 546 if n not in ancestors:
547 547 # If we are possibly a descendant of one of the roots
548 548 # and we haven't already been marked as an ancestor
549 549 ancestors.add(n) # Mark as ancestor
550 550 # Add non-nullid parents to list of nodes to tag.
551 551 nodestotag.update([p for p in self.parents(n) if
552 552 p != nullid])
553 553 elif n in heads: # We've seen it before, is it a fake head?
554 554 # So it is; real heads should not be the ancestors of
555 555 # any other heads.
556 556 heads.pop(n)
557 557 if not ancestors:
558 558 return nonodes
559 559 # Now that we have our set of ancestors, we want to remove any
560 560 # roots that are not ancestors.
561 561
562 562 # If one of the roots was nullid, everything is included anyway.
563 563 if lowestrev > nullrev:
564 564 # But since it wasn't, recompute the lowest rev to exclude
565 565 # roots that aren't ancestors.
566 566
567 567 # Filter out roots that aren't ancestors of heads
568 568 roots = [n for n in roots if n in ancestors]
569 569 # Recompute the lowest revision
570 570 if roots:
571 571 lowestrev = min([self.rev(n) for n in roots])
572 572 else:
573 573 # No more roots? Return empty list
574 574 return nonodes
575 575 else:
576 576 # We are descending from nullid, and don't need to care about
577 577 # any other roots.
578 578 lowestrev = nullrev
579 579 roots = [nullid]
580 580 # Transform our roots list into a set.
581 581 descendants = set(roots)
582 582 # Also, keep the original roots so we can filter out roots that aren't
583 583 # 'real' roots (i.e. are descended from other roots).
584 584 roots = descendants.copy()
585 585 # Our topologically sorted list of output nodes.
586 586 orderedout = []
587 587 # Don't start at nullid since we don't want nullid in our output list,
588 588 # and if nullid shows up in descendants, empty parents will look like
589 589 # they're descendants.
590 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 591 n = self.node(r)
592 592 isdescendant = False
593 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 594 isdescendant = True
595 595 elif n in descendants:
596 596 # n is already a descendant
597 597 isdescendant = True
598 598 # This check only needs to be done here because all the roots
599 599 # will already be marked as descendants before the loop starts.
600 600 if n in roots:
601 601 # If n was a root, check if it's a 'real' root.
602 602 p = tuple(self.parents(n))
603 603 # If any of its parents are descendants, it's not a root.
604 604 if (p[0] in descendants) or (p[1] in descendants):
605 605 roots.remove(n)
606 606 else:
607 607 p = tuple(self.parents(n))
608 608 # A node is a descendant if either of its parents are
609 609 # descendants. (We seeded the descendants set with the roots
610 610 # up there, remember?)
611 611 if (p[0] in descendants) or (p[1] in descendants):
612 612 descendants.add(n)
613 613 isdescendant = True
614 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 615 # Only include nodes that are both descendants and ancestors.
616 616 orderedout.append(n)
617 617 if (ancestors is not None) and (n in heads):
618 618 # We're trying to figure out which heads are reachable
619 619 # from roots.
620 620 # Mark this head as having been reached
621 621 heads[n] = True
622 622 elif ancestors is None:
623 623 # Otherwise, we're trying to discover the heads.
624 624 # Assume this is a head because if it isn't, the next step
625 625 # will eventually remove it.
626 626 heads[n] = True
627 627 # But, obviously its parents aren't.
628 628 for p in self.parents(n):
629 629 heads.pop(p, None)
630 630 heads = [n for n, flag in heads.iteritems() if flag]
631 631 roots = list(roots)
632 632 assert orderedout
633 633 assert roots
634 634 assert heads
635 635 return (orderedout, roots, heads)
636 636
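The heart of nodesbetween is an intersection: nodes that are descendants of some root and ancestors of some head. A toy illustration of that contract on a diamond DAG, using revision numbers directly (a simplification; the real method deals in node IDs and also reports which roots and heads survived):

    #        0
    #       / \
    #      1   2
    #       \ /
    #        3
    parents = {0: (), 1: (0,), 2: (0,), 3: (1, 2)}

    def descendants_of(roots):
        # one forward pass works because parents have smaller revs
        out = set(roots)
        for r in sorted(parents):
            if any(p in out for p in parents[r]):
                out.add(r)
        return out

    def ancestors_of(heads):
        out = set(heads)
        for r in sorted(parents, reverse=True):
            if r in out:
                out.update(parents[r])
        return out

    nodes = sorted(descendants_of([1]) & ancestors_of([3]))
    assert nodes == [1, 3]   # 2 is an ancestor of 3 but not a descendant of 1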
637 637 def headrevs(self):
638 try:
639 return self.index.headrevs()
640 except AttributeError:
641 pass
638 642 count = len(self)
639 643 if not count:
640 644 return [nullrev]
641 645 ishead = [1] * (count + 1)
642 646 index = self.index
643 647 for r in xrange(count):
644 648 e = index[r]
645 649 ishead[e[5]] = ishead[e[6]] = 0
646 650 return [r for r in xrange(count) if ishead[r]]
647 651
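This is the change the commit message describes: headrevs now delegates to the index's C implementation when the index comes from parsers.c, and the AttributeError fallback keeps pure-Python index objects working unchanged. The fallback is a single pass: every rev starts as a head candidate and is cleared the moment it appears as a parent (e[5] and e[6] are the two parent revisions in an index entry; the extra list slot absorbs nullrev, -1, through Python's negative indexing). A standalone sketch of that pass over toy data:

    # A rev is a head iff no rev names it as a parent.
    # parentrevs maps rev -> (p1, p2), with -1 for a missing parent.
    def headrevs(parentrevs):
        count = len(parentrevs)
        if not count:
            return [-1]                  # mirrors returning [nullrev]
        ishead = [1] * (count + 1)       # extra slot absorbs index -1
        for r in range(count):
            p1, p2 = parentrevs[r]
            ishead[p1] = ishead[p2] = 0  # a parent is never a head
        return [r for r in range(count) if ishead[r]]

    # 0 <- 1 and 0 <- 2: two branches, hence two heads
    assert headrevs({0: (-1, -1), 1: (0, -1), 2: (0, -1)}) == [1, 2]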
648 652 def heads(self, start=None, stop=None):
649 653 """return the list of all nodes that have no children
650 654
651 655 if start is specified, only heads that are descendants of
652 656 start will be returned
653 657 if stop is specified, it will consider all the revs from stop
654 658 as if they had no children
655 659 """
656 660 if start is None and stop is None:
657 661 if not len(self):
658 662 return [nullid]
659 663 return [self.node(r) for r in self.headrevs()]
660 664
661 665 if start is None:
662 666 start = nullid
663 667 if stop is None:
664 668 stop = []
665 669 stoprevs = set([self.rev(n) for n in stop])
666 670 startrev = self.rev(start)
667 671 reachable = set((startrev,))
668 672 heads = set((startrev,))
669 673
670 674 parentrevs = self.parentrevs
671 675 for r in xrange(startrev + 1, len(self)):
672 676 for p in parentrevs(r):
673 677 if p in reachable:
674 678 if r not in stoprevs:
675 679 reachable.add(r)
676 680 heads.add(r)
677 681 if p in heads and p not in stoprevs:
678 682 heads.remove(p)
679 683
680 684 return [self.node(r) for r in heads]
681 685
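When start or stop is given, heads() cannot use the simple pass above; it sweeps forward from startrev, adding each reachable rev as a head and demoting its parents as it goes. A toy version of the same sweep (function name and dict DAG are illustrative only):

    def bounded_heads(parentrevs, startrev, stoprevs=()):
        stoprevs = set(stoprevs)
        reachable = {startrev}
        heads = {startrev}
        for r in range(startrev + 1, len(parentrevs)):
            for p in parentrevs[r]:
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)   # p has a child now
        return sorted(heads)

    # rev 1 has two children, 2 and 3; both are heads under start=1
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    assert bounded_heads(parents, 1) == [2, 3]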
682 686 def children(self, node):
683 687 """find the children of a given node"""
684 688 c = []
685 689 p = self.rev(node)
686 690 for r in range(p + 1, len(self)):
687 691 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
688 692 if prevs:
689 693 for pr in prevs:
690 694 if pr == p:
691 695 c.append(self.node(r))
692 696 elif p == nullrev:
693 697 c.append(self.node(r))
694 698 return c
695 699
696 700 def descendant(self, start, end):
697 701 if start == nullrev:
698 702 return True
699 703 for i in self.descendants(start):
700 704 if i == end:
701 705 return True
702 706 elif i > end:
703 707 break
704 708 return False
705 709
706 710 def ancestor(self, a, b):
707 711 """calculate the least common ancestor of nodes a and b"""
708 712
709 713 # fast path, check if it is a descendant
710 714 a, b = self.rev(a), self.rev(b)
711 715 start, end = sorted((a, b))
712 716 if self.descendant(start, end):
713 717 return self.node(start)
714 718
715 719 def parents(rev):
716 720 return [p for p in self.parentrevs(rev) if p != nullrev]
717 721
718 722 c = ancestor.ancestor(a, b, parents)
719 723 if c is None:
720 724 return nullid
721 725
722 726 return self.node(c)
723 727
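ancestor() first takes the fast path (if one rev is a descendant of the other, the smaller rev wins), then falls back to ancestor.ancestor(). A naive sketch of what that call computes, over a toy parent map: intersect the two inclusive ancestor sets and pick the maximal element. Because parents always have smaller revision numbers, the max-rev common ancestor is always a maximal one; the real algorithm is more efficient and uses depths to choose among ties.

    def ancestors_inclusive(parents, rev):
        seen, stack = set(), [rev]
        while stack:
            r = stack.pop()
            if r not in seen:
                seen.add(r)
                stack.extend(parents[r])
        return seen

    def naive_lca(parents, a, b):
        common = ancestors_inclusive(parents, a) & ancestors_inclusive(parents, b)
        return max(common) if common else None

    parents = {0: (), 1: (0,), 2: (0,), 3: (1, 2)}
    assert naive_lca(parents, 1, 2) == 0
    assert naive_lca(parents, 3, 1) == 1   # the descendant fast-path case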
724 728 def _match(self, id):
725 729 if isinstance(id, int):
726 730 # rev
727 731 return self.node(id)
728 732 if len(id) == 20:
729 733 # possibly a binary node
730 734 # odds of a binary node being all hex in ASCII are 1 in 10**25
731 735 try:
732 736 node = id
733 737 self.rev(node) # quick search the index
734 738 return node
735 739 except LookupError:
736 740 pass # may be partial hex id
737 741 try:
738 742 # str(rev)
739 743 rev = int(id)
740 744 if str(rev) != id:
741 745 raise ValueError
742 746 if rev < 0:
743 747 rev = len(self) + rev
744 748 if rev < 0 or rev >= len(self):
745 749 raise ValueError
746 750 return self.node(rev)
747 751 except (ValueError, OverflowError):
748 752 pass
749 753 if len(id) == 40:
750 754 try:
751 755 # a full hex nodeid?
752 756 node = bin(id)
753 757 self.rev(node)
754 758 return node
755 759 except (TypeError, LookupError):
756 760 pass
757 761
758 762 def _partialmatch(self, id):
759 763 try:
760 764 return self.index.partialmatch(id)
761 765 except RevlogError:
762 766 # parsers.c radix tree lookup gave multiple matches
763 767 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
764 768 except (AttributeError, ValueError):
765 769 # we are pure python, or key was too short to search radix tree
766 770 pass
767 771
768 772 if id in self._pcache:
769 773 return self._pcache[id]
770 774
771 775 if len(id) < 40:
772 776 try:
773 777 # hex(node)[:...]
774 778 l = len(id) // 2 # grab an even number of digits
775 779 prefix = bin(id[:l * 2])
776 780 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
777 781 nl = [n for n in nl if hex(n).startswith(id)]
778 782 if len(nl) > 0:
779 783 if len(nl) == 1:
780 784 self._pcache[id] = nl[0]
781 785 return nl[0]
782 786 raise LookupError(id, self.indexfile,
783 787 _('ambiguous identifier'))
784 788 return None
785 789 except TypeError:
786 790 pass
787 791
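_partialmatch resolves an abbreviated hex id: first via the parsers.c radix tree if available, then by a linear scan. The pure-Python path decodes an even number of leading digits into a binary prefix, filters nodes on that prefix, and re-checks the full (possibly odd-length) hex string. A simplified sketch of the scan (error handling and the _pcache omitted):

    import binascii

    def partialmatch(nodes, hexid):
        l = len(hexid) // 2                       # even number of digits
        prefix = binascii.unhexlify(hexid[:l * 2])
        cand = [n for n in nodes if n.startswith(prefix)]
        cand = [n for n in cand
                if binascii.hexlify(n).startswith(hexid.encode())]
        if not cand:
            return None
        if len(cand) == 1:
            return cand[0]
        raise LookupError('ambiguous identifier: %s' % hexid)

    nodes = [binascii.unhexlify('ab' * 20), binascii.unhexlify('cd' * 20)]
    assert partialmatch(nodes, 'aba') == nodes[0]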
788 792 def lookup(self, id):
789 793 """locate a node based on:
790 794 - revision number or str(revision number)
791 795 - nodeid or subset of hex nodeid
792 796 """
793 797 n = self._match(id)
794 798 if n is not None:
795 799 return n
796 800 n = self._partialmatch(id)
797 801 if n:
798 802 return n
799 803
800 804 raise LookupError(id, self.indexfile, _('no match found'))
801 805
802 806 def cmp(self, node, text):
803 807 """compare text with a given file revision
804 808
805 809 returns True if text is different from what is stored.
806 810 """
807 811 p1, p2 = self.parents(node)
808 812 return hash(text, p1, p2) != node
809 813
810 814 def _addchunk(self, offset, data):
811 815 o, d = self._chunkcache
812 816 # try to add to existing cache
813 817 if o + len(d) == offset and len(d) + len(data) < _chunksize:
814 818 self._chunkcache = o, d + data
815 819 else:
816 820 self._chunkcache = offset, data
817 821
818 822 def _loadchunk(self, offset, length):
819 823 if self._inline:
820 824 df = self.opener(self.indexfile)
821 825 else:
822 826 df = self.opener(self.datafile)
823 827
824 828 readahead = max(65536, length)
825 829 df.seek(offset)
826 830 d = df.read(readahead)
827 831 df.close()
828 832 self._addchunk(offset, d)
829 833 if readahead > length:
830 834 return util.buffer(d, 0, length)
831 835 return d
832 836
833 837 def _getchunk(self, offset, length):
834 838 o, d = self._chunkcache
835 839 l = len(d)
836 840
837 841 # is it in the cache?
838 842 cachestart = offset - o
839 843 cacheend = cachestart + length
840 844 if cachestart >= 0 and cacheend <= l:
841 845 if cachestart == 0 and cacheend == l:
842 846 return d # avoid a copy
843 847 return util.buffer(d, cachestart, cacheend - cachestart)
844 848
845 849 return self._loadchunk(offset, length)
846 850
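The chunk cache is a single (offset, data) window over the data file. _getchunk serves a request from cache iff [offset, offset + length) fits entirely inside that window, and returns the cached object itself when the request covers the whole window, avoiding a copy. A sketch of just the containment arithmetic (slicing stands in for util.buffer):

    def getchunk_cached(cache, offset, length):
        o, d = cache
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= len(d):
            if cachestart == 0 and cacheend == len(d):
                return d                      # whole window: no copy
            return d[cachestart:cacheend]
        return None                           # miss: caller loads from disk

    cache = (100, b'abcdefgh')                # bytes 100..107 of the file
    assert getchunk_cached(cache, 102, 3) == b'cde'
    assert getchunk_cached(cache, 100, 8) is cache[1]
    assert getchunk_cached(cache, 99, 4) is None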
847 851 def _chunkraw(self, startrev, endrev):
848 852 start = self.start(startrev)
849 853 length = self.end(endrev) - start
850 854 if self._inline:
851 855 start += (startrev + 1) * self._io.size
852 856 return self._getchunk(start, length)
853 857
854 858 def _chunk(self, rev):
855 859 return decompress(self._chunkraw(rev, rev))
856 860
857 861 def _chunkbase(self, rev):
858 862 return self._chunk(rev)
859 863
860 864 def _chunkclear(self):
861 865 self._chunkcache = (0, '')
862 866
863 867 def deltaparent(self, rev):
864 868 """return deltaparent of the given revision"""
865 869 base = self.index[rev][3]
866 870 if base == rev:
867 871 return nullrev
868 872 elif self._generaldelta:
869 873 return base
870 874 else:
871 875 return rev - 1
872 876
873 877 def revdiff(self, rev1, rev2):
874 878 """return or calculate a delta between two revisions"""
875 879 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
876 880 return str(self._chunk(rev2))
877 881
878 882 return mdiff.textdiff(self.revision(rev1),
879 883 self.revision(rev2))
880 884
881 885 def revision(self, nodeorrev):
882 886 """return an uncompressed revision of a given node or revision
883 887 number.
884 888 """
885 889 if isinstance(nodeorrev, int):
886 890 rev = nodeorrev
887 891 node = self.node(rev)
888 892 else:
889 893 node = nodeorrev
890 894 rev = None
891 895
892 896 cachedrev = None
893 897 if node == nullid:
894 898 return ""
895 899 if self._cache:
896 900 if self._cache[0] == node:
897 901 return self._cache[2]
898 902 cachedrev = self._cache[1]
899 903
900 904 # look up what we need to read
901 905 text = None
902 906 if rev is None:
903 907 rev = self.rev(node)
904 908
905 909 # check rev flags
906 910 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
907 911 raise RevlogError(_('incompatible revision flag %x') %
908 912 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
909 913
910 914 # build delta chain
911 915 chain = []
912 916 index = self.index # for performance
913 917 generaldelta = self._generaldelta
914 918 iterrev = rev
915 919 e = index[iterrev]
916 920 while iterrev != e[3] and iterrev != cachedrev:
917 921 chain.append(iterrev)
918 922 if generaldelta:
919 923 iterrev = e[3]
920 924 else:
921 925 iterrev -= 1
922 926 e = index[iterrev]
923 927 chain.reverse()
924 928 base = iterrev
925 929
926 930 if iterrev == cachedrev:
927 931 # cache hit
928 932 text = self._cache[2]
929 933
930 934 # drop cache to save memory
931 935 self._cache = None
932 936
933 937 self._chunkraw(base, rev)
934 938 if text is None:
935 939 text = str(self._chunkbase(base))
936 940
937 941 bins = [self._chunk(r) for r in chain]
938 942 text = mdiff.patches(text, bins)
939 943
940 944 text = self._checkhash(text, node, rev)
941 945
942 946 self._cache = (node, rev, text)
943 947 return text
944 948
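revision() rebuilds a text by walking the delta chain back to its base and replaying the deltas oldest-first; with generaldelta the next link is the stored base (e[3]), otherwise it is simply rev - 1. A stripped-down sketch of the chain walk (cache handling omitted; chunk() stands in for the decompressed-chunk accessors and apply() for mdiff.patches):

    def reconstruct(index, rev, generaldelta, chunk, apply):
        chain, iterrev = [], rev
        e = index[iterrev]
        while iterrev != e[3]:                # e[3] == rev marks a full text
            chain.append(iterrev)
            iterrev = e[3] if generaldelta else iterrev - 1
            e = index[iterrev]
        chain.reverse()
        text = chunk(iterrev)                 # full text at the chain base
        return apply(text, [chunk(r) for r in chain])

    # toy run: rev 0 is a full text, 1 and 2 are "append" deltas
    index = {0: (0, 0, 0, 0), 1: (0, 0, 0, 0), 2: (0, 0, 0, 1)}
    chunks = {0: 'base', 1: '+one', 2: '+two'}
    out = reconstruct(index, 2, True, chunks.get,
                      lambda t, ds: t + ''.join(d[1:] for d in ds))
    assert out == 'baseonetwo'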
945 949 def _checkhash(self, text, node, rev):
946 950 p1, p2 = self.parents(node)
947 951 if node != hash(text, p1, p2):
948 952 raise RevlogError(_("integrity check failed on %s:%d")
949 953 % (self.indexfile, rev))
950 954 return text
951 955
952 956 def checkinlinesize(self, tr, fp=None):
953 957 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
954 958 return
955 959
956 960 trinfo = tr.find(self.indexfile)
957 961 if trinfo is None:
958 962 raise RevlogError(_("%s not found in the transaction")
959 963 % self.indexfile)
960 964
961 965 trindex = trinfo[2]
962 966 dataoff = self.start(trindex)
963 967
964 968 tr.add(self.datafile, dataoff)
965 969
966 970 if fp:
967 971 fp.flush()
968 972 fp.close()
969 973
970 974 df = self.opener(self.datafile, 'w')
971 975 try:
972 976 for r in self:
973 977 df.write(self._chunkraw(r, r))
974 978 finally:
975 979 df.close()
976 980
977 981 fp = self.opener(self.indexfile, 'w', atomictemp=True)
978 982 self.version &= ~(REVLOGNGINLINEDATA)
979 983 self._inline = False
980 984 for i in self:
981 985 e = self._io.packentry(self.index[i], self.node, self.version, i)
982 986 fp.write(e)
983 987
984 988 # if we don't call close, the temp file will never replace the
985 989 # real index
986 990 fp.close()
987 991
988 992 tr.replace(self.indexfile, trindex * self._io.size)
989 993 self._chunkclear()
990 994
991 995 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
992 996 """add a revision to the log
993 997
994 998 text - the revision data to add
995 999 transaction - the transaction object used for rollback
996 1000 link - the linkrev data to add
997 1001 p1, p2 - the parent nodeids of the revision
998 1002 cachedelta - an optional precomputed delta
999 1003 """
1000 1004 node = hash(text, p1, p2)
1001 1005 if node in self.nodemap:
1002 1006 return node
1003 1007
1004 1008 dfh = None
1005 1009 if not self._inline:
1006 1010 dfh = self.opener(self.datafile, "a")
1007 1011 ifh = self.opener(self.indexfile, "a+")
1008 1012 try:
1009 1013 return self._addrevision(node, text, transaction, link, p1, p2,
1010 1014 cachedelta, ifh, dfh)
1011 1015 finally:
1012 1016 if dfh:
1013 1017 dfh.close()
1014 1018 ifh.close()
1015 1019
1016 1020 def _addrevision(self, node, text, transaction, link, p1, p2,
1017 1021 cachedelta, ifh, dfh):
1018 1022 """internal function to add revisions to the log
1019 1023
1020 1024 see addrevision for argument descriptions.
1021 1025 invariants:
1022 1026 - text is optional (can be None); if not set, cachedelta must be set.
1023 1027 if both are set, they must correspond to each other.
1024 1028 """
1025 1029 btext = [text]
1026 1030 def buildtext():
1027 1031 if btext[0] is not None:
1028 1032 return btext[0]
1029 1033 # flush any pending writes so the revision() call below can read them
1030 1034 if dfh:
1031 1035 dfh.flush()
1032 1036 ifh.flush()
1033 1037 basetext = self.revision(self.node(cachedelta[0]))
1034 1038 btext[0] = mdiff.patch(basetext, cachedelta[1])
1035 1039 chk = hash(btext[0], p1, p2)
1036 1040 if chk != node:
1037 1041 raise RevlogError(_("consistency error in delta"))
1038 1042 return btext[0]
1039 1043
1040 1044 def builddelta(rev):
1041 1045 # can we use the cached delta?
1042 1046 if cachedelta and cachedelta[0] == rev:
1043 1047 delta = cachedelta[1]
1044 1048 else:
1045 1049 t = buildtext()
1046 1050 ptext = self.revision(self.node(rev))
1047 1051 delta = mdiff.textdiff(ptext, t)
1048 1052 data = compress(delta)
1049 1053 l = len(data[1]) + len(data[0])
1050 1054 if basecache[0] == rev:
1051 1055 chainbase = basecache[1]
1052 1056 else:
1053 1057 chainbase = self.chainbase(rev)
1054 1058 dist = l + offset - self.start(chainbase)
1055 1059 if self._generaldelta:
1056 1060 base = rev
1057 1061 else:
1058 1062 base = chainbase
1059 1063 return dist, l, data, base, chainbase
1060 1064
1061 1065 curr = len(self)
1062 1066 prev = curr - 1
1063 1067 base = chainbase = curr
1064 1068 offset = self.end(prev)
1065 1069 flags = 0
1066 1070 d = None
1067 1071 basecache = self._basecache
1068 1072 p1r, p2r = self.rev(p1), self.rev(p2)
1069 1073
1070 1074 # should we try to build a delta?
1071 1075 if prev != nullrev:
1072 1076 if self._generaldelta:
1073 1077 if p1r >= basecache[1]:
1074 1078 d = builddelta(p1r)
1075 1079 elif p2r >= basecache[1]:
1076 1080 d = builddelta(p2r)
1077 1081 else:
1078 1082 d = builddelta(prev)
1079 1083 else:
1080 1084 d = builddelta(prev)
1081 1085 dist, l, data, base, chainbase = d
1082 1086
1083 1087 # full versions are inserted when the needed deltas
1084 1088 # become comparable to the uncompressed text
1085 1089 if text is None:
1086 1090 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1087 1091 cachedelta[1])
1088 1092 else:
1089 1093 textlen = len(text)
1090 1094 if d is None or dist > textlen * 2:
1091 1095 text = buildtext()
1092 1096 data = compress(text)
1093 1097 l = len(data[1]) + len(data[0])
1094 1098 base = chainbase = curr
1095 1099
1096 1100 e = (offset_type(offset, flags), l, textlen,
1097 1101 base, link, p1r, p2r, node)
1098 1102 self.index.insert(-1, e)
1099 1103 self.nodemap[node] = curr
1100 1104
1101 1105 entry = self._io.packentry(e, self.node, self.version, curr)
1102 1106 if not self._inline:
1103 1107 transaction.add(self.datafile, offset)
1104 1108 transaction.add(self.indexfile, curr * len(entry))
1105 1109 if data[0]:
1106 1110 dfh.write(data[0])
1107 1111 dfh.write(data[1])
1108 1112 dfh.flush()
1109 1113 ifh.write(entry)
1110 1114 else:
1111 1115 offset += curr * self._io.size
1112 1116 transaction.add(self.indexfile, offset, curr)
1113 1117 ifh.write(entry)
1114 1118 ifh.write(data[0])
1115 1119 ifh.write(data[1])
1116 1120 self.checkinlinesize(transaction, ifh)
1117 1121
1118 1122 if type(text) == str: # only accept immutable objects
1119 1123 self._cache = (node, curr, text)
1120 1124 self._basecache = (curr, chainbase)
1121 1125 return node
1122 1126
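The decision near the end of _addrevision is what keeps delta chains bounded: a delta is kept only while dist, the span of compressed data the chain must read, stays at or below twice the uncompressed text length; otherwise a full version is stored and the chain restarts. The heuristic in isolation:

    # store a full text when there is no usable delta, or when reading
    # the delta chain would cost more than ~2x the plain text
    def should_store_fulltext(dist, textlen, have_delta):
        return not have_delta or dist > textlen * 2

    assert should_store_fulltext(5000, 1000, True)        # chain too long
    assert not should_store_fulltext(1500, 1000, True)    # delta still cheap
    assert should_store_fulltext(0, 1000, False)          # first revision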
1123 1127 def group(self, nodelist, bundler, reorder=None):
1124 1128 """Calculate a delta group, yielding a sequence of changegroup chunks
1125 1129 (strings).
1126 1130
1127 1131 Given a list of changeset revs, return a set of deltas and
1128 1132 metadata corresponding to nodes. The first delta is
1129 1133 first parent(nodelist[0]) -> nodelist[0]; the receiver is
1130 1134 guaranteed to have this parent, as it has all history before
1131 1135 these changesets. If the first parent is nullrev, the
1132 1136 changegroup starts with a full revision.
1133 1137 """
1134 1138
1135 1139 # if we don't have any revisions touched by these changesets, bail
1136 1140 if len(nodelist) == 0:
1137 1141 yield bundler.close()
1138 1142 return
1139 1143
1140 1144 # for generaldelta revlogs, we linearize the revs; this will both be
1141 1145 # much quicker and generate a much smaller bundle
1142 1146 if (self._generaldelta and reorder is not False) or reorder:
1143 1147 dag = dagutil.revlogdag(self)
1144 1148 revs = set(self.rev(n) for n in nodelist)
1145 1149 revs = dag.linearize(revs)
1146 1150 else:
1147 1151 revs = sorted([self.rev(n) for n in nodelist])
1148 1152
1149 1153 # add the parent of the first rev
1150 1154 p = self.parentrevs(revs[0])[0]
1151 1155 revs.insert(0, p)
1152 1156
1153 1157 # build deltas
1154 1158 for r in xrange(len(revs) - 1):
1155 1159 prev, curr = revs[r], revs[r + 1]
1156 1160 for c in bundler.revchunk(self, curr, prev):
1157 1161 yield c
1158 1162
1159 1163 yield bundler.close()
1160 1164
1161 1165 def addgroup(self, bundle, linkmapper, transaction):
1162 1166 """
1163 1167 add a delta group
1164 1168
1165 1169 given a set of deltas, add them to the revision log. the
1166 1170 first delta is against its parent, which should be in our
1167 1171 log; the rest are against the previous delta.
1168 1172 """
1169 1173
1170 1174 # track the base of the current delta log
1171 1175 content = []
1172 1176 node = None
1173 1177
1174 1178 r = len(self)
1175 1179 end = 0
1176 1180 if r:
1177 1181 end = self.end(r - 1)
1178 1182 ifh = self.opener(self.indexfile, "a+")
1179 1183 isize = r * self._io.size
1180 1184 if self._inline:
1181 1185 transaction.add(self.indexfile, end + isize, r)
1182 1186 dfh = None
1183 1187 else:
1184 1188 transaction.add(self.indexfile, isize, r)
1185 1189 transaction.add(self.datafile, end)
1186 1190 dfh = self.opener(self.datafile, "a")
1187 1191
1188 1192 try:
1189 1193 # loop through our set of deltas
1190 1194 chain = None
1191 1195 while True:
1192 1196 chunkdata = bundle.deltachunk(chain)
1193 1197 if not chunkdata:
1194 1198 break
1195 1199 node = chunkdata['node']
1196 1200 p1 = chunkdata['p1']
1197 1201 p2 = chunkdata['p2']
1198 1202 cs = chunkdata['cs']
1199 1203 deltabase = chunkdata['deltabase']
1200 1204 delta = chunkdata['delta']
1201 1205
1202 1206 content.append(node)
1203 1207
1204 1208 link = linkmapper(cs)
1205 1209 if node in self.nodemap:
1206 1210 # this can happen if two branches make the same change
1207 1211 chain = node
1208 1212 continue
1209 1213
1210 1214 for p in (p1, p2):
1211 1215 if p not in self.nodemap:
1212 1216 raise LookupError(p, self.indexfile,
1213 1217 _('unknown parent'))
1214 1218
1215 1219 if deltabase not in self.nodemap:
1216 1220 raise LookupError(deltabase, self.indexfile,
1217 1221 _('unknown delta base'))
1218 1222
1219 1223 baserev = self.rev(deltabase)
1220 1224 chain = self._addrevision(node, None, transaction, link,
1221 1225 p1, p2, (baserev, delta), ifh, dfh)
1222 1226 if not dfh and not self._inline:
1223 1227 # addrevision switched from inline to conventional
1224 1228 # reopen the index
1225 1229 ifh.close()
1226 1230 dfh = self.opener(self.datafile, "a")
1227 1231 ifh = self.opener(self.indexfile, "a")
1228 1232 finally:
1229 1233 if dfh:
1230 1234 dfh.close()
1231 1235 ifh.close()
1232 1236
1233 1237 return content
1234 1238
1235 1239 def strip(self, minlink, transaction):
1236 1240 """truncate the revlog on the first revision with a linkrev >= minlink
1237 1241
1238 1242 This function is called when we're stripping revision minlink and
1239 1243 its descendants from the repository.
1240 1244
1241 1245 We have to remove all revisions with linkrev >= minlink, because
1242 1246 the equivalent changelog revisions will be renumbered after the
1243 1247 strip.
1244 1248
1245 1249 So we truncate the revlog on the first of these revisions, and
1246 1250 trust that the caller has saved the revisions that shouldn't be
1247 1251 removed and that it'll re-add them after this truncation.
1248 1252 """
1249 1253 if len(self) == 0:
1250 1254 return
1251 1255
1252 1256 for rev in self:
1253 1257 if self.index[rev][4] >= minlink:
1254 1258 break
1255 1259 else:
1256 1260 return
1257 1261
1258 1262 # first truncate the files on disk
1259 1263 end = self.start(rev)
1260 1264 if not self._inline:
1261 1265 transaction.add(self.datafile, end)
1262 1266 end = rev * self._io.size
1263 1267 else:
1264 1268 end += rev * self._io.size
1265 1269
1266 1270 transaction.add(self.indexfile, end)
1267 1271
1268 1272 # then reset internal state in memory to forget those revisions
1269 1273 self._cache = None
1270 1274 self._chunkclear()
1271 1275 for x in xrange(rev, len(self)):
1272 1276 del self.nodemap[self.node(x)]
1273 1277
1274 1278 del self.index[rev:-1]
1275 1279
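strip() scans for the first rev whose linkrev (e[4]) reaches minlink, then truncates both files at the offsets that rev implies; in the inline layout index records and data share one file, so the two lengths add. A sketch of just the offset arithmetic (names illustrative; starts[rev] plays self.start(rev)):

    def strip_offsets(linkrevs, starts, minlink, entry_size, inline):
        for rev, lr in enumerate(linkrevs):
            if lr >= minlink:
                break
        else:
            return None                       # nothing to strip
        data_end = starts[rev]                # first data byte of rev
        if inline:
            return {'index': data_end + rev * entry_size}
        return {'index': rev * entry_size, 'data': data_end}

    assert strip_offsets([0, 1, 2], [0, 10, 20], 1, 64, False) == \
           {'index': 64, 'data': 10}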
1276 1280 def checksize(self):
1277 1281 expected = 0
1278 1282 if len(self):
1279 1283 expected = max(0, self.end(len(self) - 1))
1280 1284
1281 1285 try:
1282 1286 f = self.opener(self.datafile)
1283 1287 f.seek(0, 2)
1284 1288 actual = f.tell()
1285 1289 f.close()
1286 1290 dd = actual - expected
1287 1291 except IOError, inst:
1288 1292 if inst.errno != errno.ENOENT:
1289 1293 raise
1290 1294 dd = 0
1291 1295
1292 1296 try:
1293 1297 f = self.opener(self.indexfile)
1294 1298 f.seek(0, 2)
1295 1299 actual = f.tell()
1296 1300 f.close()
1297 1301 s = self._io.size
1298 1302 i = max(0, actual // s)
1299 1303 di = actual - (i * s)
1300 1304 if self._inline:
1301 1305 databytes = 0
1302 1306 for r in self:
1303 1307 databytes += max(0, self.length(r))
1304 1308 dd = 0
1305 1309 di = actual - len(self) * s - databytes
1306 1310 except IOError, inst:
1307 1311 if inst.errno != errno.ENOENT:
1308 1312 raise
1309 1313 di = 0
1310 1314
1311 1315 return (dd, di)
1312 1316
1313 1317 def files(self):
1314 1318 res = [self.indexfile]
1315 1319 if not self._inline:
1316 1320 res.append(self.datafile)
1317 1321 return res