##// END OF EJS Templates
manifest.c: new extension code to lazily parse manifests...
Augie Fackler -
r24214:a5f1bccd default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (841 lines changed) Show them Hide them
@@ -0,0 +1,841 b''
1 /*
2 * manifest.c - manifest type that does on-demand parsing.
3 *
4 * Copyright 2015, Google Inc.
5 *
6 * This software may be used and distributed according to the terms of
7 * the GNU General Public License, incorporated herein by reference.
8 */
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12
13 #include <Python.h>
14
15 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
16 #ifdef _MSC_VER
17 #define true 1
18 #define false 0
19 typedef unsigned char bool;
20 #else
21 #include <stdbool.h>
22 #endif
23
24 #define DEFAULT_LINES 100000
25
26 typedef struct {
27 char *start;
28 Py_ssize_t len; /* length of line including terminal newline */
29 char hash_suffix;
30 bool from_malloc;
31 bool deleted;
32 } line;
33
34 typedef struct {
35 PyObject_HEAD
36 PyObject *pydata;
37 line *lines;
38 int numlines; /* number of line entries */
39 int livelines; /* number of non-deleted lines */
40 int maxlines; /* allocated number of lines */
41 bool dirty;
42 } lazymanifest;
43
44 #define MANIFEST_OOM -1
45 #define MANIFEST_NOT_SORTED -2
46 #define MANIFEST_MALFORMED -3
47
48 /* defined in parsers.c */
49 PyObject *unhexlify(const char *str, int len);
50
51 /* get the length of the path for a line */
52 static size_t pathlen(line *l) {
53 return strlen(l->start);
54 }
55
56 /* get the node value of a single line */
57 static PyObject *nodeof(line *l) {
58 char *s = l->start;
59 ssize_t llen = pathlen(l);
60 PyObject *hash = unhexlify(s + llen + 1, 40);
61 if (!hash) {
62 return NULL;
63 }
64 if (l->hash_suffix != '\0') {
65 char newhash[21];
66 memcpy(newhash, PyString_AsString(hash), 20);
67 Py_DECREF(hash);
68 newhash[20] = l->hash_suffix;
69 hash = PyString_FromStringAndSize(newhash, 21);
70 }
71 return hash;
72 }
73
74 /* get the node hash and flags of a line as a tuple */
75 static PyObject *hashflags(line *l)
76 {
77 char *s = l->start;
78 size_t plen = pathlen(l);
79 PyObject *hash = nodeof(l);
80
81 /* 40 for hash, 1 for null byte, 1 for newline */
82 size_t hplen = plen + 42;
83 Py_ssize_t flen = l->len - hplen;
84 PyObject *flags;
85 PyObject *tup;
86
87 if (!hash)
88 return NULL;
89 flags = PyString_FromStringAndSize(s + hplen - 1, flen);
90 if (!flags) {
91 Py_DECREF(hash);
92 return NULL;
93 }
94 tup = PyTuple_Pack(2, hash, flags);
95 Py_DECREF(flags);
96 Py_DECREF(hash);
97 return tup;
98 }
99
100 /* if we're about to run out of space in the line index, add more */
101 static bool realloc_if_full(lazymanifest *self)
102 {
103 if (self->numlines == self->maxlines) {
104 self->maxlines *= 2;
105 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
106 }
107 return self->lines;
108 }
109
110 /*
111 * Find the line boundaries in the manifest that 'data' points to and store
112 * information about each line in 'self'.
113 */
114 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
115 {
116 char *prev = NULL;
117 while (len > 0) {
118 line *l;
119 char *next = memchr(data, '\n', len);
120 if (!next) {
121 return MANIFEST_MALFORMED;
122 }
123 next++; /* advance past newline */
124 if (!realloc_if_full(self)) {
125 return MANIFEST_OOM; /* no memory */
126 }
127 if (prev && strcmp(prev, data) > -1) {
128 /* This data isn't sorted, so we have to abort. */
129 return MANIFEST_NOT_SORTED;
130 }
131 l = self->lines + ((self->numlines)++);
132 l->start = data;
133 l->len = next - data;
134 l->hash_suffix = '\0';
135 l->from_malloc = false;
136 l->deleted = false;
137 len = len - l->len;
138 prev = data;
139 data = next;
140 }
141 self->livelines = self->numlines;
142 return 0;
143 }
144
145 static int lazymanifest_init(lazymanifest *self, PyObject *args)
146 {
147 char *data;
148 Py_ssize_t len;
149 int err, ret;
150 PyObject *pydata;
151 if (!PyArg_ParseTuple(args, "S", &pydata)) {
152 return -1;
153 }
154 err = PyString_AsStringAndSize(pydata, &data, &len);
155
156 self->dirty = false;
157 if (err == -1)
158 return -1;
159 self->pydata = pydata;
160 Py_INCREF(self->pydata);
161 Py_BEGIN_ALLOW_THREADS
162 self->lines = malloc(DEFAULT_LINES * sizeof(line));
163 self->maxlines = DEFAULT_LINES;
164 self->numlines = 0;
165 if (!self->lines)
166 ret = MANIFEST_OOM;
167 else
168 ret = find_lines(self, data, len);
169 Py_END_ALLOW_THREADS
170 switch (ret) {
171 case 0:
172 break;
173 case MANIFEST_OOM:
174 PyErr_NoMemory();
175 break;
176 case MANIFEST_NOT_SORTED:
177 PyErr_Format(PyExc_ValueError,
178 "Manifest lines not in sorted order.");
179 break;
180 case MANIFEST_MALFORMED:
181 PyErr_Format(PyExc_ValueError,
182 "Manifest did not end in a newline.");
183 break;
184 default:
185 PyErr_Format(PyExc_ValueError,
186 "Unknown problem parsing manifest.");
187 }
188 return ret == 0 ? 0 : -1;
189 }
190
191 static void lazymanifest_dealloc(lazymanifest *self)
192 {
193 /* free any extra lines we had to allocate */
194 int i;
195 for (i = 0; i < self->numlines; i++) {
196 if (self->lines[i].from_malloc) {
197 free(self->lines[i].start);
198 }
199 }
200 if (self->lines) {
201 free(self->lines);
202 self->lines = NULL;
203 }
204 if (self->pydata) {
205 Py_DECREF(self->pydata);
206 self->pydata = NULL;
207 }
208 PyObject_Del(self);
209 }
210
211 /* iteration support */
212
213 typedef struct {
214 PyObject_HEAD lazymanifest *m;
215 Py_ssize_t pos;
216 } lmIter;
217
218 static void lmiter_dealloc(PyObject *o)
219 {
220 lmIter *self = (lmIter *)o;
221 Py_DECREF(self->m);
222 PyObject_Del(self);
223 }
224
225 static PyObject *lmiter_iternext(PyObject *o)
226 {
227 size_t pl;
228 line *l;
229 Py_ssize_t consumed;
230 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
231 lmIter *self = (lmIter *)o;
232 do {
233 self->pos++;
234 if (self->pos >= self->m->numlines) {
235 goto bail;
236 }
237 /* skip over deleted manifest entries */
238 } while (self->m->lines[self->pos].deleted);
239 l = self->m->lines + self->pos;
240 pl = pathlen(l);
241 path = PyString_FromStringAndSize(l->start, pl);
242 hash = nodeof(l);
243 consumed = pl + 41;
244 flags = PyString_FromStringAndSize(l->start + consumed,
245 l->len - consumed - 1);
246 if (!flags) {
247 goto bail;
248 }
249 ret = PyTuple_Pack(3, path, hash, flags);
250 bail:
251 Py_XDECREF(path);
252 Py_XDECREF(hash);
253 Py_XDECREF(flags);
254 return ret;
255 }
256
257 static PyTypeObject lazymanifestIterator = {
258 PyObject_HEAD_INIT(NULL)
259 0, /*ob_size */
260 "parsers.lazymanifest.iterator", /*tp_name */
261 sizeof(lmIter), /*tp_basicsize */
262 0, /*tp_itemsize */
263 lmiter_dealloc, /*tp_dealloc */
264 0, /*tp_print */
265 0, /*tp_getattr */
266 0, /*tp_setattr */
267 0, /*tp_compare */
268 0, /*tp_repr */
269 0, /*tp_as_number */
270 0, /*tp_as_sequence */
271 0, /*tp_as_mapping */
272 0, /*tp_hash */
273 0, /*tp_call */
274 0, /*tp_str */
275 0, /*tp_getattro */
276 0, /*tp_setattro */
277 0, /*tp_as_buffer */
278 /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
279 use tp_iter and tp_iternext fields. */
280 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
281 "Iterator for a lazymanifest.", /* tp_doc */
282 0, /* tp_traverse */
283 0, /* tp_clear */
284 0, /* tp_richcompare */
285 0, /* tp_weaklistoffset */
286 PyObject_SelfIter, /* tp_iter: __iter__() method */
287 lmiter_iternext, /* tp_iternext: next() method */
288 };
289
290 static lazymanifest *lazymanifest_copy(lazymanifest *self);
291
292 static PyObject *lazymanifest_getiter(lazymanifest *self)
293 {
294 lmIter *i = NULL;
295 lazymanifest *t = lazymanifest_copy(self);
296 if (!t) {
297 PyErr_NoMemory();
298 return NULL;
299 }
300 i = PyObject_New(lmIter, &lazymanifestIterator);
301 if (i) {
302 i->m = t;
303 i->pos = -1;
304 } else {
305 Py_DECREF(t);
306 PyErr_NoMemory();
307 }
308 return (PyObject *)i;
309 }
310
311 /* __getitem__ and __setitem__ support */
312
313 static Py_ssize_t lazymanifest_size(lazymanifest *self)
314 {
315 return self->livelines;
316 }
317
318 static int linecmp(const void *left, const void *right)
319 {
320 return strcmp(((const line *)left)->start,
321 ((const line *)right)->start);
322 }
323
324 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
325 {
326 line needle;
327 line *hit;
328 if (!PyString_Check(key)) {
329 PyErr_Format(PyExc_TypeError,
330 "getitem: manifest keys must be a string.");
331 return NULL;
332 }
333 needle.start = PyString_AsString(key);
334 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
335 &linecmp);
336 if (!hit || hit->deleted) {
337 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
338 return NULL;
339 }
340 return hashflags(hit);
341 }
342
343 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
344 {
345 line needle;
346 line *hit;
347 if (!PyString_Check(key)) {
348 PyErr_Format(PyExc_TypeError,
349 "delitem: manifest keys must be a string.");
350 return -1;
351 }
352 needle.start = PyString_AsString(key);
353 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
354 &linecmp);
355 if (!hit || hit->deleted) {
356 PyErr_Format(PyExc_KeyError,
357 "Tried to delete nonexistent manifest entry.");
358 return -1;
359 }
360 self->dirty = true;
361 hit->deleted = true;
362 self->livelines--;
363 return 0;
364 }
365
366 static int lazymanifest_setitem(
367 lazymanifest *self, PyObject *key, PyObject *value)
368 {
369 char *path;
370 Py_ssize_t plen;
371 PyObject *pyhash;
372 Py_ssize_t hlen;
373 char *hash;
374 PyObject *pyflags;
375 char *flags;
376 Py_ssize_t flen;
377 size_t dlen;
378 char *dest;
379 int i;
380 line new;
381 line *hit;
382 if (!PyString_Check(key)) {
383 PyErr_Format(PyExc_TypeError,
384 "setitem: manifest keys must be a string.");
385 return -1;
386 }
387 if (!value) {
388 return lazymanifest_delitem(self, key);
389 }
390 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
391 PyErr_Format(PyExc_TypeError,
392 "Manifest values must be a tuple of (node, flags).");
393 return -1;
394 }
395 if (PyString_AsStringAndSize(key, &path, &plen) == -1) {
396 return -1;
397 }
398
399 pyhash = PyTuple_GetItem(value, 0);
400 if (!PyString_Check(pyhash)) {
401 PyErr_Format(PyExc_TypeError,
402 "node must be a 20-byte string");
403 return -1;
404 }
405 hlen = PyString_Size(pyhash);
406 /* Some parts of the codebase try and set 21 or 22
407 * byte "hash" values in order to perturb things for
408 * status. We have to preserve at least the 21st
409 * byte. Sigh. If there's a 22nd byte, we drop it on
410 * the floor, which works fine.
411 */
412 if (hlen != 20 && hlen != 21 && hlen != 22) {
413 PyErr_Format(PyExc_TypeError,
414 "node must be a 20-byte string");
415 return -1;
416 }
417 hash = PyString_AsString(pyhash);
418
419 pyflags = PyTuple_GetItem(value, 1);
420 if (!PyString_Check(pyflags) || PyString_Size(pyflags) > 1) {
421 PyErr_Format(PyExc_TypeError,
422 "flags must a 0 or 1 byte string");
423 return -1;
424 }
425 if (PyString_AsStringAndSize(pyflags, &flags, &flen) == -1) {
426 return -1;
427 }
428 /* one null byte and one newline */
429 dlen = plen + 41 + flen + 1;
430 dest = malloc(dlen);
431 if (!dest) {
432 PyErr_NoMemory();
433 return -1;
434 }
435 memcpy(dest, path, plen + 1);
436 for (i = 0; i < 20; i++) {
437 sprintf(dest + plen + 1 + (i * 2), "%02hhx", hash[i]);
438 }
439 memcpy(dest + plen + 41, flags, flen);
440 dest[plen + 41 + flen] = '\n';
441 new.start = dest;
442 new.len = dlen;
443 new.hash_suffix = '\0';
444 if (hlen > 20) {
445 new.hash_suffix = hash[20];
446 }
447 new.from_malloc = true; /* is `start` a pointer we allocated? */
448 new.deleted = false; /* is this entry deleted? */
449 hit = bsearch(&new, self->lines, self->numlines,
450 sizeof(line), &linecmp);
451 self->dirty = true;
452 if (hit) {
453 /* updating a line we already had */
454 if (hit->from_malloc) {
455 free(hit->start);
456 }
457 if (hit->deleted) {
458 self->livelines++;
459 }
460 *hit = new;
461 } else {
462 /* new line */
463 if (!realloc_if_full(self)) {
464 PyErr_NoMemory();
465 return -1;
466 }
467 self->lines[self->numlines++] = new;
468 self->livelines++;
469 /* TODO: do a binary search and insert rather than
470 * append and qsort. Also offer a batch-insert
471 * interface, which should be a nice little
472 * performance win.
473 */
474 qsort(self->lines, self->numlines, sizeof(line), &linecmp);
475 }
476 return 0;
477 }
478
479 static PyMappingMethods lazymanifest_mapping_methods = {
480 (lenfunc)lazymanifest_size, /* mp_length */
481 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
482 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
483 };
484
485 /* sequence methods (important, or __contains__ builds an iterator) */
486
487 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
488 {
489 line needle;
490 line *hit;
491 if (!PyString_Check(key)) {
492 /* Our keys are always strings, so if the contains
493 * check is for a non-string, just return false. */
494 return 0;
495 }
496 needle.start = PyString_AsString(key);
497 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
498 &linecmp);
499 if (!hit || hit->deleted) {
500 return 0;
501 }
502 return 1;
503 }
504
505 static PySequenceMethods lazymanifest_seq_meths = {
506 (lenfunc)lazymanifest_size, /* sq_length */
507 0, /* sq_concat */
508 0, /* sq_repeat */
509 0, /* sq_item */
510 0, /* sq_slice */
511 0, /* sq_ass_item */
512 0, /* sq_ass_slice */
513 (objobjproc)lazymanifest_contains, /* sq_contains */
514 0, /* sq_inplace_concat */
515 0, /* sq_inplace_repeat */
516 };
517
518
519 /* Other methods (copy, diff, etc) */
520 static PyTypeObject lazymanifestType;
521
522 /* If the manifest has changes, build the new manifest text and reindex it. */
523 static int compact(lazymanifest *self) {
524 int i;
525 ssize_t need = 0;
526 char *data;
527 line *src, *dst;
528 PyObject *pydata;
529 if (!self->dirty)
530 return 0;
531 for (i = 0; i < self->numlines; i++) {
532 if (!self->lines[i].deleted) {
533 need += self->lines[i].len;
534 }
535 }
536 pydata = PyString_FromStringAndSize(NULL, need);
537 if (!pydata)
538 return -1;
539 data = PyString_AsString(pydata);
540 if (!data) {
541 return -1;
542 }
543 src = self->lines;
544 dst = self->lines;
545 for (i = 0; i < self->numlines; i++, src++) {
546 char *tofree = NULL;
547 if (src->from_malloc) {
548 tofree = src->start;
549 }
550 if (!src->deleted) {
551 memcpy(data, src->start, src->len);
552 *dst = *src;
553 dst->start = data;
554 dst->from_malloc = false;
555 data += dst->len;
556 dst++;
557 }
558 free(tofree);
559 }
560 Py_DECREF(self->pydata);
561 self->pydata = pydata;
562 self->numlines = self->livelines;
563 self->dirty = false;
564 return 0;
565 }
566
567 static PyObject *lazymanifest_text(lazymanifest *self)
568 {
569 if (compact(self) != 0) {
570 PyErr_NoMemory();
571 return NULL;
572 }
573 Py_INCREF(self->pydata);
574 return self->pydata;
575 }
576
577 static lazymanifest *lazymanifest_copy(lazymanifest *self)
578 {
579 lazymanifest *copy = NULL;
580 if (compact(self) != 0) {
581 goto nomem;
582 }
583 copy = PyObject_New(lazymanifest, &lazymanifestType);
584 if (!copy) {
585 goto nomem;
586 }
587 copy->numlines = self->numlines;
588 copy->livelines = self->livelines;
589 copy->dirty = false;
590 copy->lines = malloc(self->maxlines *sizeof(line));
591 if (!copy->lines) {
592 goto nomem;
593 }
594 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
595 copy->maxlines = self->maxlines;
596 copy->pydata = self->pydata;
597 Py_INCREF(copy->pydata);
598 return copy;
599 nomem:
600 PyErr_NoMemory();
601 Py_XDECREF(copy);
602 return NULL;
603 }
604
605 static lazymanifest *lazymanifest_filtercopy(
606 lazymanifest *self, PyObject *matchfn)
607 {
608 lazymanifest *copy = NULL;
609 int i;
610 if (!PyCallable_Check(matchfn)) {
611 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
612 return NULL;
613 }
614 /* compact ourselves first to avoid double-frees later when we
615 * compact tmp so that it doesn't have random pointers to our
616 * underlying from_malloc-data (self->pydata is safe) */
617 if (compact(self) != 0) {
618 goto nomem;
619 }
620 copy = PyObject_New(lazymanifest, &lazymanifestType);
621 copy->dirty = true;
622 copy->lines = malloc(self->maxlines * sizeof(line));
623 if (!copy->lines) {
624 goto nomem;
625 }
626 copy->maxlines = self->maxlines;
627 copy->numlines = 0;
628 copy->pydata = self->pydata;
629 Py_INCREF(self->pydata);
630 for (i = 0; i < self->numlines; i++) {
631 PyObject *arg = PyString_FromString(self->lines[i].start);
632 PyObject *arglist = PyTuple_Pack(1, arg);
633 PyObject *result = PyObject_CallObject(matchfn, arglist);
634 Py_DECREF(arglist);
635 Py_DECREF(arg);
636 /* if the callback raised an exception, just let it
637 * through and give up */
638 if (!result) {
639 free(copy->lines);
640 Py_DECREF(self->pydata);
641 return NULL;
642 }
643 if (PyObject_IsTrue(result)) {
644 assert(!(self->lines[i].from_malloc));
645 copy->lines[copy->numlines++] = self->lines[i];
646 }
647 Py_DECREF(result);
648 }
649 copy->livelines = copy->numlines;
650 return copy;
651 nomem:
652 PyErr_NoMemory();
653 Py_XDECREF(copy);
654 return NULL;
655 }
656
657 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
658 {
659 lazymanifest *other;
660 PyObject *pyclean = NULL;
661 bool listclean;
662 PyObject *emptyTup = NULL, *ret = NULL;
663 PyObject *es;
664 int sneedle = 0, oneedle = 0;
665 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
666 return NULL;
667 }
668 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
669 es = PyString_FromString("");
670 if (!es) {
671 goto nomem;
672 }
673 emptyTup = PyTuple_Pack(2, Py_None, es);
674 Py_DECREF(es);
675 if (!emptyTup) {
676 goto nomem;
677 }
678 ret = PyDict_New();
679 if (!ret) {
680 goto nomem;
681 }
682 while (sneedle != self->numlines || oneedle != other->numlines) {
683 line *left = self->lines + sneedle;
684 line *right = other->lines + oneedle;
685 int result;
686 PyObject *key;
687 PyObject *outer;
688 /* If we're looking at a deleted entry and it's not
689 * the end of the manifest, just skip it. */
690 if (left->deleted && sneedle < self->numlines) {
691 sneedle++;
692 continue;
693 }
694 if (right->deleted && oneedle < other->numlines) {
695 oneedle++;
696 continue;
697 }
698 /* if we're at the end of either manifest, then we
699 * know the remaining items are adds so we can skip
700 * the strcmp. */
701 if (sneedle == self->numlines) {
702 result = 1;
703 } else if (oneedle == other->numlines) {
704 result = -1;
705 } else {
706 result = linecmp(left, right);
707 }
708 key = result <= 0 ?
709 PyString_FromString(left->start) :
710 PyString_FromString(right->start);
711 if (!key)
712 goto nomem;
713 if (result < 0) {
714 PyObject *l = hashflags(left);
715 if (!l) {
716 goto nomem;
717 }
718 outer = PyTuple_Pack(2, l, emptyTup);
719 Py_DECREF(l);
720 if (!outer) {
721 goto nomem;
722 }
723 PyDict_SetItem(ret, key, outer);
724 Py_DECREF(outer);
725 sneedle++;
726 } else if (result > 0) {
727 PyObject *r = hashflags(right);
728 if (!r) {
729 goto nomem;
730 }
731 outer = PyTuple_Pack(2, emptyTup, r);
732 Py_DECREF(r);
733 if (!outer) {
734 goto nomem;
735 }
736 PyDict_SetItem(ret, key, outer);
737 Py_DECREF(outer);
738 oneedle++;
739 } else {
740 /* file exists in both manifests */
741 if (left->len != right->len
742 || memcmp(left->start, right->start, left->len)
743 || left->hash_suffix != right->hash_suffix) {
744 PyObject *l = hashflags(left);
745 PyObject *r;
746 if (!l) {
747 goto nomem;
748 }
749 r = hashflags(right);
750 if (!r) {
751 Py_DECREF(l);
752 goto nomem;
753 }
754 outer = PyTuple_Pack(2, l, r);
755 Py_DECREF(l);
756 Py_DECREF(r);
757 if (!outer) {
758 goto nomem;
759 }
760 PyDict_SetItem(ret, key, outer);
761 Py_DECREF(outer);
762 } else if (listclean) {
763 PyDict_SetItem(ret, key, Py_None);
764 }
765 sneedle++;
766 oneedle++;
767 }
768 Py_DECREF(key);
769 }
770 Py_DECREF(emptyTup);
771 return ret;
772 nomem:
773 PyErr_NoMemory();
774 Py_XDECREF(ret);
775 Py_XDECREF(emptyTup);
776 return NULL;
777 }
778
779 static PyMethodDef lazymanifest_methods[] = {
780 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
781 "Make a copy of this lazymanifest."},
782 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
783 "Make a copy of this manifest filtered by matchfn."},
784 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
785 "Compare this lazymanifest to another one."},
786 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
787 "Encode this manifest to text."},
788 {NULL},
789 };
790
791 static PyTypeObject lazymanifestType = {
792 PyObject_HEAD_INIT(NULL)
793 0, /* ob_size */
794 "parsers.lazymanifest", /* tp_name */
795 sizeof(lazymanifest), /* tp_basicsize */
796 0, /* tp_itemsize */
797 (destructor)lazymanifest_dealloc, /* tp_dealloc */
798 0, /* tp_print */
799 0, /* tp_getattr */
800 0, /* tp_setattr */
801 0, /* tp_compare */
802 0, /* tp_repr */
803 0, /* tp_as_number */
804 &lazymanifest_seq_meths, /* tp_as_sequence */
805 &lazymanifest_mapping_methods, /* tp_as_mapping */
806 0, /* tp_hash */
807 0, /* tp_call */
808 0, /* tp_str */
809 0, /* tp_getattro */
810 0, /* tp_setattro */
811 0, /* tp_as_buffer */
812 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
813 "TODO(augie)", /* tp_doc */
814 0, /* tp_traverse */
815 0, /* tp_clear */
816 0, /* tp_richcompare */
817 0, /* tp_weaklistoffset */
818 (getiterfunc)lazymanifest_getiter, /* tp_iter */
819 0, /* tp_iternext */
820 lazymanifest_methods, /* tp_methods */
821 0, /* tp_members */
822 0, /* tp_getset */
823 0, /* tp_base */
824 0, /* tp_dict */
825 0, /* tp_descr_get */
826 0, /* tp_descr_set */
827 0, /* tp_dictoffset */
828 (initproc)lazymanifest_init, /* tp_init */
829 0, /* tp_alloc */
830 };
831
832 void manifest_module_init(PyObject * mod)
833 {
834 lazymanifestType.tp_new = PyType_GenericNew;
835 if (PyType_Ready(&lazymanifestType) < 0)
836 return;
837 Py_INCREF(&lazymanifestType);
838
839 PyModule_AddObject(mod, "lazymanifest",
840 (PyObject *)&lazymanifestType);
841 }
@@ -0,0 +1,221 b''
1 import binascii
2 import unittest
3 import itertools
4
5 import silenttestrunner
6
7 from mercurial import parsers
8
# 40-character hex node ids used to build the fixtures below.
HASH_1 = '1' * 40
HASH_2 = 'f' * 40
HASH_3 = '1234567890abcdef0987654321deadbeef0fcafe'
# Two sorted entries: one with the 'l' flag, one without flags.
A_SHORT_MANIFEST = (
    'bar/baz/qux.py\0%(hash2)s%(flag2)s\n'
    'foo\0%(hash1)s%(flag1)s\n'
    ) % {'hash1': HASH_1,
         'flag1': '',
         'hash2': HASH_2,
         'flag2': 'l',
         }

HUGE_MANIFEST_ENTRIES = 200001

# Built from HUGE_MANIFEST_ENTRIES so the fixture and the count asserted in
# testHugeManifest cannot drift apart.
A_HUGE_MANIFEST = ''.join(sorted(
    'file%d\0%s%s\n' % (i, h, f) for i, h, f in
    itertools.izip(xrange(HUGE_MANIFEST_ENTRIES),
                   itertools.cycle((HASH_1, HASH_2)),
                   itertools.cycle(('', 'x', 'l')))))
28
29 class testmanifest(unittest.TestCase):
30
31 def assertIn(self, thing, container, msg=None):
32 # assertIn new in 2.7, use it if available, otherwise polyfill
33 sup = getattr(unittest.TestCase, 'assertIn', False)
34 if sup:
35 return sup(self, thing, container, msg=msg)
36 if not msg:
37 msg = 'Expected %r in %r' % (thing, container)
38 self.assert_(thing in container, msg)
39
40 def testEmptyManifest(self):
41 m = parsers.lazymanifest('')
42 self.assertEqual(0, len(m))
43 self.assertEqual([], list(m))
44
45 def testManifest(self):
46 m = parsers.lazymanifest(A_SHORT_MANIFEST)
47 want = [
48 ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
49 ('foo', binascii.unhexlify(HASH_1), ''),
50 ]
51 self.assertEqual(len(want), len(m))
52 self.assertEqual(want, list(m))
53 self.assertEqual((binascii.unhexlify(HASH_1), ''), m['foo'])
54 self.assertRaises(KeyError, lambda : m['wat'])
55 self.assertEqual((binascii.unhexlify(HASH_2), 'l'),
56 m['bar/baz/qux.py'])
57
58 def testSetItem(self):
59 want = binascii.unhexlify(HASH_1), ''
60
61 m = parsers.lazymanifest('')
62 m['a'] = want
63 self.assertIn('a', m)
64 self.assertEqual(want, m['a'])
65 self.assertEqual('a\0' + HASH_1 + '\n', m.text())
66
67 m = parsers.lazymanifest(A_SHORT_MANIFEST)
68 m['a'] = want
69 self.assertEqual(want, m['a'])
70 self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST,
71 m.text())
72 m2 = m.copy()
73 del m
74 del m2 # make sure we don't double free() anything
75
76 def testCompaction(self):
77 unhex = binascii.unhexlify
78 h1, h2 = unhex(HASH_1), unhex(HASH_2)
79 m = parsers.lazymanifest(A_SHORT_MANIFEST)
80 m['alpha'] = h1, ''
81 m['beta'] = h2, ''
82 del m['foo']
83 want = 'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
84 HASH_1, HASH_2, HASH_2)
85 self.assertEqual(want, m.text())
86 self.assertEqual(3, len(m))
87 self.assertEqual((h1, ''), m['alpha'])
88 self.assertEqual((h2, ''), m['beta'])
89 self.assertRaises(KeyError, lambda : m['foo'])
90 w = [('alpha', h1, ''), ('bar/baz/qux.py', h2, 'l'), ('beta', h2, '')]
91 self.assertEqual(w, list(m))
92
93 def testSetGetNodeSuffix(self):
94 clean = parsers.lazymanifest(A_SHORT_MANIFEST)
95 m = parsers.lazymanifest(A_SHORT_MANIFEST)
96 h, f = m['foo']
97 want = h + 'a', f
98 # Merge code wants to set 21-byte fake hashes at times
99 m['foo'] = want
100 self.assertEqual(want, m['foo'])
101 self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
102 ('foo', binascii.unhexlify(HASH_1) + 'a', '')],
103 list(m))
104 # Sometimes it even tries a 22-byte fake hash, but we can
105 # return 21 and it'll work out
106 m['foo'] = want[0] + '+', f
107 self.assertEqual(want, m['foo'])
108 # make sure the suffix survives a copy
109 m2 = m.filtercopy(lambda x: x == 'foo')
110 self.assertEqual(want, m2['foo'])
111 self.assertEqual(1, len(m2))
112 self.assertEqual(('foo\0%s\n' % HASH_1), m2.text())
113 m2 = m.copy()
114 self.assertEqual(want, m2['foo'])
115 # suffix with iteration
116 self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
117 ('foo', want[0], '')], list(m))
118 # shows up in diff
119 self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean))
120 self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
121
122 def testFilterCopyException(self):
123 m = parsers.lazymanifest(A_SHORT_MANIFEST)
124 def filt(path):
125 if path == 'foo':
126 assert False
127 return True
128 self.assertRaises(AssertionError, m.filtercopy, filt)
129
130 def testRemoveItem(self):
131 m = parsers.lazymanifest(A_SHORT_MANIFEST)
132 del m['foo']
133 self.assertRaises(KeyError, lambda : m['foo'])
134 self.assertEqual(1, len(m))
135 self.assertEqual(1, len(list(m)))
136
137 def testManifestDiff(self):
138 MISSING = (None, '')
139 addl = 'z-only-in-left\0' + HASH_1 + '\n'
140 addr = 'z-only-in-right\0' + HASH_2 + 'x\n'
141 left = parsers.lazymanifest(
142 A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl)
143 right = parsers.lazymanifest(A_SHORT_MANIFEST + addr)
144 want = {
145 'foo': ((binascii.unhexlify(HASH_3), 'x'),
146 (binascii.unhexlify(HASH_1), '')),
147 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
148 'z-only-in-right': (MISSING, (binascii.unhexlify(HASH_2), 'x')),
149 }
150 self.assertEqual(want, left.diff(right))
151
152 want = {
153 'bar/baz/qux.py': (MISSING, (binascii.unhexlify(HASH_2), 'l')),
154 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')),
155 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')),
156 }
157 self.assertEqual(want, parsers.lazymanifest('').diff(left))
158
159 want = {
160 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING),
161 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING),
162 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
163 }
164 self.assertEqual(want, left.diff(parsers.lazymanifest('')))
165 copy = right.copy()
166 del copy['z-only-in-right']
167 del right['foo']
168 want = {
169 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
170 'z-only-in-right': ((binascii.unhexlify(HASH_2), 'x'), MISSING),
171 }
172 self.assertEqual(want, right.diff(copy))
173
174 short = parsers.lazymanifest(A_SHORT_MANIFEST)
175 pruned = short.copy()
176 del pruned['foo']
177 want = {
178 'foo': ((binascii.unhexlify(HASH_1), ''), MISSING),
179 }
180 self.assertEqual(want, short.diff(pruned))
181 want = {
182 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
183 }
184 self.assertEqual(want, pruned.diff(short))
185 want = {
186 'bar/baz/qux.py': None,
187 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
188 }
189 self.assertEqual(want, pruned.diff(short, True))
190
191 def testReversedLines(self):
192 backwards = ''.join(
193 l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l)
194 try:
195 parsers.lazymanifest(backwards)
196 self.fail('Should have raised ValueError')
197 except ValueError, v:
198 self.assertIn('Manifest lines not in sorted order.', str(v))
199
200 def testNoTerminalNewline(self):
201 try:
202 parsers.lazymanifest(A_SHORT_MANIFEST + 'wat')
203 self.fail('Should have raised ValueError')
204 except ValueError, v:
205 self.assertIn('Manifest did not end in a newline.', str(v))
206
207 def testNoNewLineAtAll(self):
208 try:
209 parsers.lazymanifest('wat')
210 self.fail('Should have raised ValueError')
211 except ValueError, v:
212 self.assertIn('Manifest did not end in a newline.', str(v))
213
214 def testHugeManifest(self):
215 m = parsers.lazymanifest(A_HUGE_MANIFEST)
216 self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
217 self.assertEqual(len(m), len(list(m)))
218
219
220 if __name__ == '__main__':
221 silenttestrunner.main(__name__)
@@ -1,2407 +1,2409 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static char *versionerrortext = "Python minor version mismatch";
18 18
/*
 * Byte value -> hex nybble lookup. Entries for '0'-'9', 'A'-'F' and
 * 'a'-'f' hold 0..15; every other byte maps to -1.
 */
static int8_t hextable[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x08 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x18 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x28 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */ 0, 1, 2, 3, 4, 5, 6, 7,          /* '0'-'7' */
	/* 0x38 */ 8, 9, -1, -1, -1, -1, -1, -1,    /* '8'-'9' */
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1,  /* 'A'-'F' */
	/* 0x48 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x58 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1,  /* 'a'-'f' */
	/* 0x68 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x78 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x88 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x98 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xa8 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xb8 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xc8 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xd8 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xe8 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xf8 */ -1, -1, -1, -1, -1, -1, -1, -1
};
37 37
/*
 * ASCII lower-casing table: 'A'-'Z' map to 'a'-'z', every other 7-bit
 * value maps to itself.
 */
static char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	' ', '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ':', ';', '<', '=', '>', '?',
	'@',
	'a', 'b', 'c', 'd', 'e', 'f', 'g',      /* A-G */
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', /* H-O */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', /* P-W */
	'x', 'y', 'z',                          /* X-Z */
	'[', '\\', ']', '^', '_',
	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', '{', '|', '}', '~', '\x7f'
};
58 58
59 59 static inline int hexdigit(const char *p, Py_ssize_t off)
60 60 {
61 61 int8_t val = hextable[(unsigned char)p[off]];
62 62
63 63 if (val >= 0) {
64 64 return val;
65 65 }
66 66
67 67 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
68 68 return 0;
69 69 }
70 70
71 71 /*
72 72 * Turn a hex-encoded string into binary.
73 73 */
74 static PyObject *unhexlify(const char *str, int len)
74 PyObject *unhexlify(const char *str, int len)
75 75 {
76 76 PyObject *ret;
77 77 char *d;
78 78 int i;
79 79
80 80 ret = PyBytes_FromStringAndSize(NULL, len / 2);
81 81
82 82 if (!ret)
83 83 return NULL;
84 84
85 85 d = PyBytes_AsString(ret);
86 86
87 87 for (i = 0; i < len;) {
88 88 int hi = hexdigit(str, i++);
89 89 int lo = hexdigit(str, i++);
90 90 *d++ = (hi << 4) | lo;
91 91 }
92 92
93 93 return ret;
94 94 }
95 95
96 96 static PyObject *asciilower(PyObject *self, PyObject *args)
97 97 {
98 98 char *str, *newstr;
99 99 int i, len;
100 100 PyObject *newobj = NULL;
101 101
102 102 if (!PyArg_ParseTuple(args, "s#", &str, &len))
103 103 goto quit;
104 104
105 105 newobj = PyBytes_FromStringAndSize(NULL, len);
106 106 if (!newobj)
107 107 goto quit;
108 108
109 109 newstr = PyBytes_AS_STRING(newobj);
110 110
111 111 for (i = 0; i < len; i++) {
112 112 char c = str[i];
113 113 if (c & 0x80) {
114 114 PyObject *err = PyUnicodeDecodeError_Create(
115 115 "ascii", str, len, i, (i + 1),
116 116 "unexpected code byte");
117 117 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
118 118 Py_XDECREF(err);
119 119 goto quit;
120 120 }
121 121 newstr[i] = lowertable[(unsigned char)c];
122 122 }
123 123
124 124 return newobj;
125 125 quit:
126 126 Py_XDECREF(newobj);
127 127 return NULL;
128 128 }
129 129
130 130 /*
131 131 * This code assumes that a manifest is stitched together with newline
132 132 * ('\n') characters.
133 133 */
134 134 static PyObject *parse_manifest(PyObject *self, PyObject *args)
135 135 {
136 136 PyObject *mfdict, *fdict;
137 137 char *str, *start, *end;
138 138 int len;
139 139
140 140 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
141 141 &PyDict_Type, &mfdict,
142 142 &PyDict_Type, &fdict,
143 143 &str, &len))
144 144 goto quit;
145 145
146 146 start = str;
147 147 end = str + len;
148 148 while (start < end) {
149 149 PyObject *file = NULL, *node = NULL;
150 150 PyObject *flags = NULL;
151 151 char *zero = NULL, *newline = NULL;
152 152 ptrdiff_t nlen;
153 153
154 154 zero = memchr(start, '\0', end - start);
155 155 if (!zero) {
156 156 PyErr_SetString(PyExc_ValueError,
157 157 "manifest entry has no separator");
158 158 goto quit;
159 159 }
160 160
161 161 newline = memchr(zero + 1, '\n', end - (zero + 1));
162 162 if (!newline) {
163 163 PyErr_SetString(PyExc_ValueError,
164 164 "manifest contains trailing garbage");
165 165 goto quit;
166 166 }
167 167
168 168 file = PyBytes_FromStringAndSize(start, zero - start);
169 169
170 170 if (!file)
171 171 goto bail;
172 172
173 173 nlen = newline - zero - 1;
174 174
175 175 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
176 176 if (!node)
177 177 goto bail;
178 178
179 179 if (nlen > 40) {
180 180 flags = PyBytes_FromStringAndSize(zero + 41,
181 181 nlen - 40);
182 182 if (!flags)
183 183 goto bail;
184 184
185 185 if (PyDict_SetItem(fdict, file, flags) == -1)
186 186 goto bail;
187 187 }
188 188
189 189 if (PyDict_SetItem(mfdict, file, node) == -1)
190 190 goto bail;
191 191
192 192 start = newline + 1;
193 193
194 194 Py_XDECREF(flags);
195 195 Py_XDECREF(node);
196 196 Py_XDECREF(file);
197 197 continue;
198 198 bail:
199 199 Py_XDECREF(flags);
200 200 Py_XDECREF(node);
201 201 Py_XDECREF(file);
202 202 goto quit;
203 203 }
204 204
205 205 Py_INCREF(Py_None);
206 206 return Py_None;
207 207 quit:
208 208 return NULL;
209 209 }
210 210
211 211 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
212 212 int size, int mtime)
213 213 {
214 214 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
215 215 &dirstateTupleType);
216 216 if (!t)
217 217 return NULL;
218 218 t->state = state;
219 219 t->mode = mode;
220 220 t->size = size;
221 221 t->mtime = mtime;
222 222 return t;
223 223 }
224 224
225 225 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
226 226 PyObject *kwds)
227 227 {
228 228 /* We do all the initialization here and not a tp_init function because
229 229 * dirstate_tuple is immutable. */
230 230 dirstateTupleObject *t;
231 231 char state;
232 232 int size, mode, mtime;
233 233 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
234 234 return NULL;
235 235
236 236 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
237 237 if (!t)
238 238 return NULL;
239 239 t->state = state;
240 240 t->mode = mode;
241 241 t->size = size;
242 242 t->mtime = mtime;
243 243
244 244 return (PyObject *)t;
245 245 }
246 246
247 247 static void dirstate_tuple_dealloc(PyObject *o)
248 248 {
249 249 PyObject_Del(o);
250 250 }
251 251
252 252 static Py_ssize_t dirstate_tuple_length(PyObject *o)
253 253 {
254 254 return 4;
255 255 }
256 256
257 257 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
258 258 {
259 259 dirstateTupleObject *t = (dirstateTupleObject *)o;
260 260 switch (i) {
261 261 case 0:
262 262 return PyBytes_FromStringAndSize(&t->state, 1);
263 263 case 1:
264 264 return PyInt_FromLong(t->mode);
265 265 case 2:
266 266 return PyInt_FromLong(t->size);
267 267 case 3:
268 268 return PyInt_FromLong(t->mtime);
269 269 default:
270 270 PyErr_SetString(PyExc_IndexError, "index out of range");
271 271 return NULL;
272 272 }
273 273 }
274 274
/*
 * Sequence protocol slots for dirstate_tuple: only length and item
 * lookup are provided; every other slot is left empty.
 */
static PySequenceMethods dirstate_tuple_sq = {
	dirstate_tuple_length,     /* sq_length */
	0,                         /* sq_concat */
	0,                         /* sq_repeat */
	dirstate_tuple_item,       /* sq_item */
	0,                         /* sq_ass_item */
	0,                         /* sq_contains */
	0,                         /* sq_inplace_concat */
	0                          /* sq_inplace_repeat */
};
285 285
/*
 * Type object for dirstate_tuple. It wires up deallocation, the sequence
 * slots above and dirstate_tuple_new as the constructor; all remaining
 * slots are left at zero.
 */
PyTypeObject dirstateTupleType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"dirstate_tuple",          /* tp_name */
	sizeof(dirstateTupleObject),/* tp_basicsize */
	0,                         /* tp_itemsize */
	(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
	0,                         /* tp_print */
	0,                         /* tp_getattr */
	0,                         /* tp_setattr */
	0,                         /* tp_compare */
	0,                         /* tp_repr */
	0,                         /* tp_as_number */
	&dirstate_tuple_sq,        /* tp_as_sequence */
	0,                         /* tp_as_mapping */
	0,                         /* tp_hash */
	0,                         /* tp_call */
	0,                         /* tp_str */
	0,                         /* tp_getattro */
	0,                         /* tp_setattro */
	0,                         /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,        /* tp_flags */
	"dirstate tuple",          /* tp_doc */
	0,                         /* tp_traverse */
	0,                         /* tp_clear */
	0,                         /* tp_richcompare */
	0,                         /* tp_weaklistoffset */
	0,                         /* tp_iter */
	0,                         /* tp_iternext */
	0,                         /* tp_methods */
	0,                         /* tp_members */
	0,                         /* tp_getset */
	0,                         /* tp_base */
	0,                         /* tp_dict */
	0,                         /* tp_descr_get */
	0,                         /* tp_descr_set */
	0,                         /* tp_dictoffset */
	0,                         /* tp_init */
	0,                         /* tp_alloc */
	dirstate_tuple_new,        /* tp_new */
};
326 326
327 327 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
328 328 {
329 329 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
330 330 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
331 331 char state, *cur, *str, *cpos;
332 332 int mode, size, mtime;
333 333 unsigned int flen, len, pos = 40;
334 334 int readlen;
335 335
336 336 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
337 337 &PyDict_Type, &dmap,
338 338 &PyDict_Type, &cmap,
339 339 &str, &readlen))
340 340 goto quit;
341 341
342 342 if (readlen < 0)
343 343 goto quit;
344 344
345 345 len = readlen;
346 346
347 347 /* read parents */
348 348 if (len < 40)
349 349 goto quit;
350 350
351 351 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
352 352 if (!parents)
353 353 goto quit;
354 354
355 355 /* read filenames */
356 356 while (pos >= 40 && pos < len) {
357 357 cur = str + pos;
358 358 /* unpack header */
359 359 state = *cur;
360 360 mode = getbe32(cur + 1);
361 361 size = getbe32(cur + 5);
362 362 mtime = getbe32(cur + 9);
363 363 flen = getbe32(cur + 13);
364 364 pos += 17;
365 365 cur += 17;
366 366 if (flen > len - pos) {
367 367 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
368 368 goto quit;
369 369 }
370 370
371 371 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
372 372 mtime);
373 373 cpos = memchr(cur, 0, flen);
374 374 if (cpos) {
375 375 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
376 376 cname = PyBytes_FromStringAndSize(cpos + 1,
377 377 flen - (cpos - cur) - 1);
378 378 if (!fname || !cname ||
379 379 PyDict_SetItem(cmap, fname, cname) == -1 ||
380 380 PyDict_SetItem(dmap, fname, entry) == -1)
381 381 goto quit;
382 382 Py_DECREF(cname);
383 383 } else {
384 384 fname = PyBytes_FromStringAndSize(cur, flen);
385 385 if (!fname ||
386 386 PyDict_SetItem(dmap, fname, entry) == -1)
387 387 goto quit;
388 388 }
389 389 Py_DECREF(fname);
390 390 Py_DECREF(entry);
391 391 fname = cname = entry = NULL;
392 392 pos += flen;
393 393 }
394 394
395 395 ret = parents;
396 396 Py_INCREF(ret);
397 397 quit:
398 398 Py_XDECREF(fname);
399 399 Py_XDECREF(cname);
400 400 Py_XDECREF(entry);
401 401 Py_XDECREF(parents);
402 402 return ret;
403 403 }
404 404
405 405 /*
406 406 * Efficiently pack a dirstate object into its on-disk format.
407 407 */
408 408 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
409 409 {
410 410 PyObject *packobj = NULL;
411 411 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
412 412 Py_ssize_t nbytes, pos, l;
413 413 PyObject *k, *v = NULL, *pn;
414 414 char *p, *s;
415 415 double now;
416 416
417 417 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
418 418 &PyDict_Type, &map, &PyDict_Type, &copymap,
419 419 &pl, &now))
420 420 return NULL;
421 421
422 422 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
423 423 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
424 424 return NULL;
425 425 }
426 426
427 427 /* Figure out how much we need to allocate. */
428 428 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
429 429 PyObject *c;
430 430 if (!PyString_Check(k)) {
431 431 PyErr_SetString(PyExc_TypeError, "expected string key");
432 432 goto bail;
433 433 }
434 434 nbytes += PyString_GET_SIZE(k) + 17;
435 435 c = PyDict_GetItem(copymap, k);
436 436 if (c) {
437 437 if (!PyString_Check(c)) {
438 438 PyErr_SetString(PyExc_TypeError,
439 439 "expected string key");
440 440 goto bail;
441 441 }
442 442 nbytes += PyString_GET_SIZE(c) + 1;
443 443 }
444 444 }
445 445
446 446 packobj = PyString_FromStringAndSize(NULL, nbytes);
447 447 if (packobj == NULL)
448 448 goto bail;
449 449
450 450 p = PyString_AS_STRING(packobj);
451 451
452 452 pn = PySequence_ITEM(pl, 0);
453 453 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
454 454 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
455 455 goto bail;
456 456 }
457 457 memcpy(p, s, l);
458 458 p += 20;
459 459 pn = PySequence_ITEM(pl, 1);
460 460 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
461 461 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
462 462 goto bail;
463 463 }
464 464 memcpy(p, s, l);
465 465 p += 20;
466 466
467 467 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
468 468 dirstateTupleObject *tuple;
469 469 char state;
470 470 uint32_t mode, size, mtime;
471 471 Py_ssize_t len, l;
472 472 PyObject *o;
473 473 char *t;
474 474
475 475 if (!dirstate_tuple_check(v)) {
476 476 PyErr_SetString(PyExc_TypeError,
477 477 "expected a dirstate tuple");
478 478 goto bail;
479 479 }
480 480 tuple = (dirstateTupleObject *)v;
481 481
482 482 state = tuple->state;
483 483 mode = tuple->mode;
484 484 size = tuple->size;
485 485 mtime = tuple->mtime;
486 486 if (state == 'n' && mtime == (uint32_t)now) {
487 487 /* See pure/parsers.py:pack_dirstate for why we do
488 488 * this. */
489 489 mtime = -1;
490 490 mtime_unset = (PyObject *)make_dirstate_tuple(
491 491 state, mode, size, mtime);
492 492 if (!mtime_unset)
493 493 goto bail;
494 494 if (PyDict_SetItem(map, k, mtime_unset) == -1)
495 495 goto bail;
496 496 Py_DECREF(mtime_unset);
497 497 mtime_unset = NULL;
498 498 }
499 499 *p++ = state;
500 500 putbe32(mode, p);
501 501 putbe32(size, p + 4);
502 502 putbe32(mtime, p + 8);
503 503 t = p + 12;
504 504 p += 16;
505 505 len = PyString_GET_SIZE(k);
506 506 memcpy(p, PyString_AS_STRING(k), len);
507 507 p += len;
508 508 o = PyDict_GetItem(copymap, k);
509 509 if (o) {
510 510 *p++ = '\0';
511 511 l = PyString_GET_SIZE(o);
512 512 memcpy(p, PyString_AS_STRING(o), l);
513 513 p += l;
514 514 len += l + 1;
515 515 }
516 516 putbe32((uint32_t)len, t);
517 517 }
518 518
519 519 pos = p - PyString_AS_STRING(packobj);
520 520 if (pos != nbytes) {
521 521 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
522 522 (long)pos, (long)nbytes);
523 523 goto bail;
524 524 }
525 525
526 526 return packobj;
527 527 bail:
528 528 Py_XDECREF(mtime_unset);
529 529 Py_XDECREF(packobj);
530 530 Py_XDECREF(v);
531 531 return NULL;
532 532 }
533 533
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each trie node has one slot per nybble value (16 in total):
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;
544 544
/*
 * This class has two behaviours.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs;/* filtered revs set */
	nodetree *nt;          /* base-16 trie */
	int ntlength;          /* # nodes in use */
	int ntcapacity;        /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero: record positions come from offsets[] */
} indexObject;
578 578
579 579 static Py_ssize_t index_length(const indexObject *self)
580 580 {
581 581 if (self->added == NULL)
582 582 return self->length;
583 583 return self->length + PyList_GET_SIZE(self->added);
584 584 }
585 585
586 586 static PyObject *nullentry;
587 587 static const char nullid[20];
588 588
589 589 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
590 590
591 591 #if LONG_MAX == 0x7fffffffL
592 592 static char *tuple_format = "Kiiiiiis#";
593 593 #else
594 594 static char *tuple_format = "kiiiiiis#";
595 595 #endif
596 596
597 597 /* A RevlogNG v1 index entry is 64 bytes long. */
598 598 static const long v1_hdrsize = 64;
599 599
600 600 /*
601 601 * Return a pointer to the beginning of a RevlogNG record.
602 602 */
603 603 static const char *index_deref(indexObject *self, Py_ssize_t pos)
604 604 {
605 605 if (self->inlined && pos > 0) {
606 606 if (self->offsets == NULL) {
607 607 self->offsets = malloc(self->raw_length *
608 608 sizeof(*self->offsets));
609 609 if (self->offsets == NULL)
610 610 return (const char *)PyErr_NoMemory();
611 611 inline_scan(self, self->offsets);
612 612 }
613 613 return self->offsets[pos];
614 614 }
615 615
616 616 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
617 617 }
618 618
619 619 /*
620 620 * RevlogNG format (all in big endian, data may be inlined):
621 621 * 6 bytes: offset
622 622 * 2 bytes: flags
623 623 * 4 bytes: compressed length
624 624 * 4 bytes: uncompressed length
625 625 * 4 bytes: base revision
626 626 * 4 bytes: link revision
627 627 * 4 bytes: parent 1 revision
628 628 * 4 bytes: parent 2 revision
629 629 * 32 bytes: nodeid (only 20 bytes used)
630 630 */
631 631 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
632 632 {
633 633 uint64_t offset_flags;
634 634 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
635 635 const char *c_node_id;
636 636 const char *data;
637 637 Py_ssize_t length = index_length(self);
638 638 PyObject *entry;
639 639
640 640 if (pos < 0)
641 641 pos += length;
642 642
643 643 if (pos < 0 || pos >= length) {
644 644 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
645 645 return NULL;
646 646 }
647 647
648 648 if (pos == length - 1) {
649 649 Py_INCREF(nullentry);
650 650 return nullentry;
651 651 }
652 652
653 653 if (pos >= self->length - 1) {
654 654 PyObject *obj;
655 655 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
656 656 Py_INCREF(obj);
657 657 return obj;
658 658 }
659 659
660 660 if (self->cache) {
661 661 if (self->cache[pos]) {
662 662 Py_INCREF(self->cache[pos]);
663 663 return self->cache[pos];
664 664 }
665 665 } else {
666 666 self->cache = calloc(self->raw_length, sizeof(PyObject *));
667 667 if (self->cache == NULL)
668 668 return PyErr_NoMemory();
669 669 }
670 670
671 671 data = index_deref(self, pos);
672 672 if (data == NULL)
673 673 return NULL;
674 674
675 675 offset_flags = getbe32(data + 4);
676 676 if (pos == 0) /* mask out version number for the first entry */
677 677 offset_flags &= 0xFFFF;
678 678 else {
679 679 uint32_t offset_high = getbe32(data);
680 680 offset_flags |= ((uint64_t)offset_high) << 32;
681 681 }
682 682
683 683 comp_len = getbe32(data + 8);
684 684 uncomp_len = getbe32(data + 12);
685 685 base_rev = getbe32(data + 16);
686 686 link_rev = getbe32(data + 20);
687 687 parent_1 = getbe32(data + 24);
688 688 parent_2 = getbe32(data + 28);
689 689 c_node_id = data + 32;
690 690
691 691 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
692 692 uncomp_len, base_rev, link_rev,
693 693 parent_1, parent_2, c_node_id, 20);
694 694
695 695 if (entry) {
696 696 PyObject_GC_UnTrack(entry);
697 697 Py_INCREF(entry);
698 698 }
699 699
700 700 self->cache[pos] = entry;
701 701
702 702 return entry;
703 703 }
704 704
705 705 /*
706 706 * Return the 20-byte SHA of the node corresponding to the given rev.
707 707 */
708 708 static const char *index_node(indexObject *self, Py_ssize_t pos)
709 709 {
710 710 Py_ssize_t length = index_length(self);
711 711 const char *data;
712 712
713 713 if (pos == length - 1 || pos == INT_MAX)
714 714 return nullid;
715 715
716 716 if (pos >= length)
717 717 return NULL;
718 718
719 719 if (pos >= self->length - 1) {
720 720 PyObject *tuple, *str;
721 721 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
722 722 str = PyTuple_GetItem(tuple, 7);
723 723 return str ? PyString_AS_STRING(str) : NULL;
724 724 }
725 725
726 726 data = index_deref(self, pos);
727 727 return data ? data + 32 : NULL;
728 728 }
729 729
730 730 static int nt_insert(indexObject *self, const char *node, int rev);
731 731
732 732 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
733 733 {
734 734 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
735 735 return -1;
736 736 if (*nodelen == 20)
737 737 return 0;
738 738 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
739 739 return -1;
740 740 }
741 741
742 742 static PyObject *index_insert(indexObject *self, PyObject *args)
743 743 {
744 744 PyObject *obj;
745 745 char *node;
746 746 int index;
747 747 Py_ssize_t len, nodelen;
748 748
749 749 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
750 750 return NULL;
751 751
752 752 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
753 753 PyErr_SetString(PyExc_TypeError, "8-tuple required");
754 754 return NULL;
755 755 }
756 756
757 757 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
758 758 return NULL;
759 759
760 760 len = index_length(self);
761 761
762 762 if (index < 0)
763 763 index += len;
764 764
765 765 if (index != len - 1) {
766 766 PyErr_SetString(PyExc_IndexError,
767 767 "insert only supported at index -1");
768 768 return NULL;
769 769 }
770 770
771 771 if (self->added == NULL) {
772 772 self->added = PyList_New(0);
773 773 if (self->added == NULL)
774 774 return NULL;
775 775 }
776 776
777 777 if (PyList_Append(self->added, obj) == -1)
778 778 return NULL;
779 779
780 780 if (self->nt)
781 781 nt_insert(self, node, index);
782 782
783 783 Py_CLEAR(self->headrevs);
784 784 Py_RETURN_NONE;
785 785 }
786 786
787 787 static void _index_clearcaches(indexObject *self)
788 788 {
789 789 if (self->cache) {
790 790 Py_ssize_t i;
791 791
792 792 for (i = 0; i < self->raw_length; i++)
793 793 Py_CLEAR(self->cache[i]);
794 794 free(self->cache);
795 795 self->cache = NULL;
796 796 }
797 797 if (self->offsets) {
798 798 free(self->offsets);
799 799 self->offsets = NULL;
800 800 }
801 801 if (self->nt) {
802 802 free(self->nt);
803 803 self->nt = NULL;
804 804 }
805 805 Py_CLEAR(self->headrevs);
806 806 }
807 807
808 808 static PyObject *index_clearcaches(indexObject *self)
809 809 {
810 810 _index_clearcaches(self);
811 811 self->ntlength = self->ntcapacity = 0;
812 812 self->ntdepth = self->ntsplits = 0;
813 813 self->ntrev = -1;
814 814 self->ntlookups = self->ntmisses = 0;
815 815 Py_RETURN_NONE;
816 816 }
817 817
818 818 static PyObject *index_stats(indexObject *self)
819 819 {
820 820 PyObject *obj = PyDict_New();
821 821 PyObject *t = NULL;
822 822
823 823 if (obj == NULL)
824 824 return NULL;
825 825
826 826 #define istat(__n, __d) \
827 827 t = PyInt_FromSsize_t(self->__n); \
828 828 if (!t) \
829 829 goto bail; \
830 830 if (PyDict_SetItemString(obj, __d, t) == -1) \
831 831 goto bail; \
832 832 Py_DECREF(t);
833 833
834 834 if (self->added) {
835 835 Py_ssize_t len = PyList_GET_SIZE(self->added);
836 836 t = PyInt_FromSsize_t(len);
837 837 if (!t)
838 838 goto bail;
839 839 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
840 840 goto bail;
841 841 Py_DECREF(t);
842 842 }
843 843
844 844 if (self->raw_length != self->length - 1)
845 845 istat(raw_length, "revs on disk");
846 846 istat(length, "revs in memory");
847 847 istat(ntcapacity, "node trie capacity");
848 848 istat(ntdepth, "node trie depth");
849 849 istat(ntlength, "node trie count");
850 850 istat(ntlookups, "node trie lookups");
851 851 istat(ntmisses, "node trie misses");
852 852 istat(ntrev, "node trie last rev scanned");
853 853 istat(ntsplits, "node trie splits");
854 854
855 855 #undef istat
856 856
857 857 return obj;
858 858
859 859 bail:
860 860 Py_XDECREF(obj);
861 861 Py_XDECREF(t);
862 862 return NULL;
863 863 }
864 864
865 865 /*
866 866 * When we cache a list, we want to be sure the caller can't mutate
867 867 * the cached copy.
868 868 */
869 869 static PyObject *list_copy(PyObject *list)
870 870 {
871 871 Py_ssize_t len = PyList_GET_SIZE(list);
872 872 PyObject *newlist = PyList_New(len);
873 873 Py_ssize_t i;
874 874
875 875 if (newlist == NULL)
876 876 return NULL;
877 877
878 878 for (i = 0; i < len; i++) {
879 879 PyObject *obj = PyList_GET_ITEM(list, i);
880 880 Py_INCREF(obj);
881 881 PyList_SET_ITEM(newlist, i, obj);
882 882 }
883 883
884 884 return newlist;
885 885 }
886 886
887 887 /* arg should be Py_ssize_t but Python 2.4 do not support the n format */
888 888 static int check_filter(PyObject *filter, unsigned long arg) {
889 889 if (filter) {
890 890 PyObject *arglist, *result;
891 891 int isfiltered;
892 892
893 893 arglist = Py_BuildValue("(k)", arg);
894 894 if (!arglist) {
895 895 return -1;
896 896 }
897 897
898 898 result = PyEval_CallObject(filter, arglist);
899 899 Py_DECREF(arglist);
900 900 if (!result) {
901 901 return -1;
902 902 }
903 903
904 904 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
905 905 * same as this function, so we can just return it directly.*/
906 906 isfiltered = PyObject_IsTrue(result);
907 907 Py_DECREF(result);
908 908 return isfiltered;
909 909 } else {
910 910 return 0;
911 911 }
912 912 }
913 913
914 914 static PyObject *index_headrevs(indexObject *self, PyObject *args)
915 915 {
916 916 Py_ssize_t i, len, addlen;
917 917 char *nothead = NULL;
918 918 PyObject *heads = NULL;
919 919 PyObject *filter = NULL;
920 920 PyObject *filteredrevs = Py_None;
921 921
922 922 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
923 923 return NULL;
924 924 }
925 925
926 926 if (self->headrevs && filteredrevs == self->filteredrevs)
927 927 return list_copy(self->headrevs);
928 928
929 929 Py_DECREF(self->filteredrevs);
930 930 self->filteredrevs = filteredrevs;
931 931 Py_INCREF(filteredrevs);
932 932
933 933 if (filteredrevs != Py_None) {
934 934 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
935 935 if (!filter) {
936 936 PyErr_SetString(PyExc_TypeError,
937 937 "filteredrevs has no attribute __contains__");
938 938 goto bail;
939 939 }
940 940 }
941 941
942 942 len = index_length(self) - 1;
943 943 heads = PyList_New(0);
944 944 if (heads == NULL)
945 945 goto bail;
946 946 if (len == 0) {
947 947 PyObject *nullid = PyInt_FromLong(-1);
948 948 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
949 949 Py_XDECREF(nullid);
950 950 goto bail;
951 951 }
952 952 goto done;
953 953 }
954 954
955 955 nothead = calloc(len, 1);
956 956 if (nothead == NULL)
957 957 goto bail;
958 958
959 959 for (i = 0; i < self->raw_length; i++) {
960 960 const char *data;
961 961 int parent_1, parent_2, isfiltered;
962 962
963 963 isfiltered = check_filter(filter, i);
964 964 if (isfiltered == -1) {
965 965 PyErr_SetString(PyExc_TypeError,
966 966 "unable to check filter");
967 967 goto bail;
968 968 }
969 969
970 970 if (isfiltered) {
971 971 nothead[i] = 1;
972 972 continue;
973 973 }
974 974
975 975 data = index_deref(self, i);
976 976 parent_1 = getbe32(data + 24);
977 977 parent_2 = getbe32(data + 28);
978 978
979 979 if (parent_1 >= 0)
980 980 nothead[parent_1] = 1;
981 981 if (parent_2 >= 0)
982 982 nothead[parent_2] = 1;
983 983 }
984 984
985 985 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
986 986
987 987 for (i = 0; i < addlen; i++) {
988 988 PyObject *rev = PyList_GET_ITEM(self->added, i);
989 989 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
990 990 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
991 991 long parent_1, parent_2;
992 992 int isfiltered;
993 993
994 994 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
995 995 PyErr_SetString(PyExc_TypeError,
996 996 "revlog parents are invalid");
997 997 goto bail;
998 998 }
999 999
1000 1000 isfiltered = check_filter(filter, i);
1001 1001 if (isfiltered == -1) {
1002 1002 PyErr_SetString(PyExc_TypeError,
1003 1003 "unable to check filter");
1004 1004 goto bail;
1005 1005 }
1006 1006
1007 1007 if (isfiltered) {
1008 1008 nothead[i] = 1;
1009 1009 continue;
1010 1010 }
1011 1011
1012 1012 parent_1 = PyInt_AS_LONG(p1);
1013 1013 parent_2 = PyInt_AS_LONG(p2);
1014 1014 if (parent_1 >= 0)
1015 1015 nothead[parent_1] = 1;
1016 1016 if (parent_2 >= 0)
1017 1017 nothead[parent_2] = 1;
1018 1018 }
1019 1019
1020 1020 for (i = 0; i < len; i++) {
1021 1021 PyObject *head;
1022 1022
1023 1023 if (nothead[i])
1024 1024 continue;
1025 1025 head = PyInt_FromSsize_t(i);
1026 1026 if (head == NULL || PyList_Append(heads, head) == -1) {
1027 1027 Py_XDECREF(head);
1028 1028 goto bail;
1029 1029 }
1030 1030 }
1031 1031
1032 1032 done:
1033 1033 self->headrevs = heads;
1034 1034 Py_XDECREF(filter);
1035 1035 free(nothead);
1036 1036 return list_copy(self->headrevs);
1037 1037 bail:
1038 1038 Py_XDECREF(filter);
1039 1039 Py_XDECREF(heads);
1040 1040 free(nothead);
1041 1041 return NULL;
1042 1042 }
1043 1043
1044 1044 static inline int nt_level(const char *node, Py_ssize_t level)
1045 1045 {
1046 1046 int v = node[level>>1];
1047 1047 if (!(level & 1))
1048 1048 v >>= 4;
1049 1049 return v & 0xf;
1050 1050 }
1051 1051
1052 1052 /*
1053 1053 * Return values:
1054 1054 *
1055 1055 * -4: match is ambiguous (multiple candidates)
1056 1056 * -2: not found
1057 1057 * rest: valid rev
1058 1058 */
1059 1059 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
1060 1060 int hex)
1061 1061 {
1062 1062 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1063 1063 int level, maxlevel, off;
1064 1064
1065 1065 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1066 1066 return -1;
1067 1067
1068 1068 if (self->nt == NULL)
1069 1069 return -2;
1070 1070
1071 1071 if (hex)
1072 1072 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1073 1073 else
1074 1074 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1075 1075
1076 1076 for (level = off = 0; level < maxlevel; level++) {
1077 1077 int k = getnybble(node, level);
1078 1078 nodetree *n = &self->nt[off];
1079 1079 int v = n->children[k];
1080 1080
1081 1081 if (v < 0) {
1082 1082 const char *n;
1083 1083 Py_ssize_t i;
1084 1084
1085 1085 v = -v - 1;
1086 1086 n = index_node(self, v);
1087 1087 if (n == NULL)
1088 1088 return -2;
1089 1089 for (i = level; i < maxlevel; i++)
1090 1090 if (getnybble(node, i) != nt_level(n, i))
1091 1091 return -2;
1092 1092 return v;
1093 1093 }
1094 1094 if (v == 0)
1095 1095 return -2;
1096 1096 off = v;
1097 1097 }
1098 1098 /* multiple matches against an ambiguous prefix */
1099 1099 return -4;
1100 1100 }
1101 1101
1102 1102 static int nt_new(indexObject *self)
1103 1103 {
1104 1104 if (self->ntlength == self->ntcapacity) {
1105 1105 self->ntcapacity *= 2;
1106 1106 self->nt = realloc(self->nt,
1107 1107 self->ntcapacity * sizeof(nodetree));
1108 1108 if (self->nt == NULL) {
1109 1109 PyErr_SetString(PyExc_MemoryError, "out of memory");
1110 1110 return -1;
1111 1111 }
1112 1112 memset(&self->nt[self->ntlength], 0,
1113 1113 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1114 1114 }
1115 1115 return self->ntlength++;
1116 1116 }
1117 1117
1118 1118 static int nt_insert(indexObject *self, const char *node, int rev)
1119 1119 {
1120 1120 int level = 0;
1121 1121 int off = 0;
1122 1122
1123 1123 while (level < 40) {
1124 1124 int k = nt_level(node, level);
1125 1125 nodetree *n;
1126 1126 int v;
1127 1127
1128 1128 n = &self->nt[off];
1129 1129 v = n->children[k];
1130 1130
1131 1131 if (v == 0) {
1132 1132 n->children[k] = -rev - 1;
1133 1133 return 0;
1134 1134 }
1135 1135 if (v < 0) {
1136 1136 const char *oldnode = index_node(self, -v - 1);
1137 1137 int noff;
1138 1138
1139 1139 if (!oldnode || !memcmp(oldnode, node, 20)) {
1140 1140 n->children[k] = -rev - 1;
1141 1141 return 0;
1142 1142 }
1143 1143 noff = nt_new(self);
1144 1144 if (noff == -1)
1145 1145 return -1;
1146 1146 /* self->nt may have been changed by realloc */
1147 1147 self->nt[off].children[k] = noff;
1148 1148 off = noff;
1149 1149 n = &self->nt[off];
1150 1150 n->children[nt_level(oldnode, ++level)] = v;
1151 1151 if (level > self->ntdepth)
1152 1152 self->ntdepth = level;
1153 1153 self->ntsplits += 1;
1154 1154 } else {
1155 1155 level += 1;
1156 1156 off = v;
1157 1157 }
1158 1158 }
1159 1159
1160 1160 return -1;
1161 1161 }
1162 1162
1163 1163 static int nt_init(indexObject *self)
1164 1164 {
1165 1165 if (self->nt == NULL) {
1166 1166 if (self->raw_length > INT_MAX) {
1167 1167 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1168 1168 return -1;
1169 1169 }
1170 1170 self->ntcapacity = self->raw_length < 4
1171 1171 ? 4 : (int)self->raw_length / 2;
1172 1172
1173 1173 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1174 1174 if (self->nt == NULL) {
1175 1175 PyErr_NoMemory();
1176 1176 return -1;
1177 1177 }
1178 1178 self->ntlength = 1;
1179 1179 self->ntrev = (int)index_length(self) - 1;
1180 1180 self->ntlookups = 1;
1181 1181 self->ntmisses = 0;
1182 1182 if (nt_insert(self, nullid, INT_MAX) == -1)
1183 1183 return -1;
1184 1184 }
1185 1185 return 0;
1186 1186 }
1187 1187
1188 1188 /*
1189 1189 * Return values:
1190 1190 *
1191 1191 * -3: error (exception set)
1192 1192 * -2: not found (no exception set)
1193 1193 * rest: valid rev
1194 1194 */
1195 1195 static int index_find_node(indexObject *self,
1196 1196 const char *node, Py_ssize_t nodelen)
1197 1197 {
1198 1198 int rev;
1199 1199
1200 1200 self->ntlookups++;
1201 1201 rev = nt_find(self, node, nodelen, 0);
1202 1202 if (rev >= -1)
1203 1203 return rev;
1204 1204
1205 1205 if (nt_init(self) == -1)
1206 1206 return -3;
1207 1207
1208 1208 /*
1209 1209 * For the first handful of lookups, we scan the entire index,
1210 1210 * and cache only the matching nodes. This optimizes for cases
1211 1211 * like "hg tip", where only a few nodes are accessed.
1212 1212 *
1213 1213 * After that, we cache every node we visit, using a single
1214 1214 * scan amortized over multiple lookups. This gives the best
1215 1215 * bulk performance, e.g. for "hg log".
1216 1216 */
1217 1217 if (self->ntmisses++ < 4) {
1218 1218 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1219 1219 const char *n = index_node(self, rev);
1220 1220 if (n == NULL)
1221 1221 return -2;
1222 1222 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1223 1223 if (nt_insert(self, n, rev) == -1)
1224 1224 return -3;
1225 1225 break;
1226 1226 }
1227 1227 }
1228 1228 } else {
1229 1229 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1230 1230 const char *n = index_node(self, rev);
1231 1231 if (n == NULL) {
1232 1232 self->ntrev = rev + 1;
1233 1233 return -2;
1234 1234 }
1235 1235 if (nt_insert(self, n, rev) == -1) {
1236 1236 self->ntrev = rev + 1;
1237 1237 return -3;
1238 1238 }
1239 1239 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1240 1240 break;
1241 1241 }
1242 1242 }
1243 1243 self->ntrev = rev;
1244 1244 }
1245 1245
1246 1246 if (rev >= 0)
1247 1247 return rev;
1248 1248 return -2;
1249 1249 }
1250 1250
1251 1251 static PyObject *raise_revlog_error(void)
1252 1252 {
1253 1253 static PyObject *errclass;
1254 1254 PyObject *mod = NULL, *errobj;
1255 1255
1256 1256 if (errclass == NULL) {
1257 1257 PyObject *dict;
1258 1258
1259 1259 mod = PyImport_ImportModule("mercurial.error");
1260 1260 if (mod == NULL)
1261 1261 goto classfail;
1262 1262
1263 1263 dict = PyModule_GetDict(mod);
1264 1264 if (dict == NULL)
1265 1265 goto classfail;
1266 1266
1267 1267 errclass = PyDict_GetItemString(dict, "RevlogError");
1268 1268 if (errclass == NULL) {
1269 1269 PyErr_SetString(PyExc_SystemError,
1270 1270 "could not find RevlogError");
1271 1271 goto classfail;
1272 1272 }
1273 1273 Py_INCREF(errclass);
1274 1274 Py_DECREF(mod);
1275 1275 }
1276 1276
1277 1277 errobj = PyObject_CallFunction(errclass, NULL);
1278 1278 if (errobj == NULL)
1279 1279 return NULL;
1280 1280 PyErr_SetObject(errclass, errobj);
1281 1281 return errobj;
1282 1282
1283 1283 classfail:
1284 1284 Py_XDECREF(mod);
1285 1285 return NULL;
1286 1286 }
1287 1287
1288 1288 static PyObject *index_getitem(indexObject *self, PyObject *value)
1289 1289 {
1290 1290 char *node;
1291 1291 Py_ssize_t nodelen;
1292 1292 int rev;
1293 1293
1294 1294 if (PyInt_Check(value))
1295 1295 return index_get(self, PyInt_AS_LONG(value));
1296 1296
1297 1297 if (node_check(value, &node, &nodelen) == -1)
1298 1298 return NULL;
1299 1299 rev = index_find_node(self, node, nodelen);
1300 1300 if (rev >= -1)
1301 1301 return PyInt_FromLong(rev);
1302 1302 if (rev == -2)
1303 1303 raise_revlog_error();
1304 1304 return NULL;
1305 1305 }
1306 1306
1307 1307 static int nt_partialmatch(indexObject *self, const char *node,
1308 1308 Py_ssize_t nodelen)
1309 1309 {
1310 1310 int rev;
1311 1311
1312 1312 if (nt_init(self) == -1)
1313 1313 return -3;
1314 1314
1315 1315 if (self->ntrev > 0) {
1316 1316 /* ensure that the radix tree is fully populated */
1317 1317 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1318 1318 const char *n = index_node(self, rev);
1319 1319 if (n == NULL)
1320 1320 return -2;
1321 1321 if (nt_insert(self, n, rev) == -1)
1322 1322 return -3;
1323 1323 }
1324 1324 self->ntrev = rev;
1325 1325 }
1326 1326
1327 1327 return nt_find(self, node, nodelen, 1);
1328 1328 }
1329 1329
1330 1330 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1331 1331 {
1332 1332 const char *fullnode;
1333 1333 int nodelen;
1334 1334 char *node;
1335 1335 int rev, i;
1336 1336
1337 1337 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1338 1338 return NULL;
1339 1339
1340 1340 if (nodelen < 4) {
1341 1341 PyErr_SetString(PyExc_ValueError, "key too short");
1342 1342 return NULL;
1343 1343 }
1344 1344
1345 1345 if (nodelen > 40) {
1346 1346 PyErr_SetString(PyExc_ValueError, "key too long");
1347 1347 return NULL;
1348 1348 }
1349 1349
1350 1350 for (i = 0; i < nodelen; i++)
1351 1351 hexdigit(node, i);
1352 1352 if (PyErr_Occurred()) {
1353 1353 /* input contains non-hex characters */
1354 1354 PyErr_Clear();
1355 1355 Py_RETURN_NONE;
1356 1356 }
1357 1357
1358 1358 rev = nt_partialmatch(self, node, nodelen);
1359 1359
1360 1360 switch (rev) {
1361 1361 case -4:
1362 1362 raise_revlog_error();
1363 1363 case -3:
1364 1364 return NULL;
1365 1365 case -2:
1366 1366 Py_RETURN_NONE;
1367 1367 case -1:
1368 1368 return PyString_FromStringAndSize(nullid, 20);
1369 1369 }
1370 1370
1371 1371 fullnode = index_node(self, rev);
1372 1372 if (fullnode == NULL) {
1373 1373 PyErr_Format(PyExc_IndexError,
1374 1374 "could not access rev %d", rev);
1375 1375 return NULL;
1376 1376 }
1377 1377 return PyString_FromStringAndSize(fullnode, 20);
1378 1378 }
1379 1379
1380 1380 static PyObject *index_m_get(indexObject *self, PyObject *args)
1381 1381 {
1382 1382 Py_ssize_t nodelen;
1383 1383 PyObject *val;
1384 1384 char *node;
1385 1385 int rev;
1386 1386
1387 1387 if (!PyArg_ParseTuple(args, "O", &val))
1388 1388 return NULL;
1389 1389 if (node_check(val, &node, &nodelen) == -1)
1390 1390 return NULL;
1391 1391 rev = index_find_node(self, node, nodelen);
1392 1392 if (rev == -3)
1393 1393 return NULL;
1394 1394 if (rev == -2)
1395 1395 Py_RETURN_NONE;
1396 1396 return PyInt_FromLong(rev);
1397 1397 }
1398 1398
1399 1399 static int index_contains(indexObject *self, PyObject *value)
1400 1400 {
1401 1401 char *node;
1402 1402 Py_ssize_t nodelen;
1403 1403
1404 1404 if (PyInt_Check(value)) {
1405 1405 long rev = PyInt_AS_LONG(value);
1406 1406 return rev >= -1 && rev < index_length(self);
1407 1407 }
1408 1408
1409 1409 if (node_check(value, &node, &nodelen) == -1)
1410 1410 return -1;
1411 1411
1412 1412 switch (index_find_node(self, node, nodelen)) {
1413 1413 case -3:
1414 1414 return -1;
1415 1415 case -2:
1416 1416 return 0;
1417 1417 default:
1418 1418 return 1;
1419 1419 }
1420 1420 }
1421 1421
1422 1422 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1423 1423 {
1424 1424 if (rev >= self->length - 1) {
1425 1425 PyObject *tuple = PyList_GET_ITEM(self->added,
1426 1426 rev - self->length + 1);
1427 1427 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1428 1428 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1429 1429 } else {
1430 1430 const char *data = index_deref(self, rev);
1431 1431 ps[0] = getbe32(data + 24);
1432 1432 ps[1] = getbe32(data + 28);
1433 1433 }
1434 1434 }
1435 1435
1436 1436 typedef uint64_t bitmask;
1437 1437
1438 1438 /*
1439 1439 * Given a disjoint set of revs, return all candidates for the
1440 1440 * greatest common ancestor. In revset notation, this is the set
1441 1441 * "heads(::a and ::b and ...)"
1442 1442 */
1443 1443 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1444 1444 int revcount)
1445 1445 {
1446 1446 const bitmask allseen = (1ull << revcount) - 1;
1447 1447 const bitmask poison = 1ull << revcount;
1448 1448 PyObject *gca = PyList_New(0);
1449 1449 int i, v, interesting;
1450 1450 int maxrev = -1;
1451 1451 bitmask sp;
1452 1452 bitmask *seen;
1453 1453
1454 1454 if (gca == NULL)
1455 1455 return PyErr_NoMemory();
1456 1456
1457 1457 for (i = 0; i < revcount; i++) {
1458 1458 if (revs[i] > maxrev)
1459 1459 maxrev = revs[i];
1460 1460 }
1461 1461
1462 1462 seen = calloc(sizeof(*seen), maxrev + 1);
1463 1463 if (seen == NULL) {
1464 1464 Py_DECREF(gca);
1465 1465 return PyErr_NoMemory();
1466 1466 }
1467 1467
1468 1468 for (i = 0; i < revcount; i++)
1469 1469 seen[revs[i]] = 1ull << i;
1470 1470
1471 1471 interesting = revcount;
1472 1472
1473 1473 for (v = maxrev; v >= 0 && interesting; v--) {
1474 1474 bitmask sv = seen[v];
1475 1475 int parents[2];
1476 1476
1477 1477 if (!sv)
1478 1478 continue;
1479 1479
1480 1480 if (sv < poison) {
1481 1481 interesting -= 1;
1482 1482 if (sv == allseen) {
1483 1483 PyObject *obj = PyInt_FromLong(v);
1484 1484 if (obj == NULL)
1485 1485 goto bail;
1486 1486 if (PyList_Append(gca, obj) == -1) {
1487 1487 Py_DECREF(obj);
1488 1488 goto bail;
1489 1489 }
1490 1490 sv |= poison;
1491 1491 for (i = 0; i < revcount; i++) {
1492 1492 if (revs[i] == v)
1493 1493 goto done;
1494 1494 }
1495 1495 }
1496 1496 }
1497 1497 index_get_parents(self, v, parents);
1498 1498
1499 1499 for (i = 0; i < 2; i++) {
1500 1500 int p = parents[i];
1501 1501 if (p == -1)
1502 1502 continue;
1503 1503 sp = seen[p];
1504 1504 if (sv < poison) {
1505 1505 if (sp == 0) {
1506 1506 seen[p] = sv;
1507 1507 interesting++;
1508 1508 }
1509 1509 else if (sp != sv)
1510 1510 seen[p] |= sv;
1511 1511 } else {
1512 1512 if (sp && sp < poison)
1513 1513 interesting--;
1514 1514 seen[p] = sv;
1515 1515 }
1516 1516 }
1517 1517 }
1518 1518
1519 1519 done:
1520 1520 free(seen);
1521 1521 return gca;
1522 1522 bail:
1523 1523 free(seen);
1524 1524 Py_XDECREF(gca);
1525 1525 return NULL;
1526 1526 }
1527 1527
1528 1528 /*
1529 1529 * Given a disjoint set of revs, return the subset with the longest
1530 1530 * path to the root.
1531 1531 */
1532 1532 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1533 1533 {
1534 1534 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1535 1535 static const Py_ssize_t capacity = 24;
1536 1536 int *depth, *interesting = NULL;
1537 1537 int i, j, v, ninteresting;
1538 1538 PyObject *dict = NULL, *keys = NULL;
1539 1539 long *seen = NULL;
1540 1540 int maxrev = -1;
1541 1541 long final;
1542 1542
1543 1543 if (revcount > capacity) {
1544 1544 PyErr_Format(PyExc_OverflowError,
1545 1545 "bitset size (%ld) > capacity (%ld)",
1546 1546 (long)revcount, (long)capacity);
1547 1547 return NULL;
1548 1548 }
1549 1549
1550 1550 for (i = 0; i < revcount; i++) {
1551 1551 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1552 1552 if (n > maxrev)
1553 1553 maxrev = n;
1554 1554 }
1555 1555
1556 1556 depth = calloc(sizeof(*depth), maxrev + 1);
1557 1557 if (depth == NULL)
1558 1558 return PyErr_NoMemory();
1559 1559
1560 1560 seen = calloc(sizeof(*seen), maxrev + 1);
1561 1561 if (seen == NULL) {
1562 1562 PyErr_NoMemory();
1563 1563 goto bail;
1564 1564 }
1565 1565
1566 1566 interesting = calloc(sizeof(*interesting), 2 << revcount);
1567 1567 if (interesting == NULL) {
1568 1568 PyErr_NoMemory();
1569 1569 goto bail;
1570 1570 }
1571 1571
1572 1572 if (PyList_Sort(revs) == -1)
1573 1573 goto bail;
1574 1574
1575 1575 for (i = 0; i < revcount; i++) {
1576 1576 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1577 1577 long b = 1l << i;
1578 1578 depth[n] = 1;
1579 1579 seen[n] = b;
1580 1580 interesting[b] = 1;
1581 1581 }
1582 1582
1583 1583 ninteresting = (int)revcount;
1584 1584
1585 1585 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1586 1586 int dv = depth[v];
1587 1587 int parents[2];
1588 1588 long sv;
1589 1589
1590 1590 if (dv == 0)
1591 1591 continue;
1592 1592
1593 1593 sv = seen[v];
1594 1594 index_get_parents(self, v, parents);
1595 1595
1596 1596 for (i = 0; i < 2; i++) {
1597 1597 int p = parents[i];
1598 1598 long nsp, sp;
1599 1599 int dp;
1600 1600
1601 1601 if (p == -1)
1602 1602 continue;
1603 1603
1604 1604 dp = depth[p];
1605 1605 nsp = sp = seen[p];
1606 1606 if (dp <= dv) {
1607 1607 depth[p] = dv + 1;
1608 1608 if (sp != sv) {
1609 1609 interesting[sv] += 1;
1610 1610 nsp = seen[p] = sv;
1611 1611 if (sp) {
1612 1612 interesting[sp] -= 1;
1613 1613 if (interesting[sp] == 0)
1614 1614 ninteresting -= 1;
1615 1615 }
1616 1616 }
1617 1617 }
1618 1618 else if (dv == dp - 1) {
1619 1619 nsp = sp | sv;
1620 1620 if (nsp == sp)
1621 1621 continue;
1622 1622 seen[p] = nsp;
1623 1623 interesting[sp] -= 1;
1624 1624 if (interesting[sp] == 0 && interesting[nsp] > 0)
1625 1625 ninteresting -= 1;
1626 1626 interesting[nsp] += 1;
1627 1627 }
1628 1628 }
1629 1629 interesting[sv] -= 1;
1630 1630 if (interesting[sv] == 0)
1631 1631 ninteresting -= 1;
1632 1632 }
1633 1633
1634 1634 final = 0;
1635 1635 j = ninteresting;
1636 1636 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1637 1637 if (interesting[i] == 0)
1638 1638 continue;
1639 1639 final |= i;
1640 1640 j -= 1;
1641 1641 }
1642 1642 if (final == 0) {
1643 1643 keys = PyList_New(0);
1644 1644 goto bail;
1645 1645 }
1646 1646
1647 1647 dict = PyDict_New();
1648 1648 if (dict == NULL)
1649 1649 goto bail;
1650 1650
1651 1651 for (i = 0; i < revcount; i++) {
1652 1652 PyObject *key;
1653 1653
1654 1654 if ((final & (1 << i)) == 0)
1655 1655 continue;
1656 1656
1657 1657 key = PyList_GET_ITEM(revs, i);
1658 1658 Py_INCREF(key);
1659 1659 Py_INCREF(Py_None);
1660 1660 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1661 1661 Py_DECREF(key);
1662 1662 Py_DECREF(Py_None);
1663 1663 goto bail;
1664 1664 }
1665 1665 }
1666 1666
1667 1667 keys = PyDict_Keys(dict);
1668 1668
1669 1669 bail:
1670 1670 free(depth);
1671 1671 free(seen);
1672 1672 free(interesting);
1673 1673 Py_XDECREF(dict);
1674 1674
1675 1675 return keys;
1676 1676 }
1677 1677
1678 1678 /*
1679 1679 * Given a (possibly overlapping) set of revs, return all the
1680 1680 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1681 1681 */
1682 1682 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1683 1683 {
1684 1684 PyObject *ret = NULL;
1685 1685 Py_ssize_t argcount, i, len;
1686 1686 bitmask repeat = 0;
1687 1687 int revcount = 0;
1688 1688 int *revs;
1689 1689
1690 1690 argcount = PySequence_Length(args);
1691 1691 revs = malloc(argcount * sizeof(*revs));
1692 1692 if (argcount > 0 && revs == NULL)
1693 1693 return PyErr_NoMemory();
1694 1694 len = index_length(self) - 1;
1695 1695
1696 1696 for (i = 0; i < argcount; i++) {
1697 1697 static const int capacity = 24;
1698 1698 PyObject *obj = PySequence_GetItem(args, i);
1699 1699 bitmask x;
1700 1700 long val;
1701 1701
1702 1702 if (!PyInt_Check(obj)) {
1703 1703 PyErr_SetString(PyExc_TypeError,
1704 1704 "arguments must all be ints");
1705 1705 Py_DECREF(obj);
1706 1706 goto bail;
1707 1707 }
1708 1708 val = PyInt_AsLong(obj);
1709 1709 Py_DECREF(obj);
1710 1710 if (val == -1) {
1711 1711 ret = PyList_New(0);
1712 1712 goto done;
1713 1713 }
1714 1714 if (val < 0 || val >= len) {
1715 1715 PyErr_SetString(PyExc_IndexError,
1716 1716 "index out of range");
1717 1717 goto bail;
1718 1718 }
1719 1719 /* this cheesy bloom filter lets us avoid some more
1720 1720 * expensive duplicate checks in the common set-is-disjoint
1721 1721 * case */
1722 1722 x = 1ull << (val & 0x3f);
1723 1723 if (repeat & x) {
1724 1724 int k;
1725 1725 for (k = 0; k < revcount; k++) {
1726 1726 if (val == revs[k])
1727 1727 goto duplicate;
1728 1728 }
1729 1729 }
1730 1730 else repeat |= x;
1731 1731 if (revcount >= capacity) {
1732 1732 PyErr_Format(PyExc_OverflowError,
1733 1733 "bitset size (%d) > capacity (%d)",
1734 1734 revcount, capacity);
1735 1735 goto bail;
1736 1736 }
1737 1737 revs[revcount++] = (int)val;
1738 1738 duplicate:;
1739 1739 }
1740 1740
1741 1741 if (revcount == 0) {
1742 1742 ret = PyList_New(0);
1743 1743 goto done;
1744 1744 }
1745 1745 if (revcount == 1) {
1746 1746 PyObject *obj;
1747 1747 ret = PyList_New(1);
1748 1748 if (ret == NULL)
1749 1749 goto bail;
1750 1750 obj = PyInt_FromLong(revs[0]);
1751 1751 if (obj == NULL)
1752 1752 goto bail;
1753 1753 PyList_SET_ITEM(ret, 0, obj);
1754 1754 goto done;
1755 1755 }
1756 1756
1757 1757 ret = find_gca_candidates(self, revs, revcount);
1758 1758 if (ret == NULL)
1759 1759 goto bail;
1760 1760
1761 1761 done:
1762 1762 free(revs);
1763 1763 return ret;
1764 1764
1765 1765 bail:
1766 1766 free(revs);
1767 1767 Py_XDECREF(ret);
1768 1768 return NULL;
1769 1769 }
1770 1770
1771 1771 /*
1772 1772 * Given a (possibly overlapping) set of revs, return the greatest
1773 1773 * common ancestors: those with the longest path to the root.
1774 1774 */
1775 1775 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1776 1776 {
1777 1777 PyObject *gca = index_commonancestorsheads(self, args);
1778 1778 if (gca == NULL)
1779 1779 return NULL;
1780 1780
1781 1781 if (PyList_GET_SIZE(gca) <= 1) {
1782 1782 Py_INCREF(gca);
1783 1783 return gca;
1784 1784 }
1785 1785
1786 1786 return find_deepest(self, gca);
1787 1787 }
1788 1788
1789 1789 /*
1790 1790 * Invalidate any trie entries introduced by added revs.
1791 1791 */
1792 1792 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1793 1793 {
1794 1794 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1795 1795
1796 1796 for (i = start; i < len; i++) {
1797 1797 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1798 1798 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1799 1799
1800 1800 nt_insert(self, PyString_AS_STRING(node), -1);
1801 1801 }
1802 1802
1803 1803 if (start == 0)
1804 1804 Py_CLEAR(self->added);
1805 1805 }
1806 1806
1807 1807 /*
1808 1808 * Delete a numeric range of revs, which must be at the end of the
1809 1809 * range, but exclude the sentinel nullid entry.
1810 1810 */
1811 1811 static int index_slice_del(indexObject *self, PyObject *item)
1812 1812 {
1813 1813 Py_ssize_t start, stop, step, slicelength;
1814 1814 Py_ssize_t length = index_length(self);
1815 1815 int ret = 0;
1816 1816
1817 1817 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1818 1818 &start, &stop, &step, &slicelength) < 0)
1819 1819 return -1;
1820 1820
1821 1821 if (slicelength <= 0)
1822 1822 return 0;
1823 1823
1824 1824 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1825 1825 stop = start;
1826 1826
1827 1827 if (step < 0) {
1828 1828 stop = start + 1;
1829 1829 start = stop + step*(slicelength - 1) - 1;
1830 1830 step = -step;
1831 1831 }
1832 1832
1833 1833 if (step != 1) {
1834 1834 PyErr_SetString(PyExc_ValueError,
1835 1835 "revlog index delete requires step size of 1");
1836 1836 return -1;
1837 1837 }
1838 1838
1839 1839 if (stop != length - 1) {
1840 1840 PyErr_SetString(PyExc_IndexError,
1841 1841 "revlog index deletion indices are invalid");
1842 1842 return -1;
1843 1843 }
1844 1844
1845 1845 if (start < self->length - 1) {
1846 1846 if (self->nt) {
1847 1847 Py_ssize_t i;
1848 1848
1849 1849 for (i = start + 1; i < self->length - 1; i++) {
1850 1850 const char *node = index_node(self, i);
1851 1851
1852 1852 if (node)
1853 1853 nt_insert(self, node, -1);
1854 1854 }
1855 1855 if (self->added)
1856 1856 nt_invalidate_added(self, 0);
1857 1857 if (self->ntrev > start)
1858 1858 self->ntrev = (int)start;
1859 1859 }
1860 1860 self->length = start + 1;
1861 1861 if (start < self->raw_length) {
1862 1862 if (self->cache) {
1863 1863 Py_ssize_t i;
1864 1864 for (i = start; i < self->raw_length; i++)
1865 1865 Py_CLEAR(self->cache[i]);
1866 1866 }
1867 1867 self->raw_length = start;
1868 1868 }
1869 1869 goto done;
1870 1870 }
1871 1871
1872 1872 if (self->nt) {
1873 1873 nt_invalidate_added(self, start - self->length + 1);
1874 1874 if (self->ntrev > start)
1875 1875 self->ntrev = (int)start;
1876 1876 }
1877 1877 if (self->added)
1878 1878 ret = PyList_SetSlice(self->added, start - self->length + 1,
1879 1879 PyList_GET_SIZE(self->added), NULL);
1880 1880 done:
1881 1881 Py_CLEAR(self->headrevs);
1882 1882 return ret;
1883 1883 }
1884 1884
1885 1885 /*
1886 1886 * Supported ops:
1887 1887 *
1888 1888 * slice deletion
1889 1889 * string assignment (extend node->rev mapping)
1890 1890 * string deletion (shrink node->rev mapping)
1891 1891 */
1892 1892 static int index_assign_subscript(indexObject *self, PyObject *item,
1893 1893 PyObject *value)
1894 1894 {
1895 1895 char *node;
1896 1896 Py_ssize_t nodelen;
1897 1897 long rev;
1898 1898
1899 1899 if (PySlice_Check(item) && value == NULL)
1900 1900 return index_slice_del(self, item);
1901 1901
1902 1902 if (node_check(item, &node, &nodelen) == -1)
1903 1903 return -1;
1904 1904
1905 1905 if (value == NULL)
1906 1906 return self->nt ? nt_insert(self, node, -1) : 0;
1907 1907 rev = PyInt_AsLong(value);
1908 1908 if (rev > INT_MAX || rev < 0) {
1909 1909 if (!PyErr_Occurred())
1910 1910 PyErr_SetString(PyExc_ValueError, "rev out of range");
1911 1911 return -1;
1912 1912 }
1913 1913
1914 1914 if (nt_init(self) == -1)
1915 1915 return -1;
1916 1916 return nt_insert(self, node, (int)rev);
1917 1917 }
1918 1918
1919 1919 /*
1920 1920 * Find all RevlogNG entries in an index that has inline data. Update
1921 1921 * the optional "offsets" table with those entries.
1922 1922 */
1923 1923 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1924 1924 {
1925 1925 const char *data = PyString_AS_STRING(self->data);
1926 1926 Py_ssize_t pos = 0;
1927 1927 Py_ssize_t end = PyString_GET_SIZE(self->data);
1928 1928 long incr = v1_hdrsize;
1929 1929 Py_ssize_t len = 0;
1930 1930
1931 1931 while (pos + v1_hdrsize <= end && pos >= 0) {
1932 1932 uint32_t comp_len;
1933 1933 /* 3rd element of header is length of compressed inline data */
1934 1934 comp_len = getbe32(data + pos + 8);
1935 1935 incr = v1_hdrsize + comp_len;
1936 1936 if (offsets)
1937 1937 offsets[len] = data + pos;
1938 1938 len++;
1939 1939 pos += incr;
1940 1940 }
1941 1941
1942 1942 if (pos != end) {
1943 1943 if (!PyErr_Occurred())
1944 1944 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1945 1945 return -1;
1946 1946 }
1947 1947
1948 1948 return len;
1949 1949 }
1950 1950
1951 1951 static int index_init(indexObject *self, PyObject *args)
1952 1952 {
1953 1953 PyObject *data_obj, *inlined_obj;
1954 1954 Py_ssize_t size;
1955 1955
1956 1956 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1957 1957 self->raw_length = 0;
1958 1958 self->added = NULL;
1959 1959 self->cache = NULL;
1960 1960 self->data = NULL;
1961 1961 self->headrevs = NULL;
1962 1962 self->filteredrevs = Py_None;
1963 1963 Py_INCREF(Py_None);
1964 1964 self->nt = NULL;
1965 1965 self->offsets = NULL;
1966 1966
1967 1967 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1968 1968 return -1;
1969 1969 if (!PyString_Check(data_obj)) {
1970 1970 PyErr_SetString(PyExc_TypeError, "data is not a string");
1971 1971 return -1;
1972 1972 }
1973 1973 size = PyString_GET_SIZE(data_obj);
1974 1974
1975 1975 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1976 1976 self->data = data_obj;
1977 1977
1978 1978 self->ntlength = self->ntcapacity = 0;
1979 1979 self->ntdepth = self->ntsplits = 0;
1980 1980 self->ntlookups = self->ntmisses = 0;
1981 1981 self->ntrev = -1;
1982 1982 Py_INCREF(self->data);
1983 1983
1984 1984 if (self->inlined) {
1985 1985 Py_ssize_t len = inline_scan(self, NULL);
1986 1986 if (len == -1)
1987 1987 goto bail;
1988 1988 self->raw_length = len;
1989 1989 self->length = len + 1;
1990 1990 } else {
1991 1991 if (size % v1_hdrsize) {
1992 1992 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1993 1993 goto bail;
1994 1994 }
1995 1995 self->raw_length = size / v1_hdrsize;
1996 1996 self->length = self->raw_length + 1;
1997 1997 }
1998 1998
1999 1999 return 0;
2000 2000 bail:
2001 2001 return -1;
2002 2002 }
2003 2003
2004 2004 static PyObject *index_nodemap(indexObject *self)
2005 2005 {
2006 2006 Py_INCREF(self);
2007 2007 return (PyObject *)self;
2008 2008 }
2009 2009
2010 2010 static void index_dealloc(indexObject *self)
2011 2011 {
2012 2012 _index_clearcaches(self);
2013 2013 Py_XDECREF(self->filteredrevs);
2014 2014 Py_XDECREF(self->data);
2015 2015 Py_XDECREF(self->added);
2016 2016 PyObject_Del(self);
2017 2017 }
2018 2018
2019 2019 static PySequenceMethods index_sequence_methods = {
2020 2020 (lenfunc)index_length, /* sq_length */
2021 2021 0, /* sq_concat */
2022 2022 0, /* sq_repeat */
2023 2023 (ssizeargfunc)index_get, /* sq_item */
2024 2024 0, /* sq_slice */
2025 2025 0, /* sq_ass_item */
2026 2026 0, /* sq_ass_slice */
2027 2027 (objobjproc)index_contains, /* sq_contains */
2028 2028 };
2029 2029
2030 2030 static PyMappingMethods index_mapping_methods = {
2031 2031 (lenfunc)index_length, /* mp_length */
2032 2032 (binaryfunc)index_getitem, /* mp_subscript */
2033 2033 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2034 2034 };
2035 2035
2036 2036 static PyMethodDef index_methods[] = {
2037 2037 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2038 2038 "return the gca set of the given revs"},
2039 2039 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2040 2040 METH_VARARGS,
2041 2041 "return the heads of the common ancestors of the given revs"},
2042 2042 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2043 2043 "clear the index caches"},
2044 2044 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2045 2045 "get an index entry"},
2046 2046 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2047 2047 "get head revisions"}, /* Can do filtering since 3.2 */
2048 2048 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2049 2049 "get filtered head revisions"}, /* Can always do filtering */
2050 2050 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2051 2051 "insert an index entry"},
2052 2052 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2053 2053 "match a potentially ambiguous node ID"},
2054 2054 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2055 2055 "stats for the index"},
2056 2056 {NULL} /* Sentinel */
2057 2057 };
2058 2058
2059 2059 static PyGetSetDef index_getset[] = {
2060 2060 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2061 2061 {NULL} /* Sentinel */
2062 2062 };
2063 2063
2064 2064 static PyTypeObject indexType = {
2065 2065 PyObject_HEAD_INIT(NULL)
2066 2066 0, /* ob_size */
2067 2067 "parsers.index", /* tp_name */
2068 2068 sizeof(indexObject), /* tp_basicsize */
2069 2069 0, /* tp_itemsize */
2070 2070 (destructor)index_dealloc, /* tp_dealloc */
2071 2071 0, /* tp_print */
2072 2072 0, /* tp_getattr */
2073 2073 0, /* tp_setattr */
2074 2074 0, /* tp_compare */
2075 2075 0, /* tp_repr */
2076 2076 0, /* tp_as_number */
2077 2077 &index_sequence_methods, /* tp_as_sequence */
2078 2078 &index_mapping_methods, /* tp_as_mapping */
2079 2079 0, /* tp_hash */
2080 2080 0, /* tp_call */
2081 2081 0, /* tp_str */
2082 2082 0, /* tp_getattro */
2083 2083 0, /* tp_setattro */
2084 2084 0, /* tp_as_buffer */
2085 2085 Py_TPFLAGS_DEFAULT, /* tp_flags */
2086 2086 "revlog index", /* tp_doc */
2087 2087 0, /* tp_traverse */
2088 2088 0, /* tp_clear */
2089 2089 0, /* tp_richcompare */
2090 2090 0, /* tp_weaklistoffset */
2091 2091 0, /* tp_iter */
2092 2092 0, /* tp_iternext */
2093 2093 index_methods, /* tp_methods */
2094 2094 0, /* tp_members */
2095 2095 index_getset, /* tp_getset */
2096 2096 0, /* tp_base */
2097 2097 0, /* tp_dict */
2098 2098 0, /* tp_descr_get */
2099 2099 0, /* tp_descr_set */
2100 2100 0, /* tp_dictoffset */
2101 2101 (initproc)index_init, /* tp_init */
2102 2102 0, /* tp_alloc */
2103 2103 };
2104 2104
2105 2105 /*
2106 2106 * returns a tuple of the form (index, index, cache) with elements as
2107 2107 * follows:
2108 2108 *
2109 2109 * index: an index object that lazily parses RevlogNG records
2110 2110 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2111 2111 *
2112 2112 * added complications are for backwards compatibility
2113 2113 */
2114 2114 static PyObject *parse_index2(PyObject *self, PyObject *args)
2115 2115 {
2116 2116 PyObject *tuple = NULL, *cache = NULL;
2117 2117 indexObject *idx;
2118 2118 int ret;
2119 2119
2120 2120 idx = PyObject_New(indexObject, &indexType);
2121 2121 if (idx == NULL)
2122 2122 goto bail;
2123 2123
2124 2124 ret = index_init(idx, args);
2125 2125 if (ret == -1)
2126 2126 goto bail;
2127 2127
2128 2128 if (idx->inlined) {
2129 2129 cache = Py_BuildValue("iO", 0, idx->data);
2130 2130 if (cache == NULL)
2131 2131 goto bail;
2132 2132 } else {
2133 2133 cache = Py_None;
2134 2134 Py_INCREF(cache);
2135 2135 }
2136 2136
2137 2137 tuple = Py_BuildValue("NN", idx, cache);
2138 2138 if (!tuple)
2139 2139 goto bail;
2140 2140 return tuple;
2141 2141
2142 2142 bail:
2143 2143 Py_XDECREF(idx);
2144 2144 Py_XDECREF(cache);
2145 2145 Py_XDECREF(tuple);
2146 2146 return NULL;
2147 2147 }
2148 2148
2149 2149 #define BUMPED_FIX 1
2150 2150 #define USING_SHA_256 2
2151 2151
2152 2152 static PyObject *readshas(
2153 2153 const char *source, unsigned char num, Py_ssize_t hashwidth)
2154 2154 {
2155 2155 int i;
2156 2156 PyObject *list = PyTuple_New(num);
2157 2157 if (list == NULL) {
2158 2158 return NULL;
2159 2159 }
2160 2160 for (i = 0; i < num; i++) {
2161 2161 PyObject *hash = PyString_FromStringAndSize(source, hashwidth);
2162 2162 if (hash == NULL) {
2163 2163 Py_DECREF(list);
2164 2164 return NULL;
2165 2165 }
2166 2166 PyTuple_SetItem(list, i, hash);
2167 2167 source += hashwidth;
2168 2168 }
2169 2169 return list;
2170 2170 }
2171 2171
2172 2172 static PyObject *fm1readmarker(const char *data, uint32_t *msize)
2173 2173 {
2174 2174 const char *meta;
2175 2175
2176 2176 double mtime;
2177 2177 int16_t tz;
2178 2178 uint16_t flags;
2179 2179 unsigned char nsuccs, nparents, nmetadata;
2180 2180 Py_ssize_t hashwidth = 20;
2181 2181
2182 2182 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
2183 2183 PyObject *metadata = NULL, *ret = NULL;
2184 2184 int i;
2185 2185
2186 2186 *msize = getbe32(data);
2187 2187 data += 4;
2188 2188 mtime = getbefloat64(data);
2189 2189 data += 8;
2190 2190 tz = getbeint16(data);
2191 2191 data += 2;
2192 2192 flags = getbeuint16(data);
2193 2193 data += 2;
2194 2194
2195 2195 if (flags & USING_SHA_256) {
2196 2196 hashwidth = 32;
2197 2197 }
2198 2198
2199 2199 nsuccs = (unsigned char)(*data++);
2200 2200 nparents = (unsigned char)(*data++);
2201 2201 nmetadata = (unsigned char)(*data++);
2202 2202
2203 2203 prec = PyString_FromStringAndSize(data, hashwidth);
2204 2204 data += hashwidth;
2205 2205 if (prec == NULL) {
2206 2206 goto bail;
2207 2207 }
2208 2208
2209 2209 succs = readshas(data, nsuccs, hashwidth);
2210 2210 if (succs == NULL) {
2211 2211 goto bail;
2212 2212 }
2213 2213 data += nsuccs * hashwidth;
2214 2214
2215 2215 if (nparents == 1 || nparents == 2) {
2216 2216 parents = readshas(data, nparents, hashwidth);
2217 2217 if (parents == NULL) {
2218 2218 goto bail;
2219 2219 }
2220 2220 data += nparents * hashwidth;
2221 2221 } else {
2222 2222 parents = Py_None;
2223 2223 }
2224 2224
2225 2225 meta = data + (2 * nmetadata);
2226 2226 metadata = PyTuple_New(nmetadata);
2227 2227 if (metadata == NULL) {
2228 2228 goto bail;
2229 2229 }
2230 2230 for (i = 0; i < nmetadata; i++) {
2231 2231 PyObject *tmp, *left = NULL, *right = NULL;
2232 2232 Py_ssize_t metasize = (unsigned char)(*data++);
2233 2233 left = PyString_FromStringAndSize(meta, metasize);
2234 2234 meta += metasize;
2235 2235 metasize = (unsigned char)(*data++);
2236 2236 right = PyString_FromStringAndSize(meta, metasize);
2237 2237 meta += metasize;
2238 2238 if (!left || !right) {
2239 2239 Py_XDECREF(left);
2240 2240 Py_XDECREF(right);
2241 2241 goto bail;
2242 2242 }
2243 2243 tmp = PyTuple_Pack(2, left, right);
2244 2244 Py_DECREF(left);
2245 2245 Py_DECREF(right);
2246 2246 if (!tmp) {
2247 2247 goto bail;
2248 2248 }
2249 2249 PyTuple_SetItem(metadata, i, tmp);
2250 2250 }
2251 2251 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
2252 2252 metadata, mtime, (int)tz * 60, parents);
2253 2253 bail:
2254 2254 Py_XDECREF(prec);
2255 2255 Py_XDECREF(succs);
2256 2256 Py_XDECREF(metadata);
2257 2257 if (parents != Py_None)
2258 2258 Py_XDECREF(parents);
2259 2259 return ret;
2260 2260 }
2261 2261
2262 2262
2263 2263 static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
2264 2264 const char *data;
2265 2265 Py_ssize_t datalen;
2266 2266 /* only unsigned long because python 2.4, should be Py_ssize_t */
2267 2267 unsigned long offset, stop;
2268 2268 PyObject *markers = NULL;
2269 2269
2270 2270 /* replace kk with nn when we drop Python 2.4 */
2271 2271 if (!PyArg_ParseTuple(args, "s#kk", &data, &datalen, &offset, &stop)) {
2272 2272 return NULL;
2273 2273 }
2274 2274 data += offset;
2275 2275 markers = PyList_New(0);
2276 2276 if (!markers) {
2277 2277 return NULL;
2278 2278 }
2279 2279 while (offset < stop) {
2280 2280 uint32_t msize;
2281 2281 int error;
2282 2282 PyObject *record = fm1readmarker(data, &msize);
2283 2283 if (!record) {
2284 2284 goto bail;
2285 2285 }
2286 2286 error = PyList_Append(markers, record);
2287 2287 Py_DECREF(record);
2288 2288 if (error) {
2289 2289 goto bail;
2290 2290 }
2291 2291 data += msize;
2292 2292 offset += msize;
2293 2293 }
2294 2294 return markers;
2295 2295 bail:
2296 2296 Py_DECREF(markers);
2297 2297 return NULL;
2298 2298 }
2299 2299
2300 2300 static char parsers_doc[] = "Efficient content parsing.";
2301 2301
2302 2302 PyObject *encodedir(PyObject *self, PyObject *args);
2303 2303 PyObject *pathencode(PyObject *self, PyObject *args);
2304 2304 PyObject *lowerencode(PyObject *self, PyObject *args);
2305 2305
2306 2306 static PyMethodDef methods[] = {
2307 2307 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2308 2308 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2309 2309 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2310 2310 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2311 2311 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2312 2312 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2313 2313 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2314 2314 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2315 2315 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
2316 2316 "parse v1 obsolete markers\n"},
2317 2317 {NULL, NULL}
2318 2318 };
2319 2319
2320 2320 void dirs_module_init(PyObject *mod);
2321 void manifest_module_init(PyObject *mod);
2321 2322
2322 2323 static void module_init(PyObject *mod)
2323 2324 {
2324 2325 /* This module constant has two purposes. First, it lets us unit test
2325 2326 * the ImportError raised without hard-coding any error text. This
2326 2327 * means we can change the text in the future without breaking tests,
2327 2328 * even across changesets without a recompile. Second, its presence
2328 2329 * can be used to determine whether the version-checking logic is
2329 2330 * present, which also helps in testing across changesets without a
2330 2331 * recompile. Note that this means the pure-Python version of parsers
2331 2332 * should not have this module constant. */
2332 2333 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2333 2334
2334 2335 dirs_module_init(mod);
2336 manifest_module_init(mod);
2335 2337
2336 2338 indexType.tp_new = PyType_GenericNew;
2337 2339 if (PyType_Ready(&indexType) < 0 ||
2338 2340 PyType_Ready(&dirstateTupleType) < 0)
2339 2341 return;
2340 2342 Py_INCREF(&indexType);
2341 2343 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2342 2344 Py_INCREF(&dirstateTupleType);
2343 2345 PyModule_AddObject(mod, "dirstatetuple",
2344 2346 (PyObject *)&dirstateTupleType);
2345 2347
2346 2348 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2347 2349 -1, -1, -1, -1, nullid, 20);
2348 2350 if (nullentry)
2349 2351 PyObject_GC_UnTrack(nullentry);
2350 2352 }
2351 2353
2352 2354 static int check_python_version(void)
2353 2355 {
2354 2356 PyObject *sys = PyImport_ImportModule("sys"), *ver;
2355 2357 long hexversion;
2356 2358 if (!sys)
2357 2359 return -1;
2358 2360 ver = PyObject_GetAttrString(sys, "hexversion");
2359 2361 Py_DECREF(sys);
2360 2362 if (!ver)
2361 2363 return -1;
2362 2364 hexversion = PyInt_AsLong(ver);
2363 2365 Py_DECREF(ver);
2364 2366 /* sys.hexversion is a 32-bit number by default, so the -1 case
2365 2367 * should only occur in unusual circumstances (e.g. if sys.hexversion
2366 2368 * is manually set to an invalid value). */
2367 2369 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2368 2370 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2369 2371 "modules were compiled with Python " PY_VERSION ", but "
2370 2372 "Mercurial is currently using Python with sys.hexversion=%ld: "
2371 2373 "Python %s\n at: %s", versionerrortext, hexversion,
2372 2374 Py_GetVersion(), Py_GetProgramFullPath());
2373 2375 return -1;
2374 2376 }
2375 2377 return 0;
2376 2378 }
2377 2379
2378 2380 #ifdef IS_PY3K
2379 2381 static struct PyModuleDef parsers_module = {
2380 2382 PyModuleDef_HEAD_INIT,
2381 2383 "parsers",
2382 2384 parsers_doc,
2383 2385 -1,
2384 2386 methods
2385 2387 };
2386 2388
2387 2389 PyMODINIT_FUNC PyInit_parsers(void)
2388 2390 {
2389 2391 PyObject *mod;
2390 2392
2391 2393 if (check_python_version() == -1)
2392 2394 return;
2393 2395 mod = PyModule_Create(&parsers_module);
2394 2396 module_init(mod);
2395 2397 return mod;
2396 2398 }
2397 2399 #else
2398 2400 PyMODINIT_FUNC initparsers(void)
2399 2401 {
2400 2402 PyObject *mod;
2401 2403
2402 2404 if (check_python_version() == -1)
2403 2405 return;
2404 2406 mod = Py_InitModule3("parsers", methods, parsers_doc);
2405 2407 module_init(mod);
2406 2408 }
2407 2409 #endif
@@ -1,648 +1,649 b''
1 1 #
2 2 # This is the mercurial setup script.
3 3 #
4 4 # 'python setup.py install', or
5 5 # 'python setup.py --help' for more options
6 6
7 7 import sys, platform
8 8 if getattr(sys, 'version_info', (0, 0, 0)) < (2, 4, 0, 'final'):
9 9 raise SystemExit("Mercurial requires Python 2.4 or later.")
10 10
if sys.version_info[0] < 3:
    libdir_escape = 'string_escape'

    def b(s):
        '''Emulate a 2.6+ bytes literal on Python 2, where a plain string
        literal already is a byte string: return the argument unchanged.'''
        return s

    def printf(*args, **kwargs):
        '''Small stand-in for the print function: join the arguments with
        single spaces and write them, followed by ``end`` (default
        newline), to ``file`` (default sys.stdout).'''
        stream = kwargs.get('file', sys.stdout)
        terminator = kwargs.get('end', '\n')
        stream.write(b(' ').join(args) + terminator)
else:
    libdir_escape = 'unicode_escape'

    def b(s):
        '''Emulate a 2.6+ bytes literal from a string literal by encoding
        it as latin1.'''
        return s.encode('latin1')

    # looked up through eval so this file still parses where print is
    # a statement
    printf = eval('print')
28 28
29 29 # Solaris Python packaging brain damage
30 30 try:
31 31 import hashlib
32 32 sha = hashlib.sha1()
33 33 except ImportError:
34 34 try:
35 35 import sha
36 36 sha.sha # silence unused import warning
37 37 except ImportError:
38 38 raise SystemExit(
39 39 "Couldn't import standard hashlib (incomplete Python install).")
40 40
41 41 try:
42 42 import zlib
43 43 zlib.compressobj # silence unused import warning
44 44 except ImportError:
45 45 raise SystemExit(
46 46 "Couldn't import standard zlib (incomplete Python install).")
47 47
48 48 # The base IronPython distribution (as of 2.7.1) doesn't support bz2
49 49 isironpython = False
50 50 try:
51 51 isironpython = (platform.python_implementation()
52 52 .lower().find("ironpython") != -1)
53 53 except AttributeError:
54 54 pass
55 55
56 56 if isironpython:
57 57 sys.stderr.write("warning: IronPython detected (no bz2 support)\n")
58 58 else:
59 59 try:
60 60 import bz2
61 61 bz2.BZ2Compressor # silence unused import warning
62 62 except ImportError:
63 63 raise SystemExit(
64 64 "Couldn't import standard bz2 (incomplete Python install).")
65 65
66 66 ispypy = "PyPy" in sys.version
67 67
68 68 import os, stat, subprocess, time
69 69 import re
70 70 import shutil
71 71 import tempfile
72 72 from distutils import log
73 73 from distutils.core import setup, Command, Extension
74 74 from distutils.dist import Distribution
75 75 from distutils.command.build import build
76 76 from distutils.command.build_ext import build_ext
77 77 from distutils.command.build_py import build_py
78 78 from distutils.command.install_lib import install_lib
79 79 from distutils.command.install_scripts import install_scripts
80 80 from distutils.spawn import spawn, find_executable
81 81 from distutils import file_util
82 82 from distutils.errors import CCompilerError, DistutilsExecError
83 83 from distutils.sysconfig import get_python_inc, get_config_var
84 84 from distutils.version import StrictVersion
85 85
86 86 convert2to3 = '--c2to3' in sys.argv
87 87 if convert2to3:
88 88 try:
89 89 from distutils.command.build_py import build_py_2to3 as build_py
90 90 from lib2to3.refactor import get_fixers_from_package as getfixers
91 91 except ImportError:
92 92 if sys.version_info[0] < 3:
93 93 raise SystemExit("--c2to3 is only compatible with python3.")
94 94 raise
95 95 sys.path.append('contrib')
96 96 elif sys.version_info[0] >= 3:
97 97 raise SystemExit("setup.py with python3 needs --c2to3 (experimental)")
98 98
99 99 scripts = ['hg']
100 100 if os.name == 'nt':
101 101 scripts.append('contrib/win32/hg.bat')
102 102
# simplified version of distutils.ccompiler.CCompiler.has_function
# that actually removes its temporary files.
def hasfunction(cc, funcname):
    '''Return True if a tiny C program calling ``funcname`` compiles and
    links with the compiler object ``cc``, False if anything goes wrong.

    ``cc`` must provide compile(sources, output_dir=...) and
    link_executable(objects, target).  The compiler's stderr output is
    discarded, and the scratch directory is removed before returning.
    '''
    tmpdir = tempfile.mkdtemp(prefix='hg-install-')
    devnull = oldstderr = None
    try:
        try:
            fname = os.path.join(tmpdir, 'funcname.c')
            f = open(fname, 'w')
            try:
                f.write('int main(void) {\n')
                f.write('    %s();\n' % funcname)
                f.write('}\n')
            finally:
                f.close()
            # Redirect stderr to /dev/null to hide any error messages
            # from the compiler.
            # This will have to be changed if we ever have to check
            # for a function on Windows.
            devnull = open('/dev/null', 'w')
            oldstderr = os.dup(sys.stderr.fileno())
            os.dup2(devnull.fileno(), sys.stderr.fileno())
            objects = cc.compile([fname], output_dir=tmpdir)
            cc.link_executable(objects, os.path.join(tmpdir, "a.out"))
        except Exception:
            return False
        return True
    finally:
        if oldstderr is not None:
            os.dup2(oldstderr, sys.stderr.fileno())
            # the duplicate was only needed to restore stderr; without
            # this every probe leaks one file descriptor
            os.close(oldstderr)
        if devnull is not None:
            devnull.close()
        shutil.rmtree(tmpdir)
134 134
135 135 # py2exe needs to be installed to work
136 136 try:
137 137 import py2exe
138 138 py2exe.Distribution # silence unused import warning
139 139 py2exeloaded = True
140 140 # import py2exe's patched Distribution class
141 141 from distutils.core import Distribution
142 142 except ImportError:
143 143 py2exeloaded = False
144 144
def runcmd(cmd, env):
    '''Run ``cmd`` (an argument list) and return its (stdout, stderr)
    output.

    ``env`` is the environment for the child process; it is not applied
    on the plan9 fallback path, which goes through os.popen3.
    '''
    if (sys.platform == 'plan9'
        and (sys.version_info[0] == 2 and sys.version_info[1] < 7)):
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        stdin, stdout, stderr = os.popen3(cmd)
        try:
            # popen3 hands back file objects; read them instead of
            # returning their repr()
            return stdout.read(), stderr.read()
        finally:
            stdin.close()
            stdout.close()
            stderr.close()
    else:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, env=env)
        out, err = p.communicate()
        return out, err
157 157
def runhg(cmd, env):
    '''Run an hg command through runcmd() and return its stdout.

    If anything other than the known-harmless warnings shows up on
    stderr, the remaining lines are printed to sys.stderr and an empty
    string is returned instead.
    '''
    out, err = runcmd(cmd, env)
    # If root is executing setup.py, but the repository is owned by
    # another user (as in "sudo python setup.py install") we will get
    # trust warnings since the .hg/hgrc file is untrusted. That is
    # fine, we don't want to load it anyway.  Python may warn about
    # a missing __init__.py in mercurial/locale, we also ignore that.
    harmless = [b('not trusting file'),
                b('warning: Not importing'),
                b('obsolete feature not enabled')]
    complaints = []
    for line in err.splitlines():
        for prefix in harmless:
            if line.startswith(prefix):
                break
        else:
            complaints.append(line)
    if not complaints:
        return out
    printf("stderr from '%s':" % (' '.join(cmd)), file=sys.stderr)
    printf(b('\n').join([b('  ') + e for e in complaints]), file=sys.stderr)
    return ''
174 174
version = ''

# Execute hg out of this directory with a custom environment which
# includes the pure Python modules in mercurial/pure. We also take
# care to not use any hgrc files and do no localization.
pypath = ['mercurial', os.path.join('mercurial', 'pure')]
env = {'PYTHONPATH': os.pathsep.join(pypath),
       'HGRCPATH': '',
       'LANGUAGE': 'C'}
if 'LD_LIBRARY_PATH' in os.environ:
    env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']
if 'SystemRoot' in os.environ:
    # Copy SystemRoot into the custom environment for Python 2.6
    # under Windows. Otherwise, the subprocess will fail with
    # error 0xc0150004. See: http://bugs.python.org/issue3440
    env['SystemRoot'] = os.environ['SystemRoot']

# Work out the version string: from the repository when building out of
# a checkout, otherwise from the metadata file of an "hg archive".
if os.path.isdir('.hg'):
    cmd = [sys.executable, 'hg', 'log', '-r', '.', '--template', '{tags}\n']
    numerictags = [t for t in runhg(cmd, env).split() if t[0].isdigit()]
    hgid = runhg([sys.executable, 'hg', 'id', '-i'], env).strip()
    if numerictags: # tag(s) found
        version = numerictags[-1]
        if hgid.endswith('+'): # propagate the dirty status to the tag
            version += '+'
    else: # no tag found
        ltagcmd = [sys.executable, 'hg', 'parents', '--template',
                   '{latesttag}']
        ltag = runhg(ltagcmd, env)
        changessincecmd = [sys.executable, 'hg', 'log', '-T', 'x\n', '-r',
                           "only(.,'%s')" % ltag]
        changessince = len(runhg(changessincecmd, env).splitlines())
        version = '%s+%s-%s' % (ltag, changessince, hgid)
    if version.endswith('+'):
        version += time.strftime('%Y%m%d')
elif os.path.exists('.hg_archival.txt'):
    # each line is "key: value"; close the file once it has been read
    archival = open('.hg_archival.txt')
    try:
        kw = dict([[t.strip() for t in l.split(':', 1)]
                   for l in archival])
    finally:
        archival.close()
    if 'tag' in kw:
        version = kw['tag']
    elif 'latesttag' in kw:
        if 'changessincelatesttag' in kw:
            version = '%(latesttag)s+%(changessincelatesttag)s-%(node).12s' % kw
        else:
            version = '%(latesttag)s+%(latesttagdistance)s-%(node).12s' % kw
    else:
        version = kw.get('node', '')[:12]

if version:
    f = open("mercurial/__version__.py", "w")
    try:
        f.write('# this file is autogenerated by setup.py\n')
        f.write('version = "%s"\n' % version)
    finally:
        f.close()


# Whatever was computed above, the version that is finally used is the
# one recorded in mercurial/__version__.py, if that module exists.
try:
    from mercurial import __version__
    version = __version__.version
except ImportError:
    version = 'unknown'
235 235
class hgbuild(build):
    '''"build" command with two extra sub-commands run ahead of the
    stock ones.'''

    # build_mo goes first so that files in mercurial/locale/ are found
    # when build_py is run next.
    #
    # build_ext also has to come before build_py. Otherwise, when 2to3
    # is called (in build_py), it will not find osutil & friends,
    # thinking that those modules are global and, consequently, making
    # a mess, now that all module imports are global.
    sub_commands = (
        [('build_mo', None),
         ('build_ext', build.has_ext_modules)]
        + build.sub_commands)
248 248
class hgbuildmo(build):
    '''Compile every i18n/*.po catalog into
    mercurial/locale/<lang>/LC_MESSAGES/hg.mo using msgfmt.'''

    description = "build translations (.mo files)"

    def run(self):
        # Both preconditions are soft: warn and build no translations.
        if not find_executable('msgfmt'):
            self.warn("could not find msgfmt executable, no translations "
                      "will be built")
            return

        podir = 'i18n'
        if not os.path.isdir(podir):
            self.warn("could not find %s/ directory" % podir)
            return

        catalogs = [name for name in os.listdir(podir)
                    if name.endswith('.po')]
        for po in catalogs:
            lang = po[:-3]
            pofile = os.path.join(podir, po)
            modir = os.path.join('locale', lang, 'LC_MESSAGES')
            mobuildfile = os.path.join('mercurial', modir, 'hg.mo')
            cmd = ['msgfmt', '-v', '-o', mobuildfile, pofile]
            if sys.platform != 'sunos5':
                # msgfmt on Solaris does not know about -c
                cmd.append('-c')
            self.mkpath(os.path.join('mercurial', modir))
            self.make_file([pofile], mobuildfile, spawn, (cmd,))
278 278
279 279
class hgdist(Distribution):
    '''Distribution that knows the --pure and --c2to3 global options.'''

    # default for --pure
    pure = ispypy

    global_options = Distribution.global_options + [
        ('pure', None,
         "use pure (slow) Python "
         "code instead of C extensions"),
        ('c2to3', None,
         "(experimental!) convert "
         "code with 2to3"),
    ]

    def has_ext_modules(self):
        # self.ext_modules is emptied in hgbuildpy.finalize_options which is
        # too late for some cases
        if self.pure:
            return False
        return Distribution.has_ext_modules(self)
294 294
class hgbuildext(build_ext):
    '''build_ext that tolerates compiler failures for extensions whose
    ``optional`` attribute is true.'''

    def build_extension(self, ext):
        try:
            build_ext.build_extension(self, ext)
        except CCompilerError:
            if getattr(ext, 'optional', False):
                log.warn("Failed to build optional extension '%s' (skipping)",
                         ext.name)
            else:
                raise
305 305
class hgbuildpy(build_py):
    '''build_py that, for a pure build, substitutes the mercurial.pure
    modules for the C extensions.'''

    if convert2to3:
        fixer_names = sorted(set(getfixers("lib2to3.fixes") +
                                 getfixers("hgfixes")))

    def finalize_options(self):
        build_py.finalize_options(self)

        if not self.distribution.pure:
            # C extensions will be compiled, so the headers must exist
            h = os.path.join(get_python_inc(), 'Python.h')
            if not os.path.exists(h):
                raise SystemExit('Python headers are required to build '
                                 'Mercurial but weren\'t found in %s' % h)
            return

        if self.py_modules is None:
            self.py_modules = []
        for ext in self.distribution.ext_modules:
            if ext.name.startswith("mercurial."):
                # 10 == len("mercurial.")
                self.py_modules.append("mercurial.pure.%s" % ext.name[10:])
        self.distribution.ext_modules = []

    def find_modules(self):
        # Modules found in mercurial.pure are reported as living in
        # "mercurial" itself; that package's __init__ is dropped.
        for module in build_py.find_modules(self):
            if module[0] != "mercurial.pure":
                yield module
            elif module[1] != "__init__":
                yield ("mercurial", module[1], module[2])
335 335
class buildhgextindex(Command):
    '''Write hgext/__index__.py containing ``docs = <pretty-printed
    result of extensions.disabled()>``.'''

    description = 'generate prebuilt index of hgext (for frozen package)'
    user_options = []
    _indexfilename = 'hgext/__index__.py'

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        indexpath = self._indexfilename
        if os.path.exists(indexpath):
            # blank out an existing index before the child process below
            # runs (presumably so it is not picked up -- TODO confirm)
            stale = open(indexpath, 'w')
            stale.write('# empty\n')
            stale.close()

        # here no extension enabled, disabled() lists up everything
        code = ('import pprint; from mercurial import extensions; '
                'pprint.pprint(extensions.disabled())')
        out, err = runcmd([sys.executable, '-c', code], env)
        if err:
            raise DistutilsExecError(err)

        index = open(indexpath, 'w')
        index.write('# this file is autogenerated by setup.py\n')
        index.write('docs = ')
        index.write(out)
        index.close()
365 365
class buildhgexe(build_ext):
    '''On Windows, generate mercurial/hgpythonlib.h and compile and link
    mercurial/exewrapper.c into an "hg" executable; a no-op elsewhere.'''

    description = 'compile hg.exe from mercurial/exewrapper.c'

    def build_extensions(self):
        if os.name != 'nt':
            return
        cc = self.compiler
        if isinstance(cc, HackedMingw32CCompiler):
            cc.compiler_so = cc.compiler # no -mdll
            cc.dll_libraries = [] # no -lmsrvc90

        # name of the python library, from the major/minor version bytes
        hexversion = sys.hexversion
        major = hexversion >> 24
        minor = (hexversion >> 16) & 0xff
        pythonlib = 'python%d%d' % (major, minor)

        header = open('mercurial/hgpythonlib.h', 'wb')
        header.write('/* this file is autogenerated by setup.py */\n')
        header.write('#define HGPYTHONLIB "%s"\n' % pythonlib)
        header.close()

        objects = cc.compile(['mercurial/exewrapper.c'],
                             output_dir=self.build_temp)
        outdir = os.path.dirname(self.get_ext_fullpath('dummy'))
        target = os.path.join(outdir, 'hg')
        cc.link_executable(objects, target,
                           libraries=[],
                           output_dir=self.build_temp)
388 388
class hginstalllib(install_lib):
    '''
    install_lib variant that sets the permission bits of every file it
    copies instead of merely preserving the source file's mode. With a
    umask of something like 027, preserving the permissions when copying
    would lead to a broken install.

    Passing keep_permissions=False to copy_file would not be enough, as
    a umask might still be applied.
    '''

    def run(self):
        realcopyfile = file_util.copy_file

        # rwxr-xr-x, rw-r--r-- and the full set of permission bits
        execmode = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                    stat.S_IROTH | stat.S_IXOTH)
        plainmode = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP |
                     stat.S_IROTH)
        permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

        def copyfileandsetmode(*args, **kwargs):
            src, dst = args[0], args[1]
            dst, copied = realcopyfile(*args, **kwargs)
            if not copied:
                return
            srcmode = os.stat(src)[stat.ST_MODE]
            # Persist executable bit (apply it to group and other if user
            # has it)
            if srcmode & stat.S_IXUSR:
                setmode = execmode
            else:
                setmode = plainmode
            os.chmod(dst, (stat.S_IMODE(srcmode) & ~permbits) | setmode)

        # swap the module-level copy function only while install_lib runs
        file_util.copy_file = copyfileandsetmode
        try:
            install_lib.run(self)
        finally:
            file_util.copy_file = realcopyfile
421 421
class hginstallscripts(install_scripts):
    '''
    install_scripts variant that substitutes the configured module
    directory for @LIBDIR@ in the installed scripts. Where possible the
    path is written relative to the script directory.
    '''

    def initialize_options(self):
        install_scripts.initialize_options(self)

        self.install_lib = None

    def finalize_options(self):
        install_scripts.finalize_options(self)
        self.set_undefined_options('install',
                                   ('install_lib', 'install_lib'))

    def run(self):
        install_scripts.run(self)

        scriptdrive = os.path.splitdrive(self.install_dir)[0]
        libdrive = os.path.splitdrive(self.install_lib)[0]
        if scriptdrive == libdrive:
            common = os.path.commonprefix((self.install_dir, self.install_lib))
            rest = self.install_dir[len(common):]
            # one '..' per non-empty piece os.path.split finds in rest
            uplevel = len([n for n in os.path.split(rest) if n])

            libdir = uplevel * ('..' + os.sep) + self.install_lib[len(common):]
        else:
            # can't make relative paths from one drive to another, so use an
            # absolute path instead
            libdir = self.install_lib

        for outfile in self.outfiles:
            infp = open(outfile, 'rb')
            data = infp.read()
            infp.close()

            # skip binary files
            if b('\0') in data:
                continue

            patched = data.replace(b('@LIBDIR@'),
                                   libdir.encode(libdir_escape))
            outfp = open(outfile, 'wb')
            outfp.write(patched)
            outfp.close()
467 467
468 468 cmdclass = {'build': hgbuild,
469 469 'build_mo': hgbuildmo,
470 470 'build_ext': hgbuildext,
471 471 'build_py': hgbuildpy,
472 472 'build_hgextindex': buildhgextindex,
473 473 'install_lib': hginstalllib,
474 474 'install_scripts': hginstallscripts,
475 475 'build_hgexe': buildhgexe,
476 476 }
477 477
478 478 packages = ['mercurial', 'mercurial.hgweb', 'mercurial.httpclient',
479 479 'hgext', 'hgext.convert', 'hgext.highlight', 'hgext.zeroconf',
480 480 'hgext.largefiles']
481 481
482 482 pymodules = []
483 483
484 484 common_depends = ['mercurial/util.h']
485 485
486 486 extmodules = [
487 487 Extension('mercurial.base85', ['mercurial/base85.c'],
488 488 depends=common_depends),
489 489 Extension('mercurial.bdiff', ['mercurial/bdiff.c'],
490 490 depends=common_depends),
491 491 Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c'],
492 492 depends=common_depends),
493 493 Extension('mercurial.mpatch', ['mercurial/mpatch.c'],
494 494 depends=common_depends),
495 495 Extension('mercurial.parsers', ['mercurial/dirs.c',
496 'mercurial/manifest.c',
496 497 'mercurial/parsers.c',
497 498 'mercurial/pathencode.c'],
498 499 depends=common_depends),
499 500 ]
500 501
501 502 osutil_ldflags = []
502 503
503 504 if sys.platform == 'darwin':
504 505 osutil_ldflags += ['-framework', 'ApplicationServices']
505 506
506 507 # disable osutil.c under windows + python 2.4 (issue1364)
507 508 if sys.platform == 'win32' and sys.version_info < (2, 5, 0, 'final'):
508 509 pymodules.append('mercurial.pure.osutil')
509 510 else:
510 511 extmodules.append(Extension('mercurial.osutil', ['mercurial/osutil.c'],
511 512 extra_link_args=osutil_ldflags,
512 513 depends=common_depends))
513 514
try:
    from distutils import cygwinccompiler

    # keep a handle on the stock class; its name in the module is
    # rebound to the subclass below
    compiler = cygwinccompiler.Mingw32CCompiler

    class HackedMingw32CCompiler(cygwinccompiler.Mingw32CCompiler):
        '''Mingw32CCompiler with -mno-cygwin stripped from its command
        lines.'''

        def __init__(self, *args, **kwargs):
            compiler.__init__(self, *args, **kwargs)
            # the -mno-cygwin option has been deprecated for years
            for attr in ('compiler', 'compiler_so',
                         'linker_exe', 'linker_so'):
                flags = getattr(self, attr)
                if '-mno-cygwin' in flags:
                    flags.remove('-mno-cygwin')

    cygwinccompiler.Mingw32CCompiler = HackedMingw32CCompiler
except ImportError:
    # the cygwinccompiler package is not available on some Python
    # distributions like the ones from the optware project for Synology
    # DiskStation boxes
    class HackedMingw32CCompiler(object):
        pass
536 537
537 538 packagedata = {'mercurial': ['locale/*/LC_MESSAGES/hg.mo',
538 539 'help/*.txt',
539 540 'default.d/*.rc',
540 541 'dummycert.pem']}
541 542
def ordinarypath(p):
    '''Tell whether ``p`` is an ordinary name: non-empty, not starting
    with '.' and not ending with '~'. An empty ``p`` is returned as is.'''
    if not p:
        return p
    hidden = p[0] == '.'
    backup = p[-1] == '~'
    return not (hidden or backup)
544 545
545 546 for root in ('templates',):
546 547 for curdir, dirs, files in os.walk(os.path.join('mercurial', root)):
547 548 curdir = curdir.split(os.sep, 1)[1]
548 549 dirs[:] = filter(ordinarypath, dirs)
549 550 for f in filter(ordinarypath, files):
550 551 f = os.path.join(curdir, f)
551 552 packagedata['mercurial'].append(f)
552 553
553 554 datafiles = []
554 555 setupversion = version
555 556 extra = {}
556 557
557 558 if py2exeloaded:
558 559 extra['console'] = [
559 560 {'script':'hg',
560 561 'copyright':'Copyright (C) 2005-2015 Matt Mackall and others',
561 562 'product_version':version}]
562 563 # sub command of 'build' because 'py2exe' does not handle sub_commands
563 564 build.sub_commands.insert(0, ('build_hgextindex', None))
564 565
565 566 if os.name == 'nt':
566 567 # Windows binary file versions for exe/dll files must have the
567 568 # form W.X.Y.Z, where W,X,Y,Z are numbers in the range 0..65535
568 569 setupversion = version.split('+', 1)[0]
569 570
570 571 if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
571 572 version = runcmd(['/usr/bin/xcodebuild', '-version'], {})[0].splitlines()
572 573 if version:
573 574 version = version[0]
574 575 xcode4 = (version.startswith('Xcode') and
575 576 StrictVersion(version.split()[1]) >= StrictVersion('4.0'))
576 577 xcode51 = re.match(r'^Xcode\s+5\.1', version) is not None
577 578 else:
578 579 # xcodebuild returns empty on OS X Lion with XCode 4.3 not
579 580 # installed, but instead with only command-line tools. Assume
580 581 # that only happens on >= Lion, thus no PPC support.
581 582 xcode4 = True
582 583 xcode51 = False
583 584
584 585 # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
585 586 # distutils.sysconfig
586 587 if xcode4:
587 588 os.environ['ARCHFLAGS'] = ''
588 589
589 590 # XCode 5.1 changes clang such that it now fails to compile if the
590 591 # -mno-fused-madd flag is passed, but the version of Python shipped with
591 592 # OS X 10.9 Mavericks includes this flag. This causes problems in all
592 593 # C extension modules, and a bug has been filed upstream at
593 594 # http://bugs.python.org/issue21244. We also need to patch this here
594 595 # so Mercurial can continue to compile in the meantime.
595 596 if xcode51:
596 597 cflags = get_config_var('CFLAGS')
597 598 if cflags and re.search(r'-mno-fused-madd\b', cflags) is not None:
598 599 os.environ['CFLAGS'] = (
599 600 os.environ.get('CFLAGS', '') + ' -Qunused-arguments')
600 601
601 602 setup(name='mercurial',
602 603 version=setupversion,
603 604 author='Matt Mackall and many others',
604 605 author_email='mercurial@selenic.com',
605 606 url='http://mercurial.selenic.com/',
606 607 download_url='http://mercurial.selenic.com/release/',
607 608 description=('Fast scalable distributed SCM (revision control, version '
608 609 'control) system'),
609 610 long_description=('Mercurial is a distributed SCM tool written in Python.'
610 611 ' It is used by a number of large projects that require'
611 612 ' fast, reliable distributed revision control, such as '
612 613 'Mozilla.'),
613 614 license='GNU GPLv2 or any later version',
614 615 classifiers=[
615 616 'Development Status :: 6 - Mature',
616 617 'Environment :: Console',
617 618 'Intended Audience :: Developers',
618 619 'Intended Audience :: System Administrators',
619 620 'License :: OSI Approved :: GNU General Public License (GPL)',
620 621 'Natural Language :: Danish',
621 622 'Natural Language :: English',
622 623 'Natural Language :: German',
623 624 'Natural Language :: Italian',
624 625 'Natural Language :: Japanese',
625 626 'Natural Language :: Portuguese (Brazilian)',
626 627 'Operating System :: Microsoft :: Windows',
627 628 'Operating System :: OS Independent',
628 629 'Operating System :: POSIX',
629 630 'Programming Language :: C',
630 631 'Programming Language :: Python',
631 632 'Topic :: Software Development :: Version Control',
632 633 ],
633 634 scripts=scripts,
634 635 packages=packages,
635 636 py_modules=pymodules,
636 637 ext_modules=extmodules,
637 638 data_files=datafiles,
638 639 package_data=packagedata,
639 640 cmdclass=cmdclass,
640 641 distclass=hgdist,
641 642 options={'py2exe': {'packages': ['hgext', 'email']},
642 643 'bdist_mpkg': {'zipdist': False,
643 644 'license': 'COPYING',
644 645 'readme': 'contrib/macosx/Readme.html',
645 646 'welcome': 'contrib/macosx/Welcome.html',
646 647 },
647 648 },
648 649 **extra)
General Comments 0
You need to be logged in to leave comments. Login now