##// END OF EJS Templates
cext: wrap before brace for functions...
Gregory Szorc -
r34441:7ed0750c default
parent child Browse files
Show More
@@ -1,937 +1,941 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
18 18 #define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 line *lines;
32 32 int numlines; /* number of line entries */
33 33 int livelines; /* number of non-deleted lines */
34 34 int maxlines; /* allocated number of lines */
35 35 bool dirty;
36 36 } lazymanifest;
37 37
38 38 #define MANIFEST_OOM -1
39 39 #define MANIFEST_NOT_SORTED -2
40 40 #define MANIFEST_MALFORMED -3
41 41
42 42 /* get the length of the path for a line */
43 static size_t pathlen(line *l) {
43 static size_t pathlen(line *l)
44 {
44 45 return strlen(l->start);
45 46 }
46 47
47 48 /* get the node value of a single line */
48 static PyObject *nodeof(line *l) {
49 static PyObject *nodeof(line *l)
50 {
49 51 char *s = l->start;
50 52 ssize_t llen = pathlen(l);
51 53 PyObject *hash = unhexlify(s + llen + 1, 40);
52 54 if (!hash) {
53 55 return NULL;
54 56 }
55 57 if (l->hash_suffix != '\0') {
56 58 char newhash[21];
57 59 memcpy(newhash, PyBytes_AsString(hash), 20);
58 60 Py_DECREF(hash);
59 61 newhash[20] = l->hash_suffix;
60 62 hash = PyBytes_FromStringAndSize(newhash, 21);
61 63 }
62 64 return hash;
63 65 }
64 66
65 67 /* get the node hash and flags of a line as a tuple */
66 68 static PyObject *hashflags(line *l)
67 69 {
68 70 char *s = l->start;
69 71 size_t plen = pathlen(l);
70 72 PyObject *hash = nodeof(l);
71 73
72 74 /* 40 for hash, 1 for null byte, 1 for newline */
73 75 size_t hplen = plen + 42;
74 76 Py_ssize_t flen = l->len - hplen;
75 77 PyObject *flags;
76 78 PyObject *tup;
77 79
78 80 if (!hash)
79 81 return NULL;
80 82 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
81 83 if (!flags) {
82 84 Py_DECREF(hash);
83 85 return NULL;
84 86 }
85 87 tup = PyTuple_Pack(2, hash, flags);
86 88 Py_DECREF(flags);
87 89 Py_DECREF(hash);
88 90 return tup;
89 91 }
90 92
91 93 /* if we're about to run out of space in the line index, add more */
92 94 static bool realloc_if_full(lazymanifest *self)
93 95 {
94 96 if (self->numlines == self->maxlines) {
95 97 self->maxlines *= 2;
96 98 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
97 99 }
98 100 return !!self->lines;
99 101 }
100 102
101 103 /*
102 104 * Find the line boundaries in the manifest that 'data' points to and store
103 105 * information about each line in 'self'.
104 106 */
105 107 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
106 108 {
107 109 char *prev = NULL;
108 110 while (len > 0) {
109 111 line *l;
110 112 char *next = memchr(data, '\n', len);
111 113 if (!next) {
112 114 return MANIFEST_MALFORMED;
113 115 }
114 116 next++; /* advance past newline */
115 117 if (!realloc_if_full(self)) {
116 118 return MANIFEST_OOM; /* no memory */
117 119 }
118 120 if (prev && strcmp(prev, data) > -1) {
119 121 /* This data isn't sorted, so we have to abort. */
120 122 return MANIFEST_NOT_SORTED;
121 123 }
122 124 l = self->lines + ((self->numlines)++);
123 125 l->start = data;
124 126 l->len = next - data;
125 127 l->hash_suffix = '\0';
126 128 l->from_malloc = false;
127 129 l->deleted = false;
128 130 len = len - l->len;
129 131 prev = data;
130 132 data = next;
131 133 }
132 134 self->livelines = self->numlines;
133 135 return 0;
134 136 }
135 137
136 138 static int lazymanifest_init(lazymanifest *self, PyObject *args)
137 139 {
138 140 char *data;
139 141 Py_ssize_t len;
140 142 int err, ret;
141 143 PyObject *pydata;
142 144 if (!PyArg_ParseTuple(args, "S", &pydata)) {
143 145 return -1;
144 146 }
145 147 err = PyBytes_AsStringAndSize(pydata, &data, &len);
146 148
147 149 self->dirty = false;
148 150 if (err == -1)
149 151 return -1;
150 152 self->pydata = pydata;
151 153 Py_INCREF(self->pydata);
152 154 Py_BEGIN_ALLOW_THREADS
153 155 self->lines = malloc(DEFAULT_LINES * sizeof(line));
154 156 self->maxlines = DEFAULT_LINES;
155 157 self->numlines = 0;
156 158 if (!self->lines)
157 159 ret = MANIFEST_OOM;
158 160 else
159 161 ret = find_lines(self, data, len);
160 162 Py_END_ALLOW_THREADS
161 163 switch (ret) {
162 164 case 0:
163 165 break;
164 166 case MANIFEST_OOM:
165 167 PyErr_NoMemory();
166 168 break;
167 169 case MANIFEST_NOT_SORTED:
168 170 PyErr_Format(PyExc_ValueError,
169 171 "Manifest lines not in sorted order.");
170 172 break;
171 173 case MANIFEST_MALFORMED:
172 174 PyErr_Format(PyExc_ValueError,
173 175 "Manifest did not end in a newline.");
174 176 break;
175 177 default:
176 178 PyErr_Format(PyExc_ValueError,
177 179 "Unknown problem parsing manifest.");
178 180 }
179 181 return ret == 0 ? 0 : -1;
180 182 }
181 183
182 184 static void lazymanifest_dealloc(lazymanifest *self)
183 185 {
184 186 /* free any extra lines we had to allocate */
185 187 int i;
186 188 for (i = 0; i < self->numlines; i++) {
187 189 if (self->lines[i].from_malloc) {
188 190 free(self->lines[i].start);
189 191 }
190 192 }
191 193 if (self->lines) {
192 194 free(self->lines);
193 195 self->lines = NULL;
194 196 }
195 197 if (self->pydata) {
196 198 Py_DECREF(self->pydata);
197 199 self->pydata = NULL;
198 200 }
199 201 PyObject_Del(self);
200 202 }
201 203
202 204 /* iteration support */
203 205
204 206 typedef struct {
205 207 PyObject_HEAD lazymanifest *m;
206 208 Py_ssize_t pos;
207 209 } lmIter;
208 210
209 211 static void lmiter_dealloc(PyObject *o)
210 212 {
211 213 lmIter *self = (lmIter *)o;
212 214 Py_DECREF(self->m);
213 215 PyObject_Del(self);
214 216 }
215 217
216 218 static line *lmiter_nextline(lmIter *self)
217 219 {
218 220 do {
219 221 self->pos++;
220 222 if (self->pos >= self->m->numlines) {
221 223 return NULL;
222 224 }
223 225 /* skip over deleted manifest entries */
224 226 } while (self->m->lines[self->pos].deleted);
225 227 return self->m->lines + self->pos;
226 228 }
227 229
228 230 static PyObject *lmiter_iterentriesnext(PyObject *o)
229 231 {
230 232 size_t pl;
231 233 line *l;
232 234 Py_ssize_t consumed;
233 235 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
234 236 l = lmiter_nextline((lmIter *)o);
235 237 if (!l) {
236 238 goto done;
237 239 }
238 240 pl = pathlen(l);
239 241 path = PyBytes_FromStringAndSize(l->start, pl);
240 242 hash = nodeof(l);
241 243 consumed = pl + 41;
242 244 flags = PyBytes_FromStringAndSize(l->start + consumed,
243 245 l->len - consumed - 1);
244 246 if (!path || !hash || !flags) {
245 247 goto done;
246 248 }
247 249 ret = PyTuple_Pack(3, path, hash, flags);
248 250 done:
249 251 Py_XDECREF(path);
250 252 Py_XDECREF(hash);
251 253 Py_XDECREF(flags);
252 254 return ret;
253 255 }
254 256
255 257 #ifdef IS_PY3K
256 258 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
257 259 #else
258 260 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
259 261 | Py_TPFLAGS_HAVE_ITER
260 262 #endif
261 263
262 264 static PyTypeObject lazymanifestEntriesIterator = {
263 265 PyVarObject_HEAD_INIT(NULL, 0)
264 266 "parsers.lazymanifest.entriesiterator", /*tp_name */
265 267 sizeof(lmIter), /*tp_basicsize */
266 268 0, /*tp_itemsize */
267 269 lmiter_dealloc, /*tp_dealloc */
268 270 0, /*tp_print */
269 271 0, /*tp_getattr */
270 272 0, /*tp_setattr */
271 273 0, /*tp_compare */
272 274 0, /*tp_repr */
273 275 0, /*tp_as_number */
274 276 0, /*tp_as_sequence */
275 277 0, /*tp_as_mapping */
276 278 0, /*tp_hash */
277 279 0, /*tp_call */
278 280 0, /*tp_str */
279 281 0, /*tp_getattro */
280 282 0, /*tp_setattro */
281 283 0, /*tp_as_buffer */
282 284 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
283 285 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
284 286 0, /* tp_traverse */
285 287 0, /* tp_clear */
286 288 0, /* tp_richcompare */
287 289 0, /* tp_weaklistoffset */
288 290 PyObject_SelfIter, /* tp_iter: __iter__() method */
289 291 lmiter_iterentriesnext, /* tp_iternext: next() method */
290 292 };
291 293
292 294 static PyObject *lmiter_iterkeysnext(PyObject *o)
293 295 {
294 296 size_t pl;
295 297 line *l = lmiter_nextline((lmIter *)o);
296 298 if (!l) {
297 299 return NULL;
298 300 }
299 301 pl = pathlen(l);
300 302 return PyBytes_FromStringAndSize(l->start, pl);
301 303 }
302 304
303 305 #ifdef IS_PY3K
304 306 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
305 307 #else
306 308 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
307 309 | Py_TPFLAGS_HAVE_ITER
308 310 #endif
309 311
310 312 static PyTypeObject lazymanifestKeysIterator = {
311 313 PyVarObject_HEAD_INIT(NULL, 0)
312 314 "parsers.lazymanifest.keysiterator", /*tp_name */
313 315 sizeof(lmIter), /*tp_basicsize */
314 316 0, /*tp_itemsize */
315 317 lmiter_dealloc, /*tp_dealloc */
316 318 0, /*tp_print */
317 319 0, /*tp_getattr */
318 320 0, /*tp_setattr */
319 321 0, /*tp_compare */
320 322 0, /*tp_repr */
321 323 0, /*tp_as_number */
322 324 0, /*tp_as_sequence */
323 325 0, /*tp_as_mapping */
324 326 0, /*tp_hash */
325 327 0, /*tp_call */
326 328 0, /*tp_str */
327 329 0, /*tp_getattro */
328 330 0, /*tp_setattro */
329 331 0, /*tp_as_buffer */
330 332 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
331 333 "Keys iterator for a lazymanifest.", /* tp_doc */
332 334 0, /* tp_traverse */
333 335 0, /* tp_clear */
334 336 0, /* tp_richcompare */
335 337 0, /* tp_weaklistoffset */
336 338 PyObject_SelfIter, /* tp_iter: __iter__() method */
337 339 lmiter_iterkeysnext, /* tp_iternext: next() method */
338 340 };
339 341
340 342 static lazymanifest *lazymanifest_copy(lazymanifest *self);
341 343
342 344 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
343 345 {
344 346 lmIter *i = NULL;
345 347 lazymanifest *t = lazymanifest_copy(self);
346 348 if (!t) {
347 349 PyErr_NoMemory();
348 350 return NULL;
349 351 }
350 352 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
351 353 if (i) {
352 354 i->m = t;
353 355 i->pos = -1;
354 356 } else {
355 357 Py_DECREF(t);
356 358 PyErr_NoMemory();
357 359 }
358 360 return (PyObject *)i;
359 361 }
360 362
361 363 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
362 364 {
363 365 lmIter *i = NULL;
364 366 lazymanifest *t = lazymanifest_copy(self);
365 367 if (!t) {
366 368 PyErr_NoMemory();
367 369 return NULL;
368 370 }
369 371 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
370 372 if (i) {
371 373 i->m = t;
372 374 i->pos = -1;
373 375 } else {
374 376 Py_DECREF(t);
375 377 PyErr_NoMemory();
376 378 }
377 379 return (PyObject *)i;
378 380 }
379 381
380 382 /* __getitem__ and __setitem__ support */
381 383
382 384 static Py_ssize_t lazymanifest_size(lazymanifest *self)
383 385 {
384 386 return self->livelines;
385 387 }
386 388
387 389 static int linecmp(const void *left, const void *right)
388 390 {
389 391 return strcmp(((const line *)left)->start,
390 392 ((const line *)right)->start);
391 393 }
392 394
393 395 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
394 396 {
395 397 line needle;
396 398 line *hit;
397 399 if (!PyBytes_Check(key)) {
398 400 PyErr_Format(PyExc_TypeError,
399 401 "getitem: manifest keys must be a string.");
400 402 return NULL;
401 403 }
402 404 needle.start = PyBytes_AsString(key);
403 405 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
404 406 &linecmp);
405 407 if (!hit || hit->deleted) {
406 408 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
407 409 return NULL;
408 410 }
409 411 return hashflags(hit);
410 412 }
411 413
412 414 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
413 415 {
414 416 line needle;
415 417 line *hit;
416 418 if (!PyBytes_Check(key)) {
417 419 PyErr_Format(PyExc_TypeError,
418 420 "delitem: manifest keys must be a string.");
419 421 return -1;
420 422 }
421 423 needle.start = PyBytes_AsString(key);
422 424 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
423 425 &linecmp);
424 426 if (!hit || hit->deleted) {
425 427 PyErr_Format(PyExc_KeyError,
426 428 "Tried to delete nonexistent manifest entry.");
427 429 return -1;
428 430 }
429 431 self->dirty = true;
430 432 hit->deleted = true;
431 433 self->livelines--;
432 434 return 0;
433 435 }
434 436
435 437 /* Do a binary search for the insertion point for new, creating the
436 438 * new entry if needed. */
437 static int internalsetitem(lazymanifest *self, line *new) {
439 static int internalsetitem(lazymanifest *self, line *new)
440 {
438 441 int start = 0, end = self->numlines;
439 442 while (start < end) {
440 443 int pos = start + (end - start) / 2;
441 444 int c = linecmp(new, self->lines + pos);
442 445 if (c < 0)
443 446 end = pos;
444 447 else if (c > 0)
445 448 start = pos + 1;
446 449 else {
447 450 if (self->lines[pos].deleted)
448 451 self->livelines++;
449 452 if (self->lines[pos].from_malloc)
450 453 free(self->lines[pos].start);
451 454 start = pos;
452 455 goto finish;
453 456 }
454 457 }
455 458 /* being here means we need to do an insert */
456 459 if (!realloc_if_full(self)) {
457 460 PyErr_NoMemory();
458 461 return -1;
459 462 }
460 463 memmove(self->lines + start + 1, self->lines + start,
461 464 (self->numlines - start) * sizeof(line));
462 465 self->numlines++;
463 466 self->livelines++;
464 467 finish:
465 468 self->lines[start] = *new;
466 469 self->dirty = true;
467 470 return 0;
468 471 }
469 472
470 473 static int lazymanifest_setitem(
471 474 lazymanifest *self, PyObject *key, PyObject *value)
472 475 {
473 476 char *path;
474 477 Py_ssize_t plen;
475 478 PyObject *pyhash;
476 479 Py_ssize_t hlen;
477 480 char *hash;
478 481 PyObject *pyflags;
479 482 char *flags;
480 483 Py_ssize_t flen;
481 484 size_t dlen;
482 485 char *dest;
483 486 int i;
484 487 line new;
485 488 if (!PyBytes_Check(key)) {
486 489 PyErr_Format(PyExc_TypeError,
487 490 "setitem: manifest keys must be a string.");
488 491 return -1;
489 492 }
490 493 if (!value) {
491 494 return lazymanifest_delitem(self, key);
492 495 }
493 496 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
494 497 PyErr_Format(PyExc_TypeError,
495 498 "Manifest values must be a tuple of (node, flags).");
496 499 return -1;
497 500 }
498 501 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
499 502 return -1;
500 503 }
501 504
502 505 pyhash = PyTuple_GetItem(value, 0);
503 506 if (!PyBytes_Check(pyhash)) {
504 507 PyErr_Format(PyExc_TypeError,
505 508 "node must be a 20-byte string");
506 509 return -1;
507 510 }
508 511 hlen = PyBytes_Size(pyhash);
509 512 /* Some parts of the codebase try and set 21 or 22
510 513 * byte "hash" values in order to perturb things for
511 514 * status. We have to preserve at least the 21st
512 515 * byte. Sigh. If there's a 22nd byte, we drop it on
513 516 * the floor, which works fine.
514 517 */
515 518 if (hlen != 20 && hlen != 21 && hlen != 22) {
516 519 PyErr_Format(PyExc_TypeError,
517 520 "node must be a 20-byte string");
518 521 return -1;
519 522 }
520 523 hash = PyBytes_AsString(pyhash);
521 524
522 525 pyflags = PyTuple_GetItem(value, 1);
523 526 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
524 527 PyErr_Format(PyExc_TypeError,
525 528 "flags must a 0 or 1 byte string");
526 529 return -1;
527 530 }
528 531 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
529 532 return -1;
530 533 }
531 534 /* one null byte and one newline */
532 535 dlen = plen + 41 + flen + 1;
533 536 dest = malloc(dlen);
534 537 if (!dest) {
535 538 PyErr_NoMemory();
536 539 return -1;
537 540 }
538 541 memcpy(dest, path, plen + 1);
539 542 for (i = 0; i < 20; i++) {
540 543 /* Cast to unsigned, so it will not get sign-extended when promoted
541 544 * to int (as is done when passing to a variadic function)
542 545 */
543 546 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
544 547 }
545 548 memcpy(dest + plen + 41, flags, flen);
546 549 dest[plen + 41 + flen] = '\n';
547 550 new.start = dest;
548 551 new.len = dlen;
549 552 new.hash_suffix = '\0';
550 553 if (hlen > 20) {
551 554 new.hash_suffix = hash[20];
552 555 }
553 556 new.from_malloc = true; /* is `start` a pointer we allocated? */
554 557 new.deleted = false; /* is this entry deleted? */
555 558 if (internalsetitem(self, &new)) {
556 559 return -1;
557 560 }
558 561 return 0;
559 562 }
560 563
561 564 static PyMappingMethods lazymanifest_mapping_methods = {
562 565 (lenfunc)lazymanifest_size, /* mp_length */
563 566 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
564 567 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
565 568 };
566 569
567 570 /* sequence methods (important or __contains__ builds an iterator) */
568 571
569 572 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
570 573 {
571 574 line needle;
572 575 line *hit;
573 576 if (!PyBytes_Check(key)) {
574 577 /* Our keys are always strings, so if the contains
575 578 * check is for a non-string, just return false. */
576 579 return 0;
577 580 }
578 581 needle.start = PyBytes_AsString(key);
579 582 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
580 583 &linecmp);
581 584 if (!hit || hit->deleted) {
582 585 return 0;
583 586 }
584 587 return 1;
585 588 }
586 589
587 590 static PySequenceMethods lazymanifest_seq_meths = {
588 591 (lenfunc)lazymanifest_size, /* sq_length */
589 592 0, /* sq_concat */
590 593 0, /* sq_repeat */
591 594 0, /* sq_item */
592 595 0, /* sq_slice */
593 596 0, /* sq_ass_item */
594 597 0, /* sq_ass_slice */
595 598 (objobjproc)lazymanifest_contains, /* sq_contains */
596 599 0, /* sq_inplace_concat */
597 600 0, /* sq_inplace_repeat */
598 601 };
599 602
600 603
601 604 /* Other methods (copy, diff, etc) */
602 605 static PyTypeObject lazymanifestType;
603 606
604 607 /* If the manifest has changes, build the new manifest text and reindex it. */
605 static int compact(lazymanifest *self) {
608 static int compact(lazymanifest *self)
609 {
606 610 int i;
607 611 ssize_t need = 0;
608 612 char *data;
609 613 line *src, *dst;
610 614 PyObject *pydata;
611 615 if (!self->dirty)
612 616 return 0;
613 617 for (i = 0; i < self->numlines; i++) {
614 618 if (!self->lines[i].deleted) {
615 619 need += self->lines[i].len;
616 620 }
617 621 }
618 622 pydata = PyBytes_FromStringAndSize(NULL, need);
619 623 if (!pydata)
620 624 return -1;
621 625 data = PyBytes_AsString(pydata);
622 626 if (!data) {
623 627 return -1;
624 628 }
625 629 src = self->lines;
626 630 dst = self->lines;
627 631 for (i = 0; i < self->numlines; i++, src++) {
628 632 char *tofree = NULL;
629 633 if (src->from_malloc) {
630 634 tofree = src->start;
631 635 }
632 636 if (!src->deleted) {
633 637 memcpy(data, src->start, src->len);
634 638 *dst = *src;
635 639 dst->start = data;
636 640 dst->from_malloc = false;
637 641 data += dst->len;
638 642 dst++;
639 643 }
640 644 free(tofree);
641 645 }
642 646 Py_DECREF(self->pydata);
643 647 self->pydata = pydata;
644 648 self->numlines = self->livelines;
645 649 self->dirty = false;
646 650 return 0;
647 651 }
648 652
649 653 static PyObject *lazymanifest_text(lazymanifest *self)
650 654 {
651 655 if (compact(self) != 0) {
652 656 PyErr_NoMemory();
653 657 return NULL;
654 658 }
655 659 Py_INCREF(self->pydata);
656 660 return self->pydata;
657 661 }
658 662
659 663 static lazymanifest *lazymanifest_copy(lazymanifest *self)
660 664 {
661 665 lazymanifest *copy = NULL;
662 666 if (compact(self) != 0) {
663 667 goto nomem;
664 668 }
665 669 copy = PyObject_New(lazymanifest, &lazymanifestType);
666 670 if (!copy) {
667 671 goto nomem;
668 672 }
669 673 copy->numlines = self->numlines;
670 674 copy->livelines = self->livelines;
671 675 copy->dirty = false;
672 676 copy->lines = malloc(self->maxlines *sizeof(line));
673 677 if (!copy->lines) {
674 678 goto nomem;
675 679 }
676 680 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
677 681 copy->maxlines = self->maxlines;
678 682 copy->pydata = self->pydata;
679 683 Py_INCREF(copy->pydata);
680 684 return copy;
681 685 nomem:
682 686 PyErr_NoMemory();
683 687 Py_XDECREF(copy);
684 688 return NULL;
685 689 }
686 690
687 691 static lazymanifest *lazymanifest_filtercopy(
688 692 lazymanifest *self, PyObject *matchfn)
689 693 {
690 694 lazymanifest *copy = NULL;
691 695 int i;
692 696 if (!PyCallable_Check(matchfn)) {
693 697 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
694 698 return NULL;
695 699 }
696 700 /* compact ourselves first to avoid double-frees later when we
697 701 * compact tmp so that it doesn't have random pointers to our
698 702 * underlying from_malloc-data (self->pydata is safe) */
699 703 if (compact(self) != 0) {
700 704 goto nomem;
701 705 }
702 706 copy = PyObject_New(lazymanifest, &lazymanifestType);
703 707 if (!copy) {
704 708 goto nomem;
705 709 }
706 710 copy->dirty = true;
707 711 copy->lines = malloc(self->maxlines * sizeof(line));
708 712 if (!copy->lines) {
709 713 goto nomem;
710 714 }
711 715 copy->maxlines = self->maxlines;
712 716 copy->numlines = 0;
713 717 copy->pydata = self->pydata;
714 718 Py_INCREF(self->pydata);
715 719 for (i = 0; i < self->numlines; i++) {
716 720 PyObject *arglist = NULL, *result = NULL;
717 721 arglist = Py_BuildValue("(s)", self->lines[i].start);
718 722 if (!arglist) {
719 723 return NULL;
720 724 }
721 725 result = PyObject_CallObject(matchfn, arglist);
722 726 Py_DECREF(arglist);
723 727 /* if the callback raised an exception, just let it
724 728 * through and give up */
725 729 if (!result) {
726 730 free(copy->lines);
727 731 Py_DECREF(self->pydata);
728 732 return NULL;
729 733 }
730 734 if (PyObject_IsTrue(result)) {
731 735 assert(!(self->lines[i].from_malloc));
732 736 copy->lines[copy->numlines++] = self->lines[i];
733 737 }
734 738 Py_DECREF(result);
735 739 }
736 740 copy->livelines = copy->numlines;
737 741 return copy;
738 742 nomem:
739 743 PyErr_NoMemory();
740 744 Py_XDECREF(copy);
741 745 return NULL;
742 746 }
743 747
744 748 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
745 749 {
746 750 lazymanifest *other;
747 751 PyObject *pyclean = NULL;
748 752 bool listclean;
749 753 PyObject *emptyTup = NULL, *ret = NULL;
750 754 PyObject *es;
751 755 int sneedle = 0, oneedle = 0;
752 756 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
753 757 return NULL;
754 758 }
755 759 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
756 760 es = PyBytes_FromString("");
757 761 if (!es) {
758 762 goto nomem;
759 763 }
760 764 emptyTup = PyTuple_Pack(2, Py_None, es);
761 765 Py_DECREF(es);
762 766 if (!emptyTup) {
763 767 goto nomem;
764 768 }
765 769 ret = PyDict_New();
766 770 if (!ret) {
767 771 goto nomem;
768 772 }
769 773 while (sneedle != self->numlines || oneedle != other->numlines) {
770 774 line *left = self->lines + sneedle;
771 775 line *right = other->lines + oneedle;
772 776 int result;
773 777 PyObject *key;
774 778 PyObject *outer;
775 779 /* If we're looking at a deleted entry and it's not
776 780 * the end of the manifest, just skip it. */
777 781 if (left->deleted && sneedle < self->numlines) {
778 782 sneedle++;
779 783 continue;
780 784 }
781 785 if (right->deleted && oneedle < other->numlines) {
782 786 oneedle++;
783 787 continue;
784 788 }
785 789 /* if we're at the end of either manifest, then we
786 790 * know the remaining items are adds so we can skip
787 791 * the strcmp. */
788 792 if (sneedle == self->numlines) {
789 793 result = 1;
790 794 } else if (oneedle == other->numlines) {
791 795 result = -1;
792 796 } else {
793 797 result = linecmp(left, right);
794 798 }
795 799 key = result <= 0 ?
796 800 PyBytes_FromString(left->start) :
797 801 PyBytes_FromString(right->start);
798 802 if (!key)
799 803 goto nomem;
800 804 if (result < 0) {
801 805 PyObject *l = hashflags(left);
802 806 if (!l) {
803 807 goto nomem;
804 808 }
805 809 outer = PyTuple_Pack(2, l, emptyTup);
806 810 Py_DECREF(l);
807 811 if (!outer) {
808 812 goto nomem;
809 813 }
810 814 PyDict_SetItem(ret, key, outer);
811 815 Py_DECREF(outer);
812 816 sneedle++;
813 817 } else if (result > 0) {
814 818 PyObject *r = hashflags(right);
815 819 if (!r) {
816 820 goto nomem;
817 821 }
818 822 outer = PyTuple_Pack(2, emptyTup, r);
819 823 Py_DECREF(r);
820 824 if (!outer) {
821 825 goto nomem;
822 826 }
823 827 PyDict_SetItem(ret, key, outer);
824 828 Py_DECREF(outer);
825 829 oneedle++;
826 830 } else {
827 831 /* file exists in both manifests */
828 832 if (left->len != right->len
829 833 || memcmp(left->start, right->start, left->len)
830 834 || left->hash_suffix != right->hash_suffix) {
831 835 PyObject *l = hashflags(left);
832 836 PyObject *r;
833 837 if (!l) {
834 838 goto nomem;
835 839 }
836 840 r = hashflags(right);
837 841 if (!r) {
838 842 Py_DECREF(l);
839 843 goto nomem;
840 844 }
841 845 outer = PyTuple_Pack(2, l, r);
842 846 Py_DECREF(l);
843 847 Py_DECREF(r);
844 848 if (!outer) {
845 849 goto nomem;
846 850 }
847 851 PyDict_SetItem(ret, key, outer);
848 852 Py_DECREF(outer);
849 853 } else if (listclean) {
850 854 PyDict_SetItem(ret, key, Py_None);
851 855 }
852 856 sneedle++;
853 857 oneedle++;
854 858 }
855 859 Py_DECREF(key);
856 860 }
857 861 Py_DECREF(emptyTup);
858 862 return ret;
859 863 nomem:
860 864 PyErr_NoMemory();
861 865 Py_XDECREF(ret);
862 866 Py_XDECREF(emptyTup);
863 867 return NULL;
864 868 }
865 869
866 870 static PyMethodDef lazymanifest_methods[] = {
867 871 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
868 872 "Iterate over file names in this lazymanifest."},
869 873 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
870 874 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
871 875 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
872 876 "Make a copy of this lazymanifest."},
873 877 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
874 878 "Make a copy of this manifest filtered by matchfn."},
875 879 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
876 880 "Compare this lazymanifest to another one."},
877 881 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
878 882 "Encode this manifest to text."},
879 883 {NULL},
880 884 };
881 885
882 886 #ifdef IS_PY3K
883 887 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
884 888 #else
885 889 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
886 890 #endif
887 891
888 892 static PyTypeObject lazymanifestType = {
889 893 PyVarObject_HEAD_INIT(NULL, 0)
890 894 "parsers.lazymanifest", /* tp_name */
891 895 sizeof(lazymanifest), /* tp_basicsize */
892 896 0, /* tp_itemsize */
893 897 (destructor)lazymanifest_dealloc, /* tp_dealloc */
894 898 0, /* tp_print */
895 899 0, /* tp_getattr */
896 900 0, /* tp_setattr */
897 901 0, /* tp_compare */
898 902 0, /* tp_repr */
899 903 0, /* tp_as_number */
900 904 &lazymanifest_seq_meths, /* tp_as_sequence */
901 905 &lazymanifest_mapping_methods, /* tp_as_mapping */
902 906 0, /* tp_hash */
903 907 0, /* tp_call */
904 908 0, /* tp_str */
905 909 0, /* tp_getattro */
906 910 0, /* tp_setattro */
907 911 0, /* tp_as_buffer */
908 912 LAZYMANIFEST_TPFLAGS, /* tp_flags */
909 913 "TODO(augie)", /* tp_doc */
910 914 0, /* tp_traverse */
911 915 0, /* tp_clear */
912 916 0, /* tp_richcompare */
913 917 0, /* tp_weaklistoffset */
914 918 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
915 919 0, /* tp_iternext */
916 920 lazymanifest_methods, /* tp_methods */
917 921 0, /* tp_members */
918 922 0, /* tp_getset */
919 923 0, /* tp_base */
920 924 0, /* tp_dict */
921 925 0, /* tp_descr_get */
922 926 0, /* tp_descr_set */
923 927 0, /* tp_dictoffset */
924 928 (initproc)lazymanifest_init, /* tp_init */
925 929 0, /* tp_alloc */
926 930 };
927 931
928 932 void manifest_module_init(PyObject * mod)
929 933 {
930 934 lazymanifestType.tp_new = PyType_GenericNew;
931 935 if (PyType_Ready(&lazymanifestType) < 0)
932 936 return;
933 937 Py_INCREF(&lazymanifestType);
934 938
935 939 PyModule_AddObject(mod, "lazymanifest",
936 940 (PyObject *)&lazymanifestType);
937 941 }
@@ -1,802 +1,804 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "bitmanipulation.h"
16 16 #include "charencode.h"
17 17 #include "util.h"
18 18
19 19 #ifdef IS_PY3K
20 20 /* The mapping of Python types is meant to be temporary to get Python
21 21 * 3 to compile. We should remove this once Python 3 support is fully
22 22 * supported and proper types are used in the extensions themselves. */
23 23 #define PyInt_Check PyLong_Check
24 24 #define PyInt_FromLong PyLong_FromLong
25 25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 26 #define PyInt_AsLong PyLong_AsLong
27 27 #endif
28 28
29 29 static const char *const versionerrortext = "Python minor version mismatch";
30 30
31 31 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
32 32 {
33 33 Py_ssize_t expected_size;
34 34
35 35 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
36 36 return NULL;
37 37
38 38 return _dict_new_presized(expected_size);
39 39 }
40 40
41 41 /*
42 42 * This code assumes that a manifest is stitched together with newline
43 43 * ('\n') characters.
44 44 */
45 45 static PyObject *parse_manifest(PyObject *self, PyObject *args)
46 46 {
47 47 PyObject *mfdict, *fdict;
48 48 char *str, *start, *end;
49 49 int len;
50 50
51 51 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
52 52 &PyDict_Type, &mfdict,
53 53 &PyDict_Type, &fdict,
54 54 &str, &len))
55 55 goto quit;
56 56
57 57 start = str;
58 58 end = str + len;
59 59 while (start < end) {
60 60 PyObject *file = NULL, *node = NULL;
61 61 PyObject *flags = NULL;
62 62 char *zero = NULL, *newline = NULL;
63 63 ptrdiff_t nlen;
64 64
65 65 zero = memchr(start, '\0', end - start);
66 66 if (!zero) {
67 67 PyErr_SetString(PyExc_ValueError,
68 68 "manifest entry has no separator");
69 69 goto quit;
70 70 }
71 71
72 72 newline = memchr(zero + 1, '\n', end - (zero + 1));
73 73 if (!newline) {
74 74 PyErr_SetString(PyExc_ValueError,
75 75 "manifest contains trailing garbage");
76 76 goto quit;
77 77 }
78 78
79 79 file = PyBytes_FromStringAndSize(start, zero - start);
80 80
81 81 if (!file)
82 82 goto bail;
83 83
84 84 nlen = newline - zero - 1;
85 85
86 86 node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
87 87 if (!node)
88 88 goto bail;
89 89
90 90 if (nlen > 40) {
91 91 flags = PyBytes_FromStringAndSize(zero + 41,
92 92 nlen - 40);
93 93 if (!flags)
94 94 goto bail;
95 95
96 96 if (PyDict_SetItem(fdict, file, flags) == -1)
97 97 goto bail;
98 98 }
99 99
100 100 if (PyDict_SetItem(mfdict, file, node) == -1)
101 101 goto bail;
102 102
103 103 start = newline + 1;
104 104
105 105 Py_XDECREF(flags);
106 106 Py_XDECREF(node);
107 107 Py_XDECREF(file);
108 108 continue;
109 109 bail:
110 110 Py_XDECREF(flags);
111 111 Py_XDECREF(node);
112 112 Py_XDECREF(file);
113 113 goto quit;
114 114 }
115 115
116 116 Py_INCREF(Py_None);
117 117 return Py_None;
118 118 quit:
119 119 return NULL;
120 120 }
121 121
122 122 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
123 123 int size, int mtime)
124 124 {
125 125 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
126 126 &dirstateTupleType);
127 127 if (!t)
128 128 return NULL;
129 129 t->state = state;
130 130 t->mode = mode;
131 131 t->size = size;
132 132 t->mtime = mtime;
133 133 return t;
134 134 }
135 135
136 136 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
137 137 PyObject *kwds)
138 138 {
139 139 /* We do all the initialization here and not a tp_init function because
140 140 * dirstate_tuple is immutable. */
141 141 dirstateTupleObject *t;
142 142 char state;
143 143 int size, mode, mtime;
144 144 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
145 145 return NULL;
146 146
147 147 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
148 148 if (!t)
149 149 return NULL;
150 150 t->state = state;
151 151 t->mode = mode;
152 152 t->size = size;
153 153 t->mtime = mtime;
154 154
155 155 return (PyObject *)t;
156 156 }
157 157
158 158 static void dirstate_tuple_dealloc(PyObject *o)
159 159 {
160 160 PyObject_Del(o);
161 161 }
162 162
163 163 static Py_ssize_t dirstate_tuple_length(PyObject *o)
164 164 {
165 165 return 4;
166 166 }
167 167
168 168 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
169 169 {
170 170 dirstateTupleObject *t = (dirstateTupleObject *)o;
171 171 switch (i) {
172 172 case 0:
173 173 return PyBytes_FromStringAndSize(&t->state, 1);
174 174 case 1:
175 175 return PyInt_FromLong(t->mode);
176 176 case 2:
177 177 return PyInt_FromLong(t->size);
178 178 case 3:
179 179 return PyInt_FromLong(t->mtime);
180 180 default:
181 181 PyErr_SetString(PyExc_IndexError, "index out of range");
182 182 return NULL;
183 183 }
184 184 }
185 185
186 186 static PySequenceMethods dirstate_tuple_sq = {
187 187 dirstate_tuple_length, /* sq_length */
188 188 0, /* sq_concat */
189 189 0, /* sq_repeat */
190 190 dirstate_tuple_item, /* sq_item */
191 191 0, /* sq_ass_item */
192 192 0, /* sq_contains */
193 193 0, /* sq_inplace_concat */
194 194 0 /* sq_inplace_repeat */
195 195 };
196 196
197 197 PyTypeObject dirstateTupleType = {
198 198 PyVarObject_HEAD_INIT(NULL, 0)
199 199 "dirstate_tuple", /* tp_name */
200 200 sizeof(dirstateTupleObject),/* tp_basicsize */
201 201 0, /* tp_itemsize */
202 202 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
203 203 0, /* tp_print */
204 204 0, /* tp_getattr */
205 205 0, /* tp_setattr */
206 206 0, /* tp_compare */
207 207 0, /* tp_repr */
208 208 0, /* tp_as_number */
209 209 &dirstate_tuple_sq, /* tp_as_sequence */
210 210 0, /* tp_as_mapping */
211 211 0, /* tp_hash */
212 212 0, /* tp_call */
213 213 0, /* tp_str */
214 214 0, /* tp_getattro */
215 215 0, /* tp_setattro */
216 216 0, /* tp_as_buffer */
217 217 Py_TPFLAGS_DEFAULT, /* tp_flags */
218 218 "dirstate tuple", /* tp_doc */
219 219 0, /* tp_traverse */
220 220 0, /* tp_clear */
221 221 0, /* tp_richcompare */
222 222 0, /* tp_weaklistoffset */
223 223 0, /* tp_iter */
224 224 0, /* tp_iternext */
225 225 0, /* tp_methods */
226 226 0, /* tp_members */
227 227 0, /* tp_getset */
228 228 0, /* tp_base */
229 229 0, /* tp_dict */
230 230 0, /* tp_descr_get */
231 231 0, /* tp_descr_set */
232 232 0, /* tp_dictoffset */
233 233 0, /* tp_init */
234 234 0, /* tp_alloc */
235 235 dirstate_tuple_new, /* tp_new */
236 236 };
237 237
238 238 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
239 239 {
240 240 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
241 241 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
242 242 char state, *cur, *str, *cpos;
243 243 int mode, size, mtime;
244 244 unsigned int flen, len, pos = 40;
245 245 int readlen;
246 246
247 247 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
248 248 &PyDict_Type, &dmap,
249 249 &PyDict_Type, &cmap,
250 250 &str, &readlen))
251 251 goto quit;
252 252
253 253 len = readlen;
254 254
255 255 /* read parents */
256 256 if (len < 40) {
257 257 PyErr_SetString(
258 258 PyExc_ValueError, "too little data for parents");
259 259 goto quit;
260 260 }
261 261
262 262 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
263 263 if (!parents)
264 264 goto quit;
265 265
266 266 /* read filenames */
267 267 while (pos >= 40 && pos < len) {
268 268 if (pos + 17 > len) {
269 269 PyErr_SetString(PyExc_ValueError,
270 270 "overflow in dirstate");
271 271 goto quit;
272 272 }
273 273 cur = str + pos;
274 274 /* unpack header */
275 275 state = *cur;
276 276 mode = getbe32(cur + 1);
277 277 size = getbe32(cur + 5);
278 278 mtime = getbe32(cur + 9);
279 279 flen = getbe32(cur + 13);
280 280 pos += 17;
281 281 cur += 17;
282 282 if (flen > len - pos) {
283 283 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
284 284 goto quit;
285 285 }
286 286
287 287 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
288 288 mtime);
289 289 cpos = memchr(cur, 0, flen);
290 290 if (cpos) {
291 291 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
292 292 cname = PyBytes_FromStringAndSize(cpos + 1,
293 293 flen - (cpos - cur) - 1);
294 294 if (!fname || !cname ||
295 295 PyDict_SetItem(cmap, fname, cname) == -1 ||
296 296 PyDict_SetItem(dmap, fname, entry) == -1)
297 297 goto quit;
298 298 Py_DECREF(cname);
299 299 } else {
300 300 fname = PyBytes_FromStringAndSize(cur, flen);
301 301 if (!fname ||
302 302 PyDict_SetItem(dmap, fname, entry) == -1)
303 303 goto quit;
304 304 }
305 305 Py_DECREF(fname);
306 306 Py_DECREF(entry);
307 307 fname = cname = entry = NULL;
308 308 pos += flen;
309 309 }
310 310
311 311 ret = parents;
312 312 Py_INCREF(ret);
313 313 quit:
314 314 Py_XDECREF(fname);
315 315 Py_XDECREF(cname);
316 316 Py_XDECREF(entry);
317 317 Py_XDECREF(parents);
318 318 return ret;
319 319 }
320 320
321 321 /*
322 322 * Build a set of non-normal and other parent entries from the dirstate dmap
323 323 */
324 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
324 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
325 {
325 326 PyObject *dmap, *fname, *v;
326 327 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
327 328 Py_ssize_t pos;
328 329
329 330 if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
330 331 &PyDict_Type, &dmap))
331 332 goto bail;
332 333
333 334 nonnset = PySet_New(NULL);
334 335 if (nonnset == NULL)
335 336 goto bail;
336 337
337 338 otherpset = PySet_New(NULL);
338 339 if (otherpset == NULL)
339 340 goto bail;
340 341
341 342 pos = 0;
342 343 while (PyDict_Next(dmap, &pos, &fname, &v)) {
343 344 dirstateTupleObject *t;
344 345 if (!dirstate_tuple_check(v)) {
345 346 PyErr_SetString(PyExc_TypeError,
346 347 "expected a dirstate tuple");
347 348 goto bail;
348 349 }
349 350 t = (dirstateTupleObject *)v;
350 351
351 352 if (t->state == 'n' && t->size == -2) {
352 353 if (PySet_Add(otherpset, fname) == -1) {
353 354 goto bail;
354 355 }
355 356 }
356 357
357 358 if (t->state == 'n' && t->mtime != -1)
358 359 continue;
359 360 if (PySet_Add(nonnset, fname) == -1)
360 361 goto bail;
361 362 }
362 363
363 364 result = Py_BuildValue("(OO)", nonnset, otherpset);
364 365 if (result == NULL)
365 366 goto bail;
366 367 Py_DECREF(nonnset);
367 368 Py_DECREF(otherpset);
368 369 return result;
369 370 bail:
370 371 Py_XDECREF(nonnset);
371 372 Py_XDECREF(otherpset);
372 373 Py_XDECREF(result);
373 374 return NULL;
374 375 }
375 376
376 377 /*
377 378 * Efficiently pack a dirstate object into its on-disk format.
378 379 */
379 380 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
380 381 {
381 382 PyObject *packobj = NULL;
382 383 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
383 384 Py_ssize_t nbytes, pos, l;
384 385 PyObject *k, *v = NULL, *pn;
385 386 char *p, *s;
386 387 int now;
387 388
388 389 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate",
389 390 &PyDict_Type, &map, &PyDict_Type, &copymap,
390 391 &pl, &now))
391 392 return NULL;
392 393
393 394 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
394 395 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
395 396 return NULL;
396 397 }
397 398
398 399 /* Figure out how much we need to allocate. */
399 400 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
400 401 PyObject *c;
401 402 if (!PyBytes_Check(k)) {
402 403 PyErr_SetString(PyExc_TypeError, "expected string key");
403 404 goto bail;
404 405 }
405 406 nbytes += PyBytes_GET_SIZE(k) + 17;
406 407 c = PyDict_GetItem(copymap, k);
407 408 if (c) {
408 409 if (!PyBytes_Check(c)) {
409 410 PyErr_SetString(PyExc_TypeError,
410 411 "expected string key");
411 412 goto bail;
412 413 }
413 414 nbytes += PyBytes_GET_SIZE(c) + 1;
414 415 }
415 416 }
416 417
417 418 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
418 419 if (packobj == NULL)
419 420 goto bail;
420 421
421 422 p = PyBytes_AS_STRING(packobj);
422 423
423 424 pn = PySequence_ITEM(pl, 0);
424 425 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
425 426 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
426 427 goto bail;
427 428 }
428 429 memcpy(p, s, l);
429 430 p += 20;
430 431 pn = PySequence_ITEM(pl, 1);
431 432 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
432 433 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
433 434 goto bail;
434 435 }
435 436 memcpy(p, s, l);
436 437 p += 20;
437 438
438 439 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
439 440 dirstateTupleObject *tuple;
440 441 char state;
441 442 int mode, size, mtime;
442 443 Py_ssize_t len, l;
443 444 PyObject *o;
444 445 char *t;
445 446
446 447 if (!dirstate_tuple_check(v)) {
447 448 PyErr_SetString(PyExc_TypeError,
448 449 "expected a dirstate tuple");
449 450 goto bail;
450 451 }
451 452 tuple = (dirstateTupleObject *)v;
452 453
453 454 state = tuple->state;
454 455 mode = tuple->mode;
455 456 size = tuple->size;
456 457 mtime = tuple->mtime;
457 458 if (state == 'n' && mtime == now) {
458 459 /* See pure/parsers.py:pack_dirstate for why we do
459 460 * this. */
460 461 mtime = -1;
461 462 mtime_unset = (PyObject *)make_dirstate_tuple(
462 463 state, mode, size, mtime);
463 464 if (!mtime_unset)
464 465 goto bail;
465 466 if (PyDict_SetItem(map, k, mtime_unset) == -1)
466 467 goto bail;
467 468 Py_DECREF(mtime_unset);
468 469 mtime_unset = NULL;
469 470 }
470 471 *p++ = state;
471 472 putbe32((uint32_t)mode, p);
472 473 putbe32((uint32_t)size, p + 4);
473 474 putbe32((uint32_t)mtime, p + 8);
474 475 t = p + 12;
475 476 p += 16;
476 477 len = PyBytes_GET_SIZE(k);
477 478 memcpy(p, PyBytes_AS_STRING(k), len);
478 479 p += len;
479 480 o = PyDict_GetItem(copymap, k);
480 481 if (o) {
481 482 *p++ = '\0';
482 483 l = PyBytes_GET_SIZE(o);
483 484 memcpy(p, PyBytes_AS_STRING(o), l);
484 485 p += l;
485 486 len += l + 1;
486 487 }
487 488 putbe32((uint32_t)len, t);
488 489 }
489 490
490 491 pos = p - PyBytes_AS_STRING(packobj);
491 492 if (pos != nbytes) {
492 493 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
493 494 (long)pos, (long)nbytes);
494 495 goto bail;
495 496 }
496 497
497 498 return packobj;
498 499 bail:
499 500 Py_XDECREF(mtime_unset);
500 501 Py_XDECREF(packobj);
501 502 Py_XDECREF(v);
502 503 return NULL;
503 504 }
504 505
505 506 #define BUMPED_FIX 1
506 507 #define USING_SHA_256 2
507 508 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
508 509
509 510 static PyObject *readshas(
510 511 const char *source, unsigned char num, Py_ssize_t hashwidth)
511 512 {
512 513 int i;
513 514 PyObject *list = PyTuple_New(num);
514 515 if (list == NULL) {
515 516 return NULL;
516 517 }
517 518 for (i = 0; i < num; i++) {
518 519 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
519 520 if (hash == NULL) {
520 521 Py_DECREF(list);
521 522 return NULL;
522 523 }
523 524 PyTuple_SET_ITEM(list, i, hash);
524 525 source += hashwidth;
525 526 }
526 527 return list;
527 528 }
528 529
529 530 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
530 531 uint32_t *msize)
531 532 {
532 533 const char *data = databegin;
533 534 const char *meta;
534 535
535 536 double mtime;
536 537 int16_t tz;
537 538 uint16_t flags;
538 539 unsigned char nsuccs, nparents, nmetadata;
539 540 Py_ssize_t hashwidth = 20;
540 541
541 542 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
542 543 PyObject *metadata = NULL, *ret = NULL;
543 544 int i;
544 545
545 546 if (data + FM1_HEADER_SIZE > dataend) {
546 547 goto overflow;
547 548 }
548 549
549 550 *msize = getbe32(data);
550 551 data += 4;
551 552 mtime = getbefloat64(data);
552 553 data += 8;
553 554 tz = getbeint16(data);
554 555 data += 2;
555 556 flags = getbeuint16(data);
556 557 data += 2;
557 558
558 559 if (flags & USING_SHA_256) {
559 560 hashwidth = 32;
560 561 }
561 562
562 563 nsuccs = (unsigned char)(*data++);
563 564 nparents = (unsigned char)(*data++);
564 565 nmetadata = (unsigned char)(*data++);
565 566
566 567 if (databegin + *msize > dataend) {
567 568 goto overflow;
568 569 }
569 570 dataend = databegin + *msize; /* narrow down to marker size */
570 571
571 572 if (data + hashwidth > dataend) {
572 573 goto overflow;
573 574 }
574 575 prec = PyBytes_FromStringAndSize(data, hashwidth);
575 576 data += hashwidth;
576 577 if (prec == NULL) {
577 578 goto bail;
578 579 }
579 580
580 581 if (data + nsuccs * hashwidth > dataend) {
581 582 goto overflow;
582 583 }
583 584 succs = readshas(data, nsuccs, hashwidth);
584 585 if (succs == NULL) {
585 586 goto bail;
586 587 }
587 588 data += nsuccs * hashwidth;
588 589
589 590 if (nparents == 1 || nparents == 2) {
590 591 if (data + nparents * hashwidth > dataend) {
591 592 goto overflow;
592 593 }
593 594 parents = readshas(data, nparents, hashwidth);
594 595 if (parents == NULL) {
595 596 goto bail;
596 597 }
597 598 data += nparents * hashwidth;
598 599 } else {
599 600 parents = Py_None;
600 601 Py_INCREF(parents);
601 602 }
602 603
603 604 if (data + 2 * nmetadata > dataend) {
604 605 goto overflow;
605 606 }
606 607 meta = data + (2 * nmetadata);
607 608 metadata = PyTuple_New(nmetadata);
608 609 if (metadata == NULL) {
609 610 goto bail;
610 611 }
611 612 for (i = 0; i < nmetadata; i++) {
612 613 PyObject *tmp, *left = NULL, *right = NULL;
613 614 Py_ssize_t leftsize = (unsigned char)(*data++);
614 615 Py_ssize_t rightsize = (unsigned char)(*data++);
615 616 if (meta + leftsize + rightsize > dataend) {
616 617 goto overflow;
617 618 }
618 619 left = PyBytes_FromStringAndSize(meta, leftsize);
619 620 meta += leftsize;
620 621 right = PyBytes_FromStringAndSize(meta, rightsize);
621 622 meta += rightsize;
622 623 tmp = PyTuple_New(2);
623 624 if (!left || !right || !tmp) {
624 625 Py_XDECREF(left);
625 626 Py_XDECREF(right);
626 627 Py_XDECREF(tmp);
627 628 goto bail;
628 629 }
629 630 PyTuple_SET_ITEM(tmp, 0, left);
630 631 PyTuple_SET_ITEM(tmp, 1, right);
631 632 PyTuple_SET_ITEM(metadata, i, tmp);
632 633 }
633 634 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
634 635 metadata, mtime, (int)tz * 60, parents);
635 636 goto bail; /* return successfully */
636 637
637 638 overflow:
638 639 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
639 640 bail:
640 641 Py_XDECREF(prec);
641 642 Py_XDECREF(succs);
642 643 Py_XDECREF(metadata);
643 644 Py_XDECREF(parents);
644 645 return ret;
645 646 }
646 647
647 648
648 static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
649 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
650 {
649 651 const char *data, *dataend;
650 652 int datalen;
651 653 Py_ssize_t offset, stop;
652 654 PyObject *markers = NULL;
653 655
654 656 if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
655 657 return NULL;
656 658 }
657 659 dataend = data + datalen;
658 660 data += offset;
659 661 markers = PyList_New(0);
660 662 if (!markers) {
661 663 return NULL;
662 664 }
663 665 while (offset < stop) {
664 666 uint32_t msize;
665 667 int error;
666 668 PyObject *record = fm1readmarker(data, dataend, &msize);
667 669 if (!record) {
668 670 goto bail;
669 671 }
670 672 error = PyList_Append(markers, record);
671 673 Py_DECREF(record);
672 674 if (error) {
673 675 goto bail;
674 676 }
675 677 data += msize;
676 678 offset += msize;
677 679 }
678 680 return markers;
679 681 bail:
680 682 Py_DECREF(markers);
681 683 return NULL;
682 684 }
683 685
684 686 static char parsers_doc[] = "Efficient content parsing.";
685 687
686 688 PyObject *encodedir(PyObject *self, PyObject *args);
687 689 PyObject *pathencode(PyObject *self, PyObject *args);
688 690 PyObject *lowerencode(PyObject *self, PyObject *args);
689 691 PyObject *parse_index2(PyObject *self, PyObject *args);
690 692
691 693 static PyMethodDef methods[] = {
692 694 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
693 695 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
694 696 "create a set containing non-normal and other parent entries of given "
695 697 "dirstate\n"},
696 698 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
697 699 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
698 700 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
699 701 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
700 702 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
701 703 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
702 704 {"dict_new_presized", dict_new_presized, METH_VARARGS,
703 705 "construct a dict with an expected size\n"},
704 706 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
705 707 "make file foldmap\n"},
706 708 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
707 709 "escape a UTF-8 byte string to JSON (fast path)\n"},
708 710 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
709 711 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
710 712 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
711 713 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
712 714 "parse v1 obsolete markers\n"},
713 715 {NULL, NULL}
714 716 };
715 717
716 718 void dirs_module_init(PyObject *mod);
717 719 void manifest_module_init(PyObject *mod);
718 720 void revlog_module_init(PyObject *mod);
719 721
720 722 static const int version = 3;
721 723
722 724 static void module_init(PyObject *mod)
723 725 {
724 726 PyModule_AddIntConstant(mod, "version", version);
725 727
726 728 /* This module constant has two purposes. First, it lets us unit test
727 729 * the ImportError raised without hard-coding any error text. This
728 730 * means we can change the text in the future without breaking tests,
729 731 * even across changesets without a recompile. Second, its presence
730 732 * can be used to determine whether the version-checking logic is
731 733 * present, which also helps in testing across changesets without a
732 734 * recompile. Note that this means the pure-Python version of parsers
733 735 * should not have this module constant. */
734 736 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
735 737
736 738 dirs_module_init(mod);
737 739 manifest_module_init(mod);
738 740 revlog_module_init(mod);
739 741
740 742 if (PyType_Ready(&dirstateTupleType) < 0)
741 743 return;
742 744 Py_INCREF(&dirstateTupleType);
743 745 PyModule_AddObject(mod, "dirstatetuple",
744 746 (PyObject *)&dirstateTupleType);
745 747 }
746 748
747 749 static int check_python_version(void)
748 750 {
749 751 PyObject *sys = PyImport_ImportModule("sys"), *ver;
750 752 long hexversion;
751 753 if (!sys)
752 754 return -1;
753 755 ver = PyObject_GetAttrString(sys, "hexversion");
754 756 Py_DECREF(sys);
755 757 if (!ver)
756 758 return -1;
757 759 hexversion = PyInt_AsLong(ver);
758 760 Py_DECREF(ver);
759 761 /* sys.hexversion is a 32-bit number by default, so the -1 case
760 762 * should only occur in unusual circumstances (e.g. if sys.hexversion
761 763 * is manually set to an invalid value). */
762 764 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
763 765 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
764 766 "modules were compiled with Python " PY_VERSION ", but "
765 767 "Mercurial is currently using Python with sys.hexversion=%ld: "
766 768 "Python %s\n at: %s", versionerrortext, hexversion,
767 769 Py_GetVersion(), Py_GetProgramFullPath());
768 770 return -1;
769 771 }
770 772 return 0;
771 773 }
772 774
773 775 #ifdef IS_PY3K
774 776 static struct PyModuleDef parsers_module = {
775 777 PyModuleDef_HEAD_INIT,
776 778 "parsers",
777 779 parsers_doc,
778 780 -1,
779 781 methods
780 782 };
781 783
782 784 PyMODINIT_FUNC PyInit_parsers(void)
783 785 {
784 786 PyObject *mod;
785 787
786 788 if (check_python_version() == -1)
787 789 return NULL;
788 790 mod = PyModule_Create(&parsers_module);
789 791 module_init(mod);
790 792 return mod;
791 793 }
792 794 #else
793 795 PyMODINIT_FUNC initparsers(void)
794 796 {
795 797 PyObject *mod;
796 798
797 799 if (check_python_version() == -1)
798 800 return;
799 801 mod = Py_InitModule3("parsers", methods, parsers_doc);
800 802 module_init(mod);
801 803 }
802 804 #endif
@@ -1,2089 +1,2090 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AS_LONG PyLong_AS_LONG
28 28 #define PyInt_AsLong PyLong_AsLong
29 29 #endif
30 30
31 31 /*
32 32 * A base-16 trie for fast node->rev mapping.
33 33 *
34 34 * Positive value is index of the next node in the trie
35 35 * Negative value is a leaf: -(rev + 1)
36 36 * Zero is empty
37 37 */
38 38 typedef struct {
39 39 int children[16];
40 40 } nodetree;
41 41
42 42 /*
43 43 * This class has two behaviors.
44 44 *
45 45 * When used in a list-like way (with integer keys), we decode an
46 46 * entry in a RevlogNG index file on demand. Our last entry is a
47 47 * sentinel, always a nullid. We have limited support for
48 48 * integer-keyed insert and delete, only at elements right before the
49 49 * sentinel.
50 50 *
51 51 * With string keys, we lazily perform a reverse mapping from node to
52 52 * rev, using a base-16 trie.
53 53 */
54 54 typedef struct {
55 55 PyObject_HEAD
56 56 /* Type-specific fields go here. */
57 57 PyObject *data; /* raw bytes of index */
58 58 Py_buffer buf; /* buffer of data */
59 59 PyObject **cache; /* cached tuples */
60 60 const char **offsets; /* populated on demand */
61 61 Py_ssize_t raw_length; /* original number of elements */
62 62 Py_ssize_t length; /* current number of elements */
63 63 PyObject *added; /* populated on demand */
64 64 PyObject *headrevs; /* cache, invalidated on changes */
65 65 PyObject *filteredrevs;/* filtered revs set */
66 66 nodetree *nt; /* base-16 trie */
67 67 unsigned ntlength; /* # nodes in use */
68 68 unsigned ntcapacity; /* # nodes allocated */
69 69 int ntdepth; /* maximum depth of tree */
70 70 int ntsplits; /* # splits performed */
71 71 int ntrev; /* last rev scanned */
72 72 int ntlookups; /* # lookups */
73 73 int ntmisses; /* # lookups that miss the cache */
74 74 int inlined;
75 75 } indexObject;
76 76
77 77 static Py_ssize_t index_length(const indexObject *self)
78 78 {
79 79 if (self->added == NULL)
80 80 return self->length;
81 81 return self->length + PyList_GET_SIZE(self->added);
82 82 }
83 83
84 84 static PyObject *nullentry;
85 85 static const char nullid[20];
86 86
87 87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
88 88
89 89 #if LONG_MAX == 0x7fffffffL
90 90 static char *tuple_format = "Kiiiiiis#";
91 91 #else
92 92 static char *tuple_format = "kiiiiiis#";
93 93 #endif
94 94
95 95 /* A RevlogNG v1 index entry is 64 bytes long. */
96 96 static const long v1_hdrsize = 64;
97 97
98 98 /*
99 99 * Return a pointer to the beginning of a RevlogNG record.
100 100 */
101 101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
102 102 {
103 103 if (self->inlined && pos > 0) {
104 104 if (self->offsets == NULL) {
105 105 self->offsets = PyMem_Malloc(self->raw_length *
106 106 sizeof(*self->offsets));
107 107 if (self->offsets == NULL)
108 108 return (const char *)PyErr_NoMemory();
109 109 inline_scan(self, self->offsets);
110 110 }
111 111 return self->offsets[pos];
112 112 }
113 113
114 114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
115 115 }
116 116
117 117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
118 118 int *ps, int maxrev)
119 119 {
120 120 if (rev >= self->length - 1) {
121 121 PyObject *tuple = PyList_GET_ITEM(self->added,
122 122 rev - self->length + 1);
123 123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
124 124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
125 125 } else {
126 126 const char *data = index_deref(self, rev);
127 127 ps[0] = getbe32(data + 24);
128 128 ps[1] = getbe32(data + 28);
129 129 }
130 130 /* If index file is corrupted, ps[] may point to invalid revisions. So
131 131 * there is a risk of buffer overflow to trust them unconditionally. */
132 132 if (ps[0] > maxrev || ps[1] > maxrev) {
133 133 PyErr_SetString(PyExc_ValueError, "parent out of range");
134 134 return -1;
135 135 }
136 136 return 0;
137 137 }
138 138
139 139
140 140 /*
141 141 * RevlogNG format (all in big endian, data may be inlined):
142 142 * 6 bytes: offset
143 143 * 2 bytes: flags
144 144 * 4 bytes: compressed length
145 145 * 4 bytes: uncompressed length
146 146 * 4 bytes: base revision
147 147 * 4 bytes: link revision
148 148 * 4 bytes: parent 1 revision
149 149 * 4 bytes: parent 2 revision
150 150 * 32 bytes: nodeid (only 20 bytes used)
151 151 */
152 152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
153 153 {
154 154 uint64_t offset_flags;
155 155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
156 156 const char *c_node_id;
157 157 const char *data;
158 158 Py_ssize_t length = index_length(self);
159 159 PyObject *entry;
160 160
161 161 if (pos < 0)
162 162 pos += length;
163 163
164 164 if (pos < 0 || pos >= length) {
165 165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
166 166 return NULL;
167 167 }
168 168
169 169 if (pos == length - 1) {
170 170 Py_INCREF(nullentry);
171 171 return nullentry;
172 172 }
173 173
174 174 if (pos >= self->length - 1) {
175 175 PyObject *obj;
176 176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
177 177 Py_INCREF(obj);
178 178 return obj;
179 179 }
180 180
181 181 if (self->cache) {
182 182 if (self->cache[pos]) {
183 183 Py_INCREF(self->cache[pos]);
184 184 return self->cache[pos];
185 185 }
186 186 } else {
187 187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
188 188 if (self->cache == NULL)
189 189 return PyErr_NoMemory();
190 190 }
191 191
192 192 data = index_deref(self, pos);
193 193 if (data == NULL)
194 194 return NULL;
195 195
196 196 offset_flags = getbe32(data + 4);
197 197 if (pos == 0) /* mask out version number for the first entry */
198 198 offset_flags &= 0xFFFF;
199 199 else {
200 200 uint32_t offset_high = getbe32(data);
201 201 offset_flags |= ((uint64_t)offset_high) << 32;
202 202 }
203 203
204 204 comp_len = getbe32(data + 8);
205 205 uncomp_len = getbe32(data + 12);
206 206 base_rev = getbe32(data + 16);
207 207 link_rev = getbe32(data + 20);
208 208 parent_1 = getbe32(data + 24);
209 209 parent_2 = getbe32(data + 28);
210 210 c_node_id = data + 32;
211 211
212 212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
213 213 uncomp_len, base_rev, link_rev,
214 214 parent_1, parent_2, c_node_id, 20);
215 215
216 216 if (entry) {
217 217 PyObject_GC_UnTrack(entry);
218 218 Py_INCREF(entry);
219 219 }
220 220
221 221 self->cache[pos] = entry;
222 222
223 223 return entry;
224 224 }
225 225
226 226 /*
227 227 * Return the 20-byte SHA of the node corresponding to the given rev.
228 228 */
229 229 static const char *index_node(indexObject *self, Py_ssize_t pos)
230 230 {
231 231 Py_ssize_t length = index_length(self);
232 232 const char *data;
233 233
234 234 if (pos == length - 1 || pos == INT_MAX)
235 235 return nullid;
236 236
237 237 if (pos >= length)
238 238 return NULL;
239 239
240 240 if (pos >= self->length - 1) {
241 241 PyObject *tuple, *str;
242 242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
243 243 str = PyTuple_GetItem(tuple, 7);
244 244 return str ? PyBytes_AS_STRING(str) : NULL;
245 245 }
246 246
247 247 data = index_deref(self, pos);
248 248 return data ? data + 32 : NULL;
249 249 }
250 250
251 251 static int nt_insert(indexObject *self, const char *node, int rev);
252 252
253 253 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
254 254 {
255 255 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
256 256 return -1;
257 257 if (*nodelen == 20)
258 258 return 0;
259 259 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
260 260 return -1;
261 261 }
262 262
263 263 static PyObject *index_insert(indexObject *self, PyObject *args)
264 264 {
265 265 PyObject *obj;
266 266 char *node;
267 267 int index;
268 268 Py_ssize_t len, nodelen;
269 269
270 270 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
271 271 return NULL;
272 272
273 273 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
274 274 PyErr_SetString(PyExc_TypeError, "8-tuple required");
275 275 return NULL;
276 276 }
277 277
278 278 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
279 279 return NULL;
280 280
281 281 len = index_length(self);
282 282
283 283 if (index < 0)
284 284 index += len;
285 285
286 286 if (index != len - 1) {
287 287 PyErr_SetString(PyExc_IndexError,
288 288 "insert only supported at index -1");
289 289 return NULL;
290 290 }
291 291
292 292 if (self->added == NULL) {
293 293 self->added = PyList_New(0);
294 294 if (self->added == NULL)
295 295 return NULL;
296 296 }
297 297
298 298 if (PyList_Append(self->added, obj) == -1)
299 299 return NULL;
300 300
301 301 if (self->nt)
302 302 nt_insert(self, node, index);
303 303
304 304 Py_CLEAR(self->headrevs);
305 305 Py_RETURN_NONE;
306 306 }
307 307
308 308 static void _index_clearcaches(indexObject *self)
309 309 {
310 310 if (self->cache) {
311 311 Py_ssize_t i;
312 312
313 313 for (i = 0; i < self->raw_length; i++)
314 314 Py_CLEAR(self->cache[i]);
315 315 free(self->cache);
316 316 self->cache = NULL;
317 317 }
318 318 if (self->offsets) {
319 319 PyMem_Free(self->offsets);
320 320 self->offsets = NULL;
321 321 }
322 322 if (self->nt) {
323 323 free(self->nt);
324 324 self->nt = NULL;
325 325 }
326 326 Py_CLEAR(self->headrevs);
327 327 }
328 328
329 329 static PyObject *index_clearcaches(indexObject *self)
330 330 {
331 331 _index_clearcaches(self);
332 332 self->ntlength = self->ntcapacity = 0;
333 333 self->ntdepth = self->ntsplits = 0;
334 334 self->ntrev = -1;
335 335 self->ntlookups = self->ntmisses = 0;
336 336 Py_RETURN_NONE;
337 337 }
338 338
339 339 static PyObject *index_stats(indexObject *self)
340 340 {
341 341 PyObject *obj = PyDict_New();
342 342 PyObject *t = NULL;
343 343
344 344 if (obj == NULL)
345 345 return NULL;
346 346
347 347 #define istat(__n, __d) \
348 348 do { \
349 349 t = PyInt_FromSsize_t(self->__n); \
350 350 if (!t) \
351 351 goto bail; \
352 352 if (PyDict_SetItemString(obj, __d, t) == -1) \
353 353 goto bail; \
354 354 Py_DECREF(t); \
355 355 } while (0)
356 356
357 357 if (self->added) {
358 358 Py_ssize_t len = PyList_GET_SIZE(self->added);
359 359 t = PyInt_FromSsize_t(len);
360 360 if (!t)
361 361 goto bail;
362 362 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
363 363 goto bail;
364 364 Py_DECREF(t);
365 365 }
366 366
367 367 if (self->raw_length != self->length - 1)
368 368 istat(raw_length, "revs on disk");
369 369 istat(length, "revs in memory");
370 370 istat(ntcapacity, "node trie capacity");
371 371 istat(ntdepth, "node trie depth");
372 372 istat(ntlength, "node trie count");
373 373 istat(ntlookups, "node trie lookups");
374 374 istat(ntmisses, "node trie misses");
375 375 istat(ntrev, "node trie last rev scanned");
376 376 istat(ntsplits, "node trie splits");
377 377
378 378 #undef istat
379 379
380 380 return obj;
381 381
382 382 bail:
383 383 Py_XDECREF(obj);
384 384 Py_XDECREF(t);
385 385 return NULL;
386 386 }
387 387
388 388 /*
389 389 * When we cache a list, we want to be sure the caller can't mutate
390 390 * the cached copy.
391 391 */
392 392 static PyObject *list_copy(PyObject *list)
393 393 {
394 394 Py_ssize_t len = PyList_GET_SIZE(list);
395 395 PyObject *newlist = PyList_New(len);
396 396 Py_ssize_t i;
397 397
398 398 if (newlist == NULL)
399 399 return NULL;
400 400
401 401 for (i = 0; i < len; i++) {
402 402 PyObject *obj = PyList_GET_ITEM(list, i);
403 403 Py_INCREF(obj);
404 404 PyList_SET_ITEM(newlist, i, obj);
405 405 }
406 406
407 407 return newlist;
408 408 }
409 409
410 static int check_filter(PyObject *filter, Py_ssize_t arg) {
410 static int check_filter(PyObject *filter, Py_ssize_t arg)
411 {
411 412 if (filter) {
412 413 PyObject *arglist, *result;
413 414 int isfiltered;
414 415
415 416 arglist = Py_BuildValue("(n)", arg);
416 417 if (!arglist) {
417 418 return -1;
418 419 }
419 420
420 421 result = PyEval_CallObject(filter, arglist);
421 422 Py_DECREF(arglist);
422 423 if (!result) {
423 424 return -1;
424 425 }
425 426
426 427 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
427 428 * same as this function, so we can just return it directly.*/
428 429 isfiltered = PyObject_IsTrue(result);
429 430 Py_DECREF(result);
430 431 return isfiltered;
431 432 } else {
432 433 return 0;
433 434 }
434 435 }
435 436
436 437 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
437 438 Py_ssize_t marker, char *phases)
438 439 {
439 440 PyObject *iter = NULL;
440 441 PyObject *iter_item = NULL;
441 442 Py_ssize_t min_idx = index_length(self) + 1;
442 443 long iter_item_long;
443 444
444 445 if (PyList_GET_SIZE(list) != 0) {
445 446 iter = PyObject_GetIter(list);
446 447 if (iter == NULL)
447 448 return -2;
448 449 while ((iter_item = PyIter_Next(iter))) {
449 450 iter_item_long = PyInt_AS_LONG(iter_item);
450 451 Py_DECREF(iter_item);
451 452 if (iter_item_long < min_idx)
452 453 min_idx = iter_item_long;
453 454 phases[iter_item_long] = marker;
454 455 }
455 456 Py_DECREF(iter);
456 457 }
457 458
458 459 return min_idx;
459 460 }
460 461
461 462 static inline void set_phase_from_parents(char *phases, int parent_1,
462 463 int parent_2, Py_ssize_t i)
463 464 {
464 465 if (parent_1 >= 0 && phases[parent_1] > phases[i])
465 466 phases[i] = phases[parent_1];
466 467 if (parent_2 >= 0 && phases[parent_2] > phases[i])
467 468 phases[i] = phases[parent_2];
468 469 }
469 470
470 471 static PyObject *reachableroots2(indexObject *self, PyObject *args)
471 472 {
472 473
473 474 /* Input */
474 475 long minroot;
475 476 PyObject *includepatharg = NULL;
476 477 int includepath = 0;
477 478 /* heads and roots are lists */
478 479 PyObject *heads = NULL;
479 480 PyObject *roots = NULL;
480 481 PyObject *reachable = NULL;
481 482
482 483 PyObject *val;
483 484 Py_ssize_t len = index_length(self) - 1;
484 485 long revnum;
485 486 Py_ssize_t k;
486 487 Py_ssize_t i;
487 488 Py_ssize_t l;
488 489 int r;
489 490 int parents[2];
490 491
491 492 /* Internal data structure:
492 493 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
493 494 * revstates: array of length len+1 (all revs + nullrev) */
494 495 int *tovisit = NULL;
495 496 long lentovisit = 0;
496 497 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
497 498 char *revstates = NULL;
498 499
499 500 /* Get arguments */
500 501 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
501 502 &PyList_Type, &roots,
502 503 &PyBool_Type, &includepatharg))
503 504 goto bail;
504 505
505 506 if (includepatharg == Py_True)
506 507 includepath = 1;
507 508
508 509 /* Initialize return set */
509 510 reachable = PyList_New(0);
510 511 if (reachable == NULL)
511 512 goto bail;
512 513
513 514 /* Initialize internal datastructures */
514 515 tovisit = (int *)malloc((len + 1) * sizeof(int));
515 516 if (tovisit == NULL) {
516 517 PyErr_NoMemory();
517 518 goto bail;
518 519 }
519 520
520 521 revstates = (char *)calloc(len + 1, 1);
521 522 if (revstates == NULL) {
522 523 PyErr_NoMemory();
523 524 goto bail;
524 525 }
525 526
526 527 l = PyList_GET_SIZE(roots);
527 528 for (i = 0; i < l; i++) {
528 529 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
529 530 if (revnum == -1 && PyErr_Occurred())
530 531 goto bail;
531 532 /* If root is out of range, e.g. wdir(), it must be unreachable
532 533 * from heads. So we can just ignore it. */
533 534 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
534 535 continue;
535 536 revstates[revnum + 1] |= RS_ROOT;
536 537 }
537 538
538 539 /* Populate tovisit with all the heads */
539 540 l = PyList_GET_SIZE(heads);
540 541 for (i = 0; i < l; i++) {
541 542 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
542 543 if (revnum == -1 && PyErr_Occurred())
543 544 goto bail;
544 545 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
545 546 PyErr_SetString(PyExc_IndexError, "head out of range");
546 547 goto bail;
547 548 }
548 549 if (!(revstates[revnum + 1] & RS_SEEN)) {
549 550 tovisit[lentovisit++] = (int)revnum;
550 551 revstates[revnum + 1] |= RS_SEEN;
551 552 }
552 553 }
553 554
554 555 /* Visit the tovisit list and find the reachable roots */
555 556 k = 0;
556 557 while (k < lentovisit) {
557 558 /* Add the node to reachable if it is a root*/
558 559 revnum = tovisit[k++];
559 560 if (revstates[revnum + 1] & RS_ROOT) {
560 561 revstates[revnum + 1] |= RS_REACHABLE;
561 562 val = PyInt_FromLong(revnum);
562 563 if (val == NULL)
563 564 goto bail;
564 565 r = PyList_Append(reachable, val);
565 566 Py_DECREF(val);
566 567 if (r < 0)
567 568 goto bail;
568 569 if (includepath == 0)
569 570 continue;
570 571 }
571 572
572 573 /* Add its parents to the list of nodes to visit */
573 574 if (revnum == -1)
574 575 continue;
575 576 r = index_get_parents(self, revnum, parents, (int)len - 1);
576 577 if (r < 0)
577 578 goto bail;
578 579 for (i = 0; i < 2; i++) {
579 580 if (!(revstates[parents[i] + 1] & RS_SEEN)
580 581 && parents[i] >= minroot) {
581 582 tovisit[lentovisit++] = parents[i];
582 583 revstates[parents[i] + 1] |= RS_SEEN;
583 584 }
584 585 }
585 586 }
586 587
587 588 /* Find all the nodes in between the roots we found and the heads
588 589 * and add them to the reachable set */
589 590 if (includepath == 1) {
590 591 long minidx = minroot;
591 592 if (minidx < 0)
592 593 minidx = 0;
593 594 for (i = minidx; i < len; i++) {
594 595 if (!(revstates[i + 1] & RS_SEEN))
595 596 continue;
596 597 r = index_get_parents(self, i, parents, (int)len - 1);
597 598 /* Corrupted index file, error is set from
598 599 * index_get_parents */
599 600 if (r < 0)
600 601 goto bail;
601 602 if (((revstates[parents[0] + 1] |
602 603 revstates[parents[1] + 1]) & RS_REACHABLE)
603 604 && !(revstates[i + 1] & RS_REACHABLE)) {
604 605 revstates[i + 1] |= RS_REACHABLE;
605 606 val = PyInt_FromLong(i);
606 607 if (val == NULL)
607 608 goto bail;
608 609 r = PyList_Append(reachable, val);
609 610 Py_DECREF(val);
610 611 if (r < 0)
611 612 goto bail;
612 613 }
613 614 }
614 615 }
615 616
616 617 free(revstates);
617 618 free(tovisit);
618 619 return reachable;
619 620 bail:
620 621 Py_XDECREF(reachable);
621 622 free(revstates);
622 623 free(tovisit);
623 624 return NULL;
624 625 }
625 626
626 627 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
627 628 {
628 629 PyObject *roots = Py_None;
629 630 PyObject *ret = NULL;
630 631 PyObject *phaseslist = NULL;
631 632 PyObject *phaseroots = NULL;
632 633 PyObject *phaseset = NULL;
633 634 PyObject *phasessetlist = NULL;
634 635 PyObject *rev = NULL;
635 636 Py_ssize_t len = index_length(self) - 1;
636 637 Py_ssize_t numphase = 0;
637 638 Py_ssize_t minrevallphases = 0;
638 639 Py_ssize_t minrevphase = 0;
639 640 Py_ssize_t i = 0;
640 641 char *phases = NULL;
641 642 long phase;
642 643
643 644 if (!PyArg_ParseTuple(args, "O", &roots))
644 645 goto done;
645 646 if (roots == NULL || !PyList_Check(roots))
646 647 goto done;
647 648
648 649 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
649 650 if (phases == NULL) {
650 651 PyErr_NoMemory();
651 652 goto done;
652 653 }
653 654 /* Put the phase information of all the roots in phases */
654 655 numphase = PyList_GET_SIZE(roots)+1;
655 656 minrevallphases = len + 1;
656 657 phasessetlist = PyList_New(numphase);
657 658 if (phasessetlist == NULL)
658 659 goto done;
659 660
660 661 PyList_SET_ITEM(phasessetlist, 0, Py_None);
661 662 Py_INCREF(Py_None);
662 663
663 664 for (i = 0; i < numphase-1; i++) {
664 665 phaseroots = PyList_GET_ITEM(roots, i);
665 666 phaseset = PySet_New(NULL);
666 667 if (phaseset == NULL)
667 668 goto release;
668 669 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
669 670 if (!PyList_Check(phaseroots))
670 671 goto release;
671 672 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
672 673 if (minrevphase == -2) /* Error from add_roots_get_min */
673 674 goto release;
674 675 minrevallphases = MIN(minrevallphases, minrevphase);
675 676 }
676 677 /* Propagate the phase information from the roots to the revs */
677 678 if (minrevallphases != -1) {
678 679 int parents[2];
679 680 for (i = minrevallphases; i < len; i++) {
680 681 if (index_get_parents(self, i, parents,
681 682 (int)len - 1) < 0)
682 683 goto release;
683 684 set_phase_from_parents(phases, parents[0], parents[1], i);
684 685 }
685 686 }
686 687 /* Transform phase list to a python list */
687 688 phaseslist = PyList_New(len);
688 689 if (phaseslist == NULL)
689 690 goto release;
690 691 for (i = 0; i < len; i++) {
691 692 PyObject *phaseval;
692 693
693 694 phase = phases[i];
694 695 /* We only store the sets of phase for non public phase, the public phase
695 696 * is computed as a difference */
696 697 if (phase != 0) {
697 698 phaseset = PyList_GET_ITEM(phasessetlist, phase);
698 699 rev = PyInt_FromLong(i);
699 700 if (rev == NULL)
700 701 goto release;
701 702 PySet_Add(phaseset, rev);
702 703 Py_XDECREF(rev);
703 704 }
704 705 phaseval = PyInt_FromLong(phase);
705 706 if (phaseval == NULL)
706 707 goto release;
707 708 PyList_SET_ITEM(phaseslist, i, phaseval);
708 709 }
709 710 ret = PyTuple_Pack(2, phaseslist, phasessetlist);
710 711
711 712 release:
712 713 Py_XDECREF(phaseslist);
713 714 Py_XDECREF(phasessetlist);
714 715 done:
715 716 free(phases);
716 717 return ret;
717 718 }
718 719
719 720 static PyObject *index_headrevs(indexObject *self, PyObject *args)
720 721 {
721 722 Py_ssize_t i, j, len;
722 723 char *nothead = NULL;
723 724 PyObject *heads = NULL;
724 725 PyObject *filter = NULL;
725 726 PyObject *filteredrevs = Py_None;
726 727
727 728 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
728 729 return NULL;
729 730 }
730 731
731 732 if (self->headrevs && filteredrevs == self->filteredrevs)
732 733 return list_copy(self->headrevs);
733 734
734 735 Py_DECREF(self->filteredrevs);
735 736 self->filteredrevs = filteredrevs;
736 737 Py_INCREF(filteredrevs);
737 738
738 739 if (filteredrevs != Py_None) {
739 740 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
740 741 if (!filter) {
741 742 PyErr_SetString(PyExc_TypeError,
742 743 "filteredrevs has no attribute __contains__");
743 744 goto bail;
744 745 }
745 746 }
746 747
747 748 len = index_length(self) - 1;
748 749 heads = PyList_New(0);
749 750 if (heads == NULL)
750 751 goto bail;
751 752 if (len == 0) {
752 753 PyObject *nullid = PyInt_FromLong(-1);
753 754 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
754 755 Py_XDECREF(nullid);
755 756 goto bail;
756 757 }
757 758 goto done;
758 759 }
759 760
760 761 nothead = calloc(len, 1);
761 762 if (nothead == NULL) {
762 763 PyErr_NoMemory();
763 764 goto bail;
764 765 }
765 766
766 767 for (i = len - 1; i >= 0; i--) {
767 768 int isfiltered;
768 769 int parents[2];
769 770
770 771 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
771 772 * node already, and therefore this node is not filtered. So we can skip
772 773 * the expensive check_filter step.
773 774 */
774 775 if (nothead[i] != 1) {
775 776 isfiltered = check_filter(filter, i);
776 777 if (isfiltered == -1) {
777 778 PyErr_SetString(PyExc_TypeError,
778 779 "unable to check filter");
779 780 goto bail;
780 781 }
781 782
782 783 if (isfiltered) {
783 784 nothead[i] = 1;
784 785 continue;
785 786 }
786 787 }
787 788
788 789 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
789 790 goto bail;
790 791 for (j = 0; j < 2; j++) {
791 792 if (parents[j] >= 0)
792 793 nothead[parents[j]] = 1;
793 794 }
794 795 }
795 796
796 797 for (i = 0; i < len; i++) {
797 798 PyObject *head;
798 799
799 800 if (nothead[i])
800 801 continue;
801 802 head = PyInt_FromSsize_t(i);
802 803 if (head == NULL || PyList_Append(heads, head) == -1) {
803 804 Py_XDECREF(head);
804 805 goto bail;
805 806 }
806 807 }
807 808
808 809 done:
809 810 self->headrevs = heads;
810 811 Py_XDECREF(filter);
811 812 free(nothead);
812 813 return list_copy(self->headrevs);
813 814 bail:
814 815 Py_XDECREF(filter);
815 816 Py_XDECREF(heads);
816 817 free(nothead);
817 818 return NULL;
818 819 }
819 820
820 821 /**
821 822 * Obtain the base revision index entry.
822 823 *
823 824 * Callers must ensure that rev >= 0 or illegal memory access may occur.
824 825 */
825 826 static inline int index_baserev(indexObject *self, int rev)
826 827 {
827 828 const char *data;
828 829
829 830 if (rev >= self->length - 1) {
830 831 PyObject *tuple = PyList_GET_ITEM(self->added,
831 832 rev - self->length + 1);
832 833 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
833 834 }
834 835 else {
835 836 data = index_deref(self, rev);
836 837 if (data == NULL) {
837 838 return -2;
838 839 }
839 840
840 841 return getbe32(data + 16);
841 842 }
842 843 }
843 844
844 845 static PyObject *index_deltachain(indexObject *self, PyObject *args)
845 846 {
846 847 int rev, generaldelta;
847 848 PyObject *stoparg;
848 849 int stoprev, iterrev, baserev = -1;
849 850 int stopped;
850 851 PyObject *chain = NULL, *result = NULL;
851 852 const Py_ssize_t length = index_length(self);
852 853
853 854 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
854 855 return NULL;
855 856 }
856 857
857 858 if (PyInt_Check(stoparg)) {
858 859 stoprev = (int)PyInt_AsLong(stoparg);
859 860 if (stoprev == -1 && PyErr_Occurred()) {
860 861 return NULL;
861 862 }
862 863 }
863 864 else if (stoparg == Py_None) {
864 865 stoprev = -2;
865 866 }
866 867 else {
867 868 PyErr_SetString(PyExc_ValueError,
868 869 "stoprev must be integer or None");
869 870 return NULL;
870 871 }
871 872
872 873 if (rev < 0 || rev >= length - 1) {
873 874 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
874 875 return NULL;
875 876 }
876 877
877 878 chain = PyList_New(0);
878 879 if (chain == NULL) {
879 880 return NULL;
880 881 }
881 882
882 883 baserev = index_baserev(self, rev);
883 884
884 885 /* This should never happen. */
885 886 if (baserev <= -2) {
886 887 /* Error should be set by index_deref() */
887 888 assert(PyErr_Occurred());
888 889 goto bail;
889 890 }
890 891
891 892 iterrev = rev;
892 893
893 894 while (iterrev != baserev && iterrev != stoprev) {
894 895 PyObject *value = PyInt_FromLong(iterrev);
895 896 if (value == NULL) {
896 897 goto bail;
897 898 }
898 899 if (PyList_Append(chain, value)) {
899 900 Py_DECREF(value);
900 901 goto bail;
901 902 }
902 903 Py_DECREF(value);
903 904
904 905 if (generaldelta) {
905 906 iterrev = baserev;
906 907 }
907 908 else {
908 909 iterrev--;
909 910 }
910 911
911 912 if (iterrev < 0) {
912 913 break;
913 914 }
914 915
915 916 if (iterrev >= length - 1) {
916 917 PyErr_SetString(PyExc_IndexError, "revision outside index");
917 918 return NULL;
918 919 }
919 920
920 921 baserev = index_baserev(self, iterrev);
921 922
922 923 /* This should never happen. */
923 924 if (baserev <= -2) {
924 925 /* Error should be set by index_deref() */
925 926 assert(PyErr_Occurred());
926 927 goto bail;
927 928 }
928 929 }
929 930
930 931 if (iterrev == stoprev) {
931 932 stopped = 1;
932 933 }
933 934 else {
934 935 PyObject *value = PyInt_FromLong(iterrev);
935 936 if (value == NULL) {
936 937 goto bail;
937 938 }
938 939 if (PyList_Append(chain, value)) {
939 940 Py_DECREF(value);
940 941 goto bail;
941 942 }
942 943 Py_DECREF(value);
943 944
944 945 stopped = 0;
945 946 }
946 947
947 948 if (PyList_Reverse(chain)) {
948 949 goto bail;
949 950 }
950 951
951 952 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
952 953 Py_DECREF(chain);
953 954 return result;
954 955
955 956 bail:
956 957 Py_DECREF(chain);
957 958 return NULL;
958 959 }
959 960
960 961 static inline int nt_level(const char *node, Py_ssize_t level)
961 962 {
962 963 int v = node[level>>1];
963 964 if (!(level & 1))
964 965 v >>= 4;
965 966 return v & 0xf;
966 967 }
967 968
968 969 /*
969 970 * Return values:
970 971 *
971 972 * -4: match is ambiguous (multiple candidates)
972 973 * -2: not found
973 974 * rest: valid rev
974 975 */
975 976 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
976 977 int hex)
977 978 {
978 979 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
979 980 int level, maxlevel, off;
980 981
981 982 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
982 983 return -1;
983 984
984 985 if (self->nt == NULL)
985 986 return -2;
986 987
987 988 if (hex)
988 989 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
989 990 else
990 991 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
991 992
992 993 for (level = off = 0; level < maxlevel; level++) {
993 994 int k = getnybble(node, level);
994 995 nodetree *n = &self->nt[off];
995 996 int v = n->children[k];
996 997
997 998 if (v < 0) {
998 999 const char *n;
999 1000 Py_ssize_t i;
1000 1001
1001 1002 v = -(v + 1);
1002 1003 n = index_node(self, v);
1003 1004 if (n == NULL)
1004 1005 return -2;
1005 1006 for (i = level; i < maxlevel; i++)
1006 1007 if (getnybble(node, i) != nt_level(n, i))
1007 1008 return -2;
1008 1009 return v;
1009 1010 }
1010 1011 if (v == 0)
1011 1012 return -2;
1012 1013 off = v;
1013 1014 }
1014 1015 /* multiple matches against an ambiguous prefix */
1015 1016 return -4;
1016 1017 }
1017 1018
1018 1019 static int nt_new(indexObject *self)
1019 1020 {
1020 1021 if (self->ntlength == self->ntcapacity) {
1021 1022 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1022 1023 PyErr_SetString(PyExc_MemoryError,
1023 1024 "overflow in nt_new");
1024 1025 return -1;
1025 1026 }
1026 1027 self->ntcapacity *= 2;
1027 1028 self->nt = realloc(self->nt,
1028 1029 self->ntcapacity * sizeof(nodetree));
1029 1030 if (self->nt == NULL) {
1030 1031 PyErr_SetString(PyExc_MemoryError, "out of memory");
1031 1032 return -1;
1032 1033 }
1033 1034 memset(&self->nt[self->ntlength], 0,
1034 1035 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1035 1036 }
1036 1037 return self->ntlength++;
1037 1038 }
1038 1039
1039 1040 static int nt_insert(indexObject *self, const char *node, int rev)
1040 1041 {
1041 1042 int level = 0;
1042 1043 int off = 0;
1043 1044
1044 1045 while (level < 40) {
1045 1046 int k = nt_level(node, level);
1046 1047 nodetree *n;
1047 1048 int v;
1048 1049
1049 1050 n = &self->nt[off];
1050 1051 v = n->children[k];
1051 1052
1052 1053 if (v == 0) {
1053 1054 n->children[k] = -rev - 1;
1054 1055 return 0;
1055 1056 }
1056 1057 if (v < 0) {
1057 1058 const char *oldnode = index_node(self, -(v + 1));
1058 1059 int noff;
1059 1060
1060 1061 if (!oldnode || !memcmp(oldnode, node, 20)) {
1061 1062 n->children[k] = -rev - 1;
1062 1063 return 0;
1063 1064 }
1064 1065 noff = nt_new(self);
1065 1066 if (noff == -1)
1066 1067 return -1;
1067 1068 /* self->nt may have been changed by realloc */
1068 1069 self->nt[off].children[k] = noff;
1069 1070 off = noff;
1070 1071 n = &self->nt[off];
1071 1072 n->children[nt_level(oldnode, ++level)] = v;
1072 1073 if (level > self->ntdepth)
1073 1074 self->ntdepth = level;
1074 1075 self->ntsplits += 1;
1075 1076 } else {
1076 1077 level += 1;
1077 1078 off = v;
1078 1079 }
1079 1080 }
1080 1081
1081 1082 return -1;
1082 1083 }
1083 1084
1084 1085 static int nt_init(indexObject *self)
1085 1086 {
1086 1087 if (self->nt == NULL) {
1087 1088 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1088 1089 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1089 1090 return -1;
1090 1091 }
1091 1092 self->ntcapacity = self->raw_length < 4
1092 1093 ? 4 : (int)self->raw_length / 2;
1093 1094
1094 1095 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1095 1096 if (self->nt == NULL) {
1096 1097 PyErr_NoMemory();
1097 1098 return -1;
1098 1099 }
1099 1100 self->ntlength = 1;
1100 1101 self->ntrev = (int)index_length(self) - 1;
1101 1102 self->ntlookups = 1;
1102 1103 self->ntmisses = 0;
1103 1104 if (nt_insert(self, nullid, INT_MAX) == -1)
1104 1105 return -1;
1105 1106 }
1106 1107 return 0;
1107 1108 }
1108 1109
1109 1110 /*
1110 1111 * Return values:
1111 1112 *
1112 1113 * -3: error (exception set)
1113 1114 * -2: not found (no exception set)
1114 1115 * rest: valid rev
1115 1116 */
1116 1117 static int index_find_node(indexObject *self,
1117 1118 const char *node, Py_ssize_t nodelen)
1118 1119 {
1119 1120 int rev;
1120 1121
1121 1122 self->ntlookups++;
1122 1123 rev = nt_find(self, node, nodelen, 0);
1123 1124 if (rev >= -1)
1124 1125 return rev;
1125 1126
1126 1127 if (nt_init(self) == -1)
1127 1128 return -3;
1128 1129
1129 1130 /*
1130 1131 * For the first handful of lookups, we scan the entire index,
1131 1132 * and cache only the matching nodes. This optimizes for cases
1132 1133 * like "hg tip", where only a few nodes are accessed.
1133 1134 *
1134 1135 * After that, we cache every node we visit, using a single
1135 1136 * scan amortized over multiple lookups. This gives the best
1136 1137 * bulk performance, e.g. for "hg log".
1137 1138 */
1138 1139 if (self->ntmisses++ < 4) {
1139 1140 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1140 1141 const char *n = index_node(self, rev);
1141 1142 if (n == NULL)
1142 1143 return -2;
1143 1144 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1144 1145 if (nt_insert(self, n, rev) == -1)
1145 1146 return -3;
1146 1147 break;
1147 1148 }
1148 1149 }
1149 1150 } else {
1150 1151 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1151 1152 const char *n = index_node(self, rev);
1152 1153 if (n == NULL) {
1153 1154 self->ntrev = rev + 1;
1154 1155 return -2;
1155 1156 }
1156 1157 if (nt_insert(self, n, rev) == -1) {
1157 1158 self->ntrev = rev + 1;
1158 1159 return -3;
1159 1160 }
1160 1161 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1161 1162 break;
1162 1163 }
1163 1164 }
1164 1165 self->ntrev = rev;
1165 1166 }
1166 1167
1167 1168 if (rev >= 0)
1168 1169 return rev;
1169 1170 return -2;
1170 1171 }
1171 1172
1172 1173 static void raise_revlog_error(void)
1173 1174 {
1174 1175 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1175 1176
1176 1177 mod = PyImport_ImportModule("mercurial.error");
1177 1178 if (mod == NULL) {
1178 1179 goto cleanup;
1179 1180 }
1180 1181
1181 1182 dict = PyModule_GetDict(mod);
1182 1183 if (dict == NULL) {
1183 1184 goto cleanup;
1184 1185 }
1185 1186 Py_INCREF(dict);
1186 1187
1187 1188 errclass = PyDict_GetItemString(dict, "RevlogError");
1188 1189 if (errclass == NULL) {
1189 1190 PyErr_SetString(PyExc_SystemError,
1190 1191 "could not find RevlogError");
1191 1192 goto cleanup;
1192 1193 }
1193 1194
1194 1195 /* value of exception is ignored by callers */
1195 1196 PyErr_SetString(errclass, "RevlogError");
1196 1197
1197 1198 cleanup:
1198 1199 Py_XDECREF(dict);
1199 1200 Py_XDECREF(mod);
1200 1201 }
1201 1202
1202 1203 static PyObject *index_getitem(indexObject *self, PyObject *value)
1203 1204 {
1204 1205 char *node;
1205 1206 Py_ssize_t nodelen;
1206 1207 int rev;
1207 1208
1208 1209 if (PyInt_Check(value))
1209 1210 return index_get(self, PyInt_AS_LONG(value));
1210 1211
1211 1212 if (node_check(value, &node, &nodelen) == -1)
1212 1213 return NULL;
1213 1214 rev = index_find_node(self, node, nodelen);
1214 1215 if (rev >= -1)
1215 1216 return PyInt_FromLong(rev);
1216 1217 if (rev == -2)
1217 1218 raise_revlog_error();
1218 1219 return NULL;
1219 1220 }
1220 1221
1221 1222 static int nt_partialmatch(indexObject *self, const char *node,
1222 1223 Py_ssize_t nodelen)
1223 1224 {
1224 1225 int rev;
1225 1226
1226 1227 if (nt_init(self) == -1)
1227 1228 return -3;
1228 1229
1229 1230 if (self->ntrev > 0) {
1230 1231 /* ensure that the radix tree is fully populated */
1231 1232 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1232 1233 const char *n = index_node(self, rev);
1233 1234 if (n == NULL)
1234 1235 return -2;
1235 1236 if (nt_insert(self, n, rev) == -1)
1236 1237 return -3;
1237 1238 }
1238 1239 self->ntrev = rev;
1239 1240 }
1240 1241
1241 1242 return nt_find(self, node, nodelen, 1);
1242 1243 }
1243 1244
1244 1245 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1245 1246 {
1246 1247 const char *fullnode;
1247 1248 int nodelen;
1248 1249 char *node;
1249 1250 int rev, i;
1250 1251
1251 1252 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1252 1253 return NULL;
1253 1254
1254 1255 if (nodelen < 4) {
1255 1256 PyErr_SetString(PyExc_ValueError, "key too short");
1256 1257 return NULL;
1257 1258 }
1258 1259
1259 1260 if (nodelen > 40) {
1260 1261 PyErr_SetString(PyExc_ValueError, "key too long");
1261 1262 return NULL;
1262 1263 }
1263 1264
1264 1265 for (i = 0; i < nodelen; i++)
1265 1266 hexdigit(node, i);
1266 1267 if (PyErr_Occurred()) {
1267 1268 /* input contains non-hex characters */
1268 1269 PyErr_Clear();
1269 1270 Py_RETURN_NONE;
1270 1271 }
1271 1272
1272 1273 rev = nt_partialmatch(self, node, nodelen);
1273 1274
1274 1275 switch (rev) {
1275 1276 case -4:
1276 1277 raise_revlog_error();
1277 1278 case -3:
1278 1279 return NULL;
1279 1280 case -2:
1280 1281 Py_RETURN_NONE;
1281 1282 case -1:
1282 1283 return PyBytes_FromStringAndSize(nullid, 20);
1283 1284 }
1284 1285
1285 1286 fullnode = index_node(self, rev);
1286 1287 if (fullnode == NULL) {
1287 1288 PyErr_Format(PyExc_IndexError,
1288 1289 "could not access rev %d", rev);
1289 1290 return NULL;
1290 1291 }
1291 1292 return PyBytes_FromStringAndSize(fullnode, 20);
1292 1293 }
1293 1294
1294 1295 static PyObject *index_m_get(indexObject *self, PyObject *args)
1295 1296 {
1296 1297 Py_ssize_t nodelen;
1297 1298 PyObject *val;
1298 1299 char *node;
1299 1300 int rev;
1300 1301
1301 1302 if (!PyArg_ParseTuple(args, "O", &val))
1302 1303 return NULL;
1303 1304 if (node_check(val, &node, &nodelen) == -1)
1304 1305 return NULL;
1305 1306 rev = index_find_node(self, node, nodelen);
1306 1307 if (rev == -3)
1307 1308 return NULL;
1308 1309 if (rev == -2)
1309 1310 Py_RETURN_NONE;
1310 1311 return PyInt_FromLong(rev);
1311 1312 }
1312 1313
1313 1314 static int index_contains(indexObject *self, PyObject *value)
1314 1315 {
1315 1316 char *node;
1316 1317 Py_ssize_t nodelen;
1317 1318
1318 1319 if (PyInt_Check(value)) {
1319 1320 long rev = PyInt_AS_LONG(value);
1320 1321 return rev >= -1 && rev < index_length(self);
1321 1322 }
1322 1323
1323 1324 if (node_check(value, &node, &nodelen) == -1)
1324 1325 return -1;
1325 1326
1326 1327 switch (index_find_node(self, node, nodelen)) {
1327 1328 case -3:
1328 1329 return -1;
1329 1330 case -2:
1330 1331 return 0;
1331 1332 default:
1332 1333 return 1;
1333 1334 }
1334 1335 }
1335 1336
1336 1337 typedef uint64_t bitmask;
1337 1338
1338 1339 /*
1339 1340 * Given a disjoint set of revs, return all candidates for the
1340 1341 * greatest common ancestor. In revset notation, this is the set
1341 1342 * "heads(::a and ::b and ...)"
1342 1343 */
1343 1344 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1344 1345 int revcount)
1345 1346 {
1346 1347 const bitmask allseen = (1ull << revcount) - 1;
1347 1348 const bitmask poison = 1ull << revcount;
1348 1349 PyObject *gca = PyList_New(0);
1349 1350 int i, v, interesting;
1350 1351 int maxrev = -1;
1351 1352 bitmask sp;
1352 1353 bitmask *seen;
1353 1354
1354 1355 if (gca == NULL)
1355 1356 return PyErr_NoMemory();
1356 1357
1357 1358 for (i = 0; i < revcount; i++) {
1358 1359 if (revs[i] > maxrev)
1359 1360 maxrev = revs[i];
1360 1361 }
1361 1362
1362 1363 seen = calloc(sizeof(*seen), maxrev + 1);
1363 1364 if (seen == NULL) {
1364 1365 Py_DECREF(gca);
1365 1366 return PyErr_NoMemory();
1366 1367 }
1367 1368
1368 1369 for (i = 0; i < revcount; i++)
1369 1370 seen[revs[i]] = 1ull << i;
1370 1371
1371 1372 interesting = revcount;
1372 1373
1373 1374 for (v = maxrev; v >= 0 && interesting; v--) {
1374 1375 bitmask sv = seen[v];
1375 1376 int parents[2];
1376 1377
1377 1378 if (!sv)
1378 1379 continue;
1379 1380
1380 1381 if (sv < poison) {
1381 1382 interesting -= 1;
1382 1383 if (sv == allseen) {
1383 1384 PyObject *obj = PyInt_FromLong(v);
1384 1385 if (obj == NULL)
1385 1386 goto bail;
1386 1387 if (PyList_Append(gca, obj) == -1) {
1387 1388 Py_DECREF(obj);
1388 1389 goto bail;
1389 1390 }
1390 1391 sv |= poison;
1391 1392 for (i = 0; i < revcount; i++) {
1392 1393 if (revs[i] == v)
1393 1394 goto done;
1394 1395 }
1395 1396 }
1396 1397 }
1397 1398 if (index_get_parents(self, v, parents, maxrev) < 0)
1398 1399 goto bail;
1399 1400
1400 1401 for (i = 0; i < 2; i++) {
1401 1402 int p = parents[i];
1402 1403 if (p == -1)
1403 1404 continue;
1404 1405 sp = seen[p];
1405 1406 if (sv < poison) {
1406 1407 if (sp == 0) {
1407 1408 seen[p] = sv;
1408 1409 interesting++;
1409 1410 }
1410 1411 else if (sp != sv)
1411 1412 seen[p] |= sv;
1412 1413 } else {
1413 1414 if (sp && sp < poison)
1414 1415 interesting--;
1415 1416 seen[p] = sv;
1416 1417 }
1417 1418 }
1418 1419 }
1419 1420
1420 1421 done:
1421 1422 free(seen);
1422 1423 return gca;
1423 1424 bail:
1424 1425 free(seen);
1425 1426 Py_XDECREF(gca);
1426 1427 return NULL;
1427 1428 }
1428 1429
1429 1430 /*
1430 1431 * Given a disjoint set of revs, return the subset with the longest
1431 1432 * path to the root.
1432 1433 */
1433 1434 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1434 1435 {
1435 1436 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1436 1437 static const Py_ssize_t capacity = 24;
1437 1438 int *depth, *interesting = NULL;
1438 1439 int i, j, v, ninteresting;
1439 1440 PyObject *dict = NULL, *keys = NULL;
1440 1441 long *seen = NULL;
1441 1442 int maxrev = -1;
1442 1443 long final;
1443 1444
1444 1445 if (revcount > capacity) {
1445 1446 PyErr_Format(PyExc_OverflowError,
1446 1447 "bitset size (%ld) > capacity (%ld)",
1447 1448 (long)revcount, (long)capacity);
1448 1449 return NULL;
1449 1450 }
1450 1451
1451 1452 for (i = 0; i < revcount; i++) {
1452 1453 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1453 1454 if (n > maxrev)
1454 1455 maxrev = n;
1455 1456 }
1456 1457
1457 1458 depth = calloc(sizeof(*depth), maxrev + 1);
1458 1459 if (depth == NULL)
1459 1460 return PyErr_NoMemory();
1460 1461
1461 1462 seen = calloc(sizeof(*seen), maxrev + 1);
1462 1463 if (seen == NULL) {
1463 1464 PyErr_NoMemory();
1464 1465 goto bail;
1465 1466 }
1466 1467
1467 1468 interesting = calloc(sizeof(*interesting), 1 << revcount);
1468 1469 if (interesting == NULL) {
1469 1470 PyErr_NoMemory();
1470 1471 goto bail;
1471 1472 }
1472 1473
1473 1474 if (PyList_Sort(revs) == -1)
1474 1475 goto bail;
1475 1476
1476 1477 for (i = 0; i < revcount; i++) {
1477 1478 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1478 1479 long b = 1l << i;
1479 1480 depth[n] = 1;
1480 1481 seen[n] = b;
1481 1482 interesting[b] = 1;
1482 1483 }
1483 1484
1484 1485 /* invariant: ninteresting is the number of non-zero entries in
1485 1486 * interesting. */
1486 1487 ninteresting = (int)revcount;
1487 1488
1488 1489 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1489 1490 int dv = depth[v];
1490 1491 int parents[2];
1491 1492 long sv;
1492 1493
1493 1494 if (dv == 0)
1494 1495 continue;
1495 1496
1496 1497 sv = seen[v];
1497 1498 if (index_get_parents(self, v, parents, maxrev) < 0)
1498 1499 goto bail;
1499 1500
1500 1501 for (i = 0; i < 2; i++) {
1501 1502 int p = parents[i];
1502 1503 long sp;
1503 1504 int dp;
1504 1505
1505 1506 if (p == -1)
1506 1507 continue;
1507 1508
1508 1509 dp = depth[p];
1509 1510 sp = seen[p];
1510 1511 if (dp <= dv) {
1511 1512 depth[p] = dv + 1;
1512 1513 if (sp != sv) {
1513 1514 interesting[sv] += 1;
1514 1515 seen[p] = sv;
1515 1516 if (sp) {
1516 1517 interesting[sp] -= 1;
1517 1518 if (interesting[sp] == 0)
1518 1519 ninteresting -= 1;
1519 1520 }
1520 1521 }
1521 1522 }
1522 1523 else if (dv == dp - 1) {
1523 1524 long nsp = sp | sv;
1524 1525 if (nsp == sp)
1525 1526 continue;
1526 1527 seen[p] = nsp;
1527 1528 interesting[sp] -= 1;
1528 1529 if (interesting[sp] == 0)
1529 1530 ninteresting -= 1;
1530 1531 if (interesting[nsp] == 0)
1531 1532 ninteresting += 1;
1532 1533 interesting[nsp] += 1;
1533 1534 }
1534 1535 }
1535 1536 interesting[sv] -= 1;
1536 1537 if (interesting[sv] == 0)
1537 1538 ninteresting -= 1;
1538 1539 }
1539 1540
1540 1541 final = 0;
1541 1542 j = ninteresting;
1542 1543 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1543 1544 if (interesting[i] == 0)
1544 1545 continue;
1545 1546 final |= i;
1546 1547 j -= 1;
1547 1548 }
1548 1549 if (final == 0) {
1549 1550 keys = PyList_New(0);
1550 1551 goto bail;
1551 1552 }
1552 1553
1553 1554 dict = PyDict_New();
1554 1555 if (dict == NULL)
1555 1556 goto bail;
1556 1557
1557 1558 for (i = 0; i < revcount; i++) {
1558 1559 PyObject *key;
1559 1560
1560 1561 if ((final & (1 << i)) == 0)
1561 1562 continue;
1562 1563
1563 1564 key = PyList_GET_ITEM(revs, i);
1564 1565 Py_INCREF(key);
1565 1566 Py_INCREF(Py_None);
1566 1567 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1567 1568 Py_DECREF(key);
1568 1569 Py_DECREF(Py_None);
1569 1570 goto bail;
1570 1571 }
1571 1572 }
1572 1573
1573 1574 keys = PyDict_Keys(dict);
1574 1575
1575 1576 bail:
1576 1577 free(depth);
1577 1578 free(seen);
1578 1579 free(interesting);
1579 1580 Py_XDECREF(dict);
1580 1581
1581 1582 return keys;
1582 1583 }
1583 1584
1584 1585 /*
1585 1586 * Given a (possibly overlapping) set of revs, return all the
1586 1587 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1587 1588 */
1588 1589 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1589 1590 {
1590 1591 PyObject *ret = NULL;
1591 1592 Py_ssize_t argcount, i, len;
1592 1593 bitmask repeat = 0;
1593 1594 int revcount = 0;
1594 1595 int *revs;
1595 1596
1596 1597 argcount = PySequence_Length(args);
1597 1598 revs = PyMem_Malloc(argcount * sizeof(*revs));
1598 1599 if (argcount > 0 && revs == NULL)
1599 1600 return PyErr_NoMemory();
1600 1601 len = index_length(self) - 1;
1601 1602
1602 1603 for (i = 0; i < argcount; i++) {
1603 1604 static const int capacity = 24;
1604 1605 PyObject *obj = PySequence_GetItem(args, i);
1605 1606 bitmask x;
1606 1607 long val;
1607 1608
1608 1609 if (!PyInt_Check(obj)) {
1609 1610 PyErr_SetString(PyExc_TypeError,
1610 1611 "arguments must all be ints");
1611 1612 Py_DECREF(obj);
1612 1613 goto bail;
1613 1614 }
1614 1615 val = PyInt_AsLong(obj);
1615 1616 Py_DECREF(obj);
1616 1617 if (val == -1) {
1617 1618 ret = PyList_New(0);
1618 1619 goto done;
1619 1620 }
1620 1621 if (val < 0 || val >= len) {
1621 1622 PyErr_SetString(PyExc_IndexError,
1622 1623 "index out of range");
1623 1624 goto bail;
1624 1625 }
1625 1626 /* this cheesy bloom filter lets us avoid some more
1626 1627 * expensive duplicate checks in the common set-is-disjoint
1627 1628 * case */
1628 1629 x = 1ull << (val & 0x3f);
1629 1630 if (repeat & x) {
1630 1631 int k;
1631 1632 for (k = 0; k < revcount; k++) {
1632 1633 if (val == revs[k])
1633 1634 goto duplicate;
1634 1635 }
1635 1636 }
1636 1637 else repeat |= x;
1637 1638 if (revcount >= capacity) {
1638 1639 PyErr_Format(PyExc_OverflowError,
1639 1640 "bitset size (%d) > capacity (%d)",
1640 1641 revcount, capacity);
1641 1642 goto bail;
1642 1643 }
1643 1644 revs[revcount++] = (int)val;
1644 1645 duplicate:;
1645 1646 }
1646 1647
1647 1648 if (revcount == 0) {
1648 1649 ret = PyList_New(0);
1649 1650 goto done;
1650 1651 }
1651 1652 if (revcount == 1) {
1652 1653 PyObject *obj;
1653 1654 ret = PyList_New(1);
1654 1655 if (ret == NULL)
1655 1656 goto bail;
1656 1657 obj = PyInt_FromLong(revs[0]);
1657 1658 if (obj == NULL)
1658 1659 goto bail;
1659 1660 PyList_SET_ITEM(ret, 0, obj);
1660 1661 goto done;
1661 1662 }
1662 1663
1663 1664 ret = find_gca_candidates(self, revs, revcount);
1664 1665 if (ret == NULL)
1665 1666 goto bail;
1666 1667
1667 1668 done:
1668 1669 PyMem_Free(revs);
1669 1670 return ret;
1670 1671
1671 1672 bail:
1672 1673 PyMem_Free(revs);
1673 1674 Py_XDECREF(ret);
1674 1675 return NULL;
1675 1676 }
1676 1677
1677 1678 /*
1678 1679 * Given a (possibly overlapping) set of revs, return the greatest
1679 1680 * common ancestors: those with the longest path to the root.
1680 1681 */
1681 1682 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1682 1683 {
1683 1684 PyObject *ret;
1684 1685 PyObject *gca = index_commonancestorsheads(self, args);
1685 1686 if (gca == NULL)
1686 1687 return NULL;
1687 1688
1688 1689 if (PyList_GET_SIZE(gca) <= 1) {
1689 1690 return gca;
1690 1691 }
1691 1692
1692 1693 ret = find_deepest(self, gca);
1693 1694 Py_DECREF(gca);
1694 1695 return ret;
1695 1696 }
1696 1697
1697 1698 /*
1698 1699 * Invalidate any trie entries introduced by added revs.
1699 1700 */
1700 1701 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1701 1702 {
1702 1703 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1703 1704
1704 1705 for (i = start; i < len; i++) {
1705 1706 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1706 1707 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1707 1708
1708 1709 nt_insert(self, PyBytes_AS_STRING(node), -1);
1709 1710 }
1710 1711
1711 1712 if (start == 0)
1712 1713 Py_CLEAR(self->added);
1713 1714 }
1714 1715
1715 1716 /*
1716 1717 * Delete a numeric range of revs, which must be at the end of the
1717 1718 * range, but exclude the sentinel nullid entry.
1718 1719 */
1719 1720 static int index_slice_del(indexObject *self, PyObject *item)
1720 1721 {
1721 1722 Py_ssize_t start, stop, step, slicelength;
1722 1723 Py_ssize_t length = index_length(self);
1723 1724 int ret = 0;
1724 1725
1725 1726 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1726 1727 #ifdef IS_PY3K
1727 1728 if (PySlice_GetIndicesEx(item, length,
1728 1729 #else
1729 1730 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1730 1731 #endif
1731 1732 &start, &stop, &step, &slicelength) < 0)
1732 1733 return -1;
1733 1734
1734 1735 if (slicelength <= 0)
1735 1736 return 0;
1736 1737
1737 1738 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1738 1739 stop = start;
1739 1740
1740 1741 if (step < 0) {
1741 1742 stop = start + 1;
1742 1743 start = stop + step*(slicelength - 1) - 1;
1743 1744 step = -step;
1744 1745 }
1745 1746
1746 1747 if (step != 1) {
1747 1748 PyErr_SetString(PyExc_ValueError,
1748 1749 "revlog index delete requires step size of 1");
1749 1750 return -1;
1750 1751 }
1751 1752
1752 1753 if (stop != length - 1) {
1753 1754 PyErr_SetString(PyExc_IndexError,
1754 1755 "revlog index deletion indices are invalid");
1755 1756 return -1;
1756 1757 }
1757 1758
1758 1759 if (start < self->length - 1) {
1759 1760 if (self->nt) {
1760 1761 Py_ssize_t i;
1761 1762
1762 1763 for (i = start + 1; i < self->length - 1; i++) {
1763 1764 const char *node = index_node(self, i);
1764 1765
1765 1766 if (node)
1766 1767 nt_insert(self, node, -1);
1767 1768 }
1768 1769 if (self->added)
1769 1770 nt_invalidate_added(self, 0);
1770 1771 if (self->ntrev > start)
1771 1772 self->ntrev = (int)start;
1772 1773 }
1773 1774 self->length = start + 1;
1774 1775 if (start < self->raw_length) {
1775 1776 if (self->cache) {
1776 1777 Py_ssize_t i;
1777 1778 for (i = start; i < self->raw_length; i++)
1778 1779 Py_CLEAR(self->cache[i]);
1779 1780 }
1780 1781 self->raw_length = start;
1781 1782 }
1782 1783 goto done;
1783 1784 }
1784 1785
1785 1786 if (self->nt) {
1786 1787 nt_invalidate_added(self, start - self->length + 1);
1787 1788 if (self->ntrev > start)
1788 1789 self->ntrev = (int)start;
1789 1790 }
1790 1791 if (self->added)
1791 1792 ret = PyList_SetSlice(self->added, start - self->length + 1,
1792 1793 PyList_GET_SIZE(self->added), NULL);
1793 1794 done:
1794 1795 Py_CLEAR(self->headrevs);
1795 1796 return ret;
1796 1797 }
1797 1798
1798 1799 /*
1799 1800 * Supported ops:
1800 1801 *
1801 1802 * slice deletion
1802 1803 * string assignment (extend node->rev mapping)
1803 1804 * string deletion (shrink node->rev mapping)
1804 1805 */
1805 1806 static int index_assign_subscript(indexObject *self, PyObject *item,
1806 1807 PyObject *value)
1807 1808 {
1808 1809 char *node;
1809 1810 Py_ssize_t nodelen;
1810 1811 long rev;
1811 1812
1812 1813 if (PySlice_Check(item) && value == NULL)
1813 1814 return index_slice_del(self, item);
1814 1815
1815 1816 if (node_check(item, &node, &nodelen) == -1)
1816 1817 return -1;
1817 1818
1818 1819 if (value == NULL)
1819 1820 return self->nt ? nt_insert(self, node, -1) : 0;
1820 1821 rev = PyInt_AsLong(value);
1821 1822 if (rev > INT_MAX || rev < 0) {
1822 1823 if (!PyErr_Occurred())
1823 1824 PyErr_SetString(PyExc_ValueError, "rev out of range");
1824 1825 return -1;
1825 1826 }
1826 1827
1827 1828 if (nt_init(self) == -1)
1828 1829 return -1;
1829 1830 return nt_insert(self, node, (int)rev);
1830 1831 }
1831 1832
1832 1833 /*
1833 1834 * Find all RevlogNG entries in an index that has inline data. Update
1834 1835 * the optional "offsets" table with those entries.
1835 1836 */
1836 1837 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1837 1838 {
1838 1839 const char *data = (const char *)self->buf.buf;
1839 1840 Py_ssize_t pos = 0;
1840 1841 Py_ssize_t end = self->buf.len;
1841 1842 long incr = v1_hdrsize;
1842 1843 Py_ssize_t len = 0;
1843 1844
1844 1845 while (pos + v1_hdrsize <= end && pos >= 0) {
1845 1846 uint32_t comp_len;
1846 1847 /* 3rd element of header is length of compressed inline data */
1847 1848 comp_len = getbe32(data + pos + 8);
1848 1849 incr = v1_hdrsize + comp_len;
1849 1850 if (offsets)
1850 1851 offsets[len] = data + pos;
1851 1852 len++;
1852 1853 pos += incr;
1853 1854 }
1854 1855
1855 1856 if (pos != end) {
1856 1857 if (!PyErr_Occurred())
1857 1858 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1858 1859 return -1;
1859 1860 }
1860 1861
1861 1862 return len;
1862 1863 }
1863 1864
1864 1865 static int index_init(indexObject *self, PyObject *args)
1865 1866 {
1866 1867 PyObject *data_obj, *inlined_obj;
1867 1868 Py_ssize_t size;
1868 1869
1869 1870 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1870 1871 self->raw_length = 0;
1871 1872 self->added = NULL;
1872 1873 self->cache = NULL;
1873 1874 self->data = NULL;
1874 1875 memset(&self->buf, 0, sizeof(self->buf));
1875 1876 self->headrevs = NULL;
1876 1877 self->filteredrevs = Py_None;
1877 1878 Py_INCREF(Py_None);
1878 1879 self->nt = NULL;
1879 1880 self->offsets = NULL;
1880 1881
1881 1882 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1882 1883 return -1;
1883 1884 if (!PyObject_CheckBuffer(data_obj)) {
1884 1885 PyErr_SetString(PyExc_TypeError,
1885 1886 "data does not support buffer interface");
1886 1887 return -1;
1887 1888 }
1888 1889
1889 1890 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1890 1891 return -1;
1891 1892 size = self->buf.len;
1892 1893
1893 1894 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1894 1895 self->data = data_obj;
1895 1896
1896 1897 self->ntlength = self->ntcapacity = 0;
1897 1898 self->ntdepth = self->ntsplits = 0;
1898 1899 self->ntlookups = self->ntmisses = 0;
1899 1900 self->ntrev = -1;
1900 1901 Py_INCREF(self->data);
1901 1902
1902 1903 if (self->inlined) {
1903 1904 Py_ssize_t len = inline_scan(self, NULL);
1904 1905 if (len == -1)
1905 1906 goto bail;
1906 1907 self->raw_length = len;
1907 1908 self->length = len + 1;
1908 1909 } else {
1909 1910 if (size % v1_hdrsize) {
1910 1911 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1911 1912 goto bail;
1912 1913 }
1913 1914 self->raw_length = size / v1_hdrsize;
1914 1915 self->length = self->raw_length + 1;
1915 1916 }
1916 1917
1917 1918 return 0;
1918 1919 bail:
1919 1920 return -1;
1920 1921 }
1921 1922
1922 1923 static PyObject *index_nodemap(indexObject *self)
1923 1924 {
1924 1925 Py_INCREF(self);
1925 1926 return (PyObject *)self;
1926 1927 }
1927 1928
1928 1929 static void index_dealloc(indexObject *self)
1929 1930 {
1930 1931 _index_clearcaches(self);
1931 1932 Py_XDECREF(self->filteredrevs);
1932 1933 if (self->buf.buf) {
1933 1934 PyBuffer_Release(&self->buf);
1934 1935 memset(&self->buf, 0, sizeof(self->buf));
1935 1936 }
1936 1937 Py_XDECREF(self->data);
1937 1938 Py_XDECREF(self->added);
1938 1939 PyObject_Del(self);
1939 1940 }
1940 1941
1941 1942 static PySequenceMethods index_sequence_methods = {
1942 1943 (lenfunc)index_length, /* sq_length */
1943 1944 0, /* sq_concat */
1944 1945 0, /* sq_repeat */
1945 1946 (ssizeargfunc)index_get, /* sq_item */
1946 1947 0, /* sq_slice */
1947 1948 0, /* sq_ass_item */
1948 1949 0, /* sq_ass_slice */
1949 1950 (objobjproc)index_contains, /* sq_contains */
1950 1951 };
1951 1952
1952 1953 static PyMappingMethods index_mapping_methods = {
1953 1954 (lenfunc)index_length, /* mp_length */
1954 1955 (binaryfunc)index_getitem, /* mp_subscript */
1955 1956 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1956 1957 };
1957 1958
1958 1959 static PyMethodDef index_methods[] = {
1959 1960 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1960 1961 "return the gca set of the given revs"},
1961 1962 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1962 1963 METH_VARARGS,
1963 1964 "return the heads of the common ancestors of the given revs"},
1964 1965 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1965 1966 "clear the index caches"},
1966 1967 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1967 1968 "get an index entry"},
1968 1969 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
1969 1970 METH_VARARGS, "compute phases"},
1970 1971 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
1971 1972 "reachableroots"},
1972 1973 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
1973 1974 "get head revisions"}, /* Can do filtering since 3.2 */
1974 1975 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
1975 1976 "get filtered head revisions"}, /* Can always do filtering */
1976 1977 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
1977 1978 "determine revisions with deltas to reconstruct fulltext"},
1978 1979 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1979 1980 "insert an index entry"},
1980 1981 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1981 1982 "match a potentially ambiguous node ID"},
1982 1983 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1983 1984 "stats for the index"},
1984 1985 {NULL} /* Sentinel */
1985 1986 };
1986 1987
1987 1988 static PyGetSetDef index_getset[] = {
1988 1989 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1989 1990 {NULL} /* Sentinel */
1990 1991 };
1991 1992
1992 1993 static PyTypeObject indexType = {
1993 1994 PyVarObject_HEAD_INIT(NULL, 0)
1994 1995 "parsers.index", /* tp_name */
1995 1996 sizeof(indexObject), /* tp_basicsize */
1996 1997 0, /* tp_itemsize */
1997 1998 (destructor)index_dealloc, /* tp_dealloc */
1998 1999 0, /* tp_print */
1999 2000 0, /* tp_getattr */
2000 2001 0, /* tp_setattr */
2001 2002 0, /* tp_compare */
2002 2003 0, /* tp_repr */
2003 2004 0, /* tp_as_number */
2004 2005 &index_sequence_methods, /* tp_as_sequence */
2005 2006 &index_mapping_methods, /* tp_as_mapping */
2006 2007 0, /* tp_hash */
2007 2008 0, /* tp_call */
2008 2009 0, /* tp_str */
2009 2010 0, /* tp_getattro */
2010 2011 0, /* tp_setattro */
2011 2012 0, /* tp_as_buffer */
2012 2013 Py_TPFLAGS_DEFAULT, /* tp_flags */
2013 2014 "revlog index", /* tp_doc */
2014 2015 0, /* tp_traverse */
2015 2016 0, /* tp_clear */
2016 2017 0, /* tp_richcompare */
2017 2018 0, /* tp_weaklistoffset */
2018 2019 0, /* tp_iter */
2019 2020 0, /* tp_iternext */
2020 2021 index_methods, /* tp_methods */
2021 2022 0, /* tp_members */
2022 2023 index_getset, /* tp_getset */
2023 2024 0, /* tp_base */
2024 2025 0, /* tp_dict */
2025 2026 0, /* tp_descr_get */
2026 2027 0, /* tp_descr_set */
2027 2028 0, /* tp_dictoffset */
2028 2029 (initproc)index_init, /* tp_init */
2029 2030 0, /* tp_alloc */
2030 2031 };
2031 2032
2032 2033 /*
2033 2034 * returns a tuple of the form (index, index, cache) with elements as
2034 2035 * follows:
2035 2036 *
2036 2037 * index: an index object that lazily parses RevlogNG records
2037 2038 * cache: if data is inlined, a tuple (0, index_file_content), else None
2038 2039 * index_file_content could be a string, or a buffer
2039 2040 *
2040 2041 * added complications are for backwards compatibility
2041 2042 */
2042 2043 PyObject *parse_index2(PyObject *self, PyObject *args)
2043 2044 {
2044 2045 PyObject *tuple = NULL, *cache = NULL;
2045 2046 indexObject *idx;
2046 2047 int ret;
2047 2048
2048 2049 idx = PyObject_New(indexObject, &indexType);
2049 2050 if (idx == NULL)
2050 2051 goto bail;
2051 2052
2052 2053 ret = index_init(idx, args);
2053 2054 if (ret == -1)
2054 2055 goto bail;
2055 2056
2056 2057 if (idx->inlined) {
2057 2058 cache = Py_BuildValue("iO", 0, idx->data);
2058 2059 if (cache == NULL)
2059 2060 goto bail;
2060 2061 } else {
2061 2062 cache = Py_None;
2062 2063 Py_INCREF(cache);
2063 2064 }
2064 2065
2065 2066 tuple = Py_BuildValue("NN", idx, cache);
2066 2067 if (!tuple)
2067 2068 goto bail;
2068 2069 return tuple;
2069 2070
2070 2071 bail:
2071 2072 Py_XDECREF(idx);
2072 2073 Py_XDECREF(cache);
2073 2074 Py_XDECREF(tuple);
2074 2075 return NULL;
2075 2076 }
2076 2077
2077 2078 void revlog_module_init(PyObject *mod)
2078 2079 {
2079 2080 indexType.tp_new = PyType_GenericNew;
2080 2081 if (PyType_Ready(&indexType) < 0)
2081 2082 return;
2082 2083 Py_INCREF(&indexType);
2083 2084 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2084 2085
2085 2086 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2086 2087 -1, -1, -1, -1, nullid, 20);
2087 2088 if (nullentry)
2088 2089 PyObject_GC_UnTrack(nullentry);
2089 2090 }
General Comments 0
You need to be logged in to leave comments. Login now