##// END OF EJS Templates
lazymanifest: use a binary search to do an insertion...
Augie Fackler -
r24228:542c8912 default
parent child Browse files
Show More
@@ -1,841 +1,849 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <assert.h>
10 10 #include <string.h>
11 11 #include <stdlib.h>
12 12
13 13 #include <Python.h>
14 14
15 15 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
16 16 #ifdef _MSC_VER
17 17 #define true 1
18 18 #define false 0
19 19 typedef unsigned char bool;
20 20 #else
21 21 #include <stdbool.h>
22 22 #endif
23 23
24 24 #define DEFAULT_LINES 100000
25 25
26 26 typedef struct {
27 27 char *start;
28 28 Py_ssize_t len; /* length of line including terminal newline */
29 29 char hash_suffix;
30 30 bool from_malloc;
31 31 bool deleted;
32 32 } line;
33 33
34 34 typedef struct {
35 35 PyObject_HEAD
36 36 PyObject *pydata;
37 37 line *lines;
38 38 int numlines; /* number of line entries */
39 39 int livelines; /* number of non-deleted lines */
40 40 int maxlines; /* allocated number of lines */
41 41 bool dirty;
42 42 } lazymanifest;
43 43
44 44 #define MANIFEST_OOM -1
45 45 #define MANIFEST_NOT_SORTED -2
46 46 #define MANIFEST_MALFORMED -3
47 47
48 48 /* defined in parsers.c */
49 49 PyObject *unhexlify(const char *str, int len);
50 50
51 51 /* get the length of the path for a line */
52 52 static size_t pathlen(line *l) {
53 53 return strlen(l->start);
54 54 }
55 55
56 56 /* get the node value of a single line */
57 57 static PyObject *nodeof(line *l) {
58 58 char *s = l->start;
59 59 ssize_t llen = pathlen(l);
60 60 PyObject *hash = unhexlify(s + llen + 1, 40);
61 61 if (!hash) {
62 62 return NULL;
63 63 }
64 64 if (l->hash_suffix != '\0') {
65 65 char newhash[21];
66 66 memcpy(newhash, PyString_AsString(hash), 20);
67 67 Py_DECREF(hash);
68 68 newhash[20] = l->hash_suffix;
69 69 hash = PyString_FromStringAndSize(newhash, 21);
70 70 }
71 71 return hash;
72 72 }
73 73
74 74 /* get the node hash and flags of a line as a tuple */
75 75 static PyObject *hashflags(line *l)
76 76 {
77 77 char *s = l->start;
78 78 size_t plen = pathlen(l);
79 79 PyObject *hash = nodeof(l);
80 80
81 81 /* 40 for hash, 1 for null byte, 1 for newline */
82 82 size_t hplen = plen + 42;
83 83 Py_ssize_t flen = l->len - hplen;
84 84 PyObject *flags;
85 85 PyObject *tup;
86 86
87 87 if (!hash)
88 88 return NULL;
89 89 flags = PyString_FromStringAndSize(s + hplen - 1, flen);
90 90 if (!flags) {
91 91 Py_DECREF(hash);
92 92 return NULL;
93 93 }
94 94 tup = PyTuple_Pack(2, hash, flags);
95 95 Py_DECREF(flags);
96 96 Py_DECREF(hash);
97 97 return tup;
98 98 }
99 99
100 100 /* if we're about to run out of space in the line index, add more */
101 101 static bool realloc_if_full(lazymanifest *self)
102 102 {
103 103 if (self->numlines == self->maxlines) {
104 104 self->maxlines *= 2;
105 105 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
106 106 }
107 107 return self->lines;
108 108 }
109 109
110 110 /*
111 111 * Find the line boundaries in the manifest that 'data' points to and store
112 112 * information about each line in 'self'.
113 113 */
114 114 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
115 115 {
116 116 char *prev = NULL;
117 117 while (len > 0) {
118 118 line *l;
119 119 char *next = memchr(data, '\n', len);
120 120 if (!next) {
121 121 return MANIFEST_MALFORMED;
122 122 }
123 123 next++; /* advance past newline */
124 124 if (!realloc_if_full(self)) {
125 125 return MANIFEST_OOM; /* no memory */
126 126 }
127 127 if (prev && strcmp(prev, data) > -1) {
128 128 /* This data isn't sorted, so we have to abort. */
129 129 return MANIFEST_NOT_SORTED;
130 130 }
131 131 l = self->lines + ((self->numlines)++);
132 132 l->start = data;
133 133 l->len = next - data;
134 134 l->hash_suffix = '\0';
135 135 l->from_malloc = false;
136 136 l->deleted = false;
137 137 len = len - l->len;
138 138 prev = data;
139 139 data = next;
140 140 }
141 141 self->livelines = self->numlines;
142 142 return 0;
143 143 }
144 144
145 145 static int lazymanifest_init(lazymanifest *self, PyObject *args)
146 146 {
147 147 char *data;
148 148 Py_ssize_t len;
149 149 int err, ret;
150 150 PyObject *pydata;
151 151 if (!PyArg_ParseTuple(args, "S", &pydata)) {
152 152 return -1;
153 153 }
154 154 err = PyString_AsStringAndSize(pydata, &data, &len);
155 155
156 156 self->dirty = false;
157 157 if (err == -1)
158 158 return -1;
159 159 self->pydata = pydata;
160 160 Py_INCREF(self->pydata);
161 161 Py_BEGIN_ALLOW_THREADS
162 162 self->lines = malloc(DEFAULT_LINES * sizeof(line));
163 163 self->maxlines = DEFAULT_LINES;
164 164 self->numlines = 0;
165 165 if (!self->lines)
166 166 ret = MANIFEST_OOM;
167 167 else
168 168 ret = find_lines(self, data, len);
169 169 Py_END_ALLOW_THREADS
170 170 switch (ret) {
171 171 case 0:
172 172 break;
173 173 case MANIFEST_OOM:
174 174 PyErr_NoMemory();
175 175 break;
176 176 case MANIFEST_NOT_SORTED:
177 177 PyErr_Format(PyExc_ValueError,
178 178 "Manifest lines not in sorted order.");
179 179 break;
180 180 case MANIFEST_MALFORMED:
181 181 PyErr_Format(PyExc_ValueError,
182 182 "Manifest did not end in a newline.");
183 183 break;
184 184 default:
185 185 PyErr_Format(PyExc_ValueError,
186 186 "Unknown problem parsing manifest.");
187 187 }
188 188 return ret == 0 ? 0 : -1;
189 189 }
190 190
191 191 static void lazymanifest_dealloc(lazymanifest *self)
192 192 {
193 193 /* free any extra lines we had to allocate */
194 194 int i;
195 195 for (i = 0; i < self->numlines; i++) {
196 196 if (self->lines[i].from_malloc) {
197 197 free(self->lines[i].start);
198 198 }
199 199 }
200 200 if (self->lines) {
201 201 free(self->lines);
202 202 self->lines = NULL;
203 203 }
204 204 if (self->pydata) {
205 205 Py_DECREF(self->pydata);
206 206 self->pydata = NULL;
207 207 }
208 208 PyObject_Del(self);
209 209 }
210 210
211 211 /* iteration support */
212 212
213 213 typedef struct {
214 214 PyObject_HEAD lazymanifest *m;
215 215 Py_ssize_t pos;
216 216 } lmIter;
217 217
218 218 static void lmiter_dealloc(PyObject *o)
219 219 {
220 220 lmIter *self = (lmIter *)o;
221 221 Py_DECREF(self->m);
222 222 PyObject_Del(self);
223 223 }
224 224
225 225 static PyObject *lmiter_iternext(PyObject *o)
226 226 {
227 227 size_t pl;
228 228 line *l;
229 229 Py_ssize_t consumed;
230 230 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
231 231 lmIter *self = (lmIter *)o;
232 232 do {
233 233 self->pos++;
234 234 if (self->pos >= self->m->numlines) {
235 235 goto bail;
236 236 }
237 237 /* skip over deleted manifest entries */
238 238 } while (self->m->lines[self->pos].deleted);
239 239 l = self->m->lines + self->pos;
240 240 pl = pathlen(l);
241 241 path = PyString_FromStringAndSize(l->start, pl);
242 242 hash = nodeof(l);
243 243 consumed = pl + 41;
244 244 flags = PyString_FromStringAndSize(l->start + consumed,
245 245 l->len - consumed - 1);
246 246 if (!flags) {
247 247 goto bail;
248 248 }
249 249 ret = PyTuple_Pack(3, path, hash, flags);
250 250 bail:
251 251 Py_XDECREF(path);
252 252 Py_XDECREF(hash);
253 253 Py_XDECREF(flags);
254 254 return ret;
255 255 }
256 256
257 257 static PyTypeObject lazymanifestIterator = {
258 258 PyObject_HEAD_INIT(NULL)
259 259 0, /*ob_size */
260 260 "parsers.lazymanifest.iterator", /*tp_name */
261 261 sizeof(lmIter), /*tp_basicsize */
262 262 0, /*tp_itemsize */
263 263 lmiter_dealloc, /*tp_dealloc */
264 264 0, /*tp_print */
265 265 0, /*tp_getattr */
266 266 0, /*tp_setattr */
267 267 0, /*tp_compare */
268 268 0, /*tp_repr */
269 269 0, /*tp_as_number */
270 270 0, /*tp_as_sequence */
271 271 0, /*tp_as_mapping */
272 272 0, /*tp_hash */
273 273 0, /*tp_call */
274 274 0, /*tp_str */
275 275 0, /*tp_getattro */
276 276 0, /*tp_setattro */
277 277 0, /*tp_as_buffer */
278 278 /* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
279 279 use tp_iter and tp_iternext fields. */
280 280 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
281 281 "Iterator for a lazymanifest.", /* tp_doc */
282 282 0, /* tp_traverse */
283 283 0, /* tp_clear */
284 284 0, /* tp_richcompare */
285 285 0, /* tp_weaklistoffset */
286 286 PyObject_SelfIter, /* tp_iter: __iter__() method */
287 287 lmiter_iternext, /* tp_iternext: next() method */
288 288 };
289 289
290 290 static lazymanifest *lazymanifest_copy(lazymanifest *self);
291 291
292 292 static PyObject *lazymanifest_getiter(lazymanifest *self)
293 293 {
294 294 lmIter *i = NULL;
295 295 lazymanifest *t = lazymanifest_copy(self);
296 296 if (!t) {
297 297 PyErr_NoMemory();
298 298 return NULL;
299 299 }
300 300 i = PyObject_New(lmIter, &lazymanifestIterator);
301 301 if (i) {
302 302 i->m = t;
303 303 i->pos = -1;
304 304 } else {
305 305 Py_DECREF(t);
306 306 PyErr_NoMemory();
307 307 }
308 308 return (PyObject *)i;
309 309 }
310 310
311 311 /* __getitem__ and __setitem__ support */
312 312
313 313 static Py_ssize_t lazymanifest_size(lazymanifest *self)
314 314 {
315 315 return self->livelines;
316 316 }
317 317
318 318 static int linecmp(const void *left, const void *right)
319 319 {
320 320 return strcmp(((const line *)left)->start,
321 321 ((const line *)right)->start);
322 322 }
323 323
324 324 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
325 325 {
326 326 line needle;
327 327 line *hit;
328 328 if (!PyString_Check(key)) {
329 329 PyErr_Format(PyExc_TypeError,
330 330 "getitem: manifest keys must be a string.");
331 331 return NULL;
332 332 }
333 333 needle.start = PyString_AsString(key);
334 334 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
335 335 &linecmp);
336 336 if (!hit || hit->deleted) {
337 337 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
338 338 return NULL;
339 339 }
340 340 return hashflags(hit);
341 341 }
342 342
343 343 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
344 344 {
345 345 line needle;
346 346 line *hit;
347 347 if (!PyString_Check(key)) {
348 348 PyErr_Format(PyExc_TypeError,
349 349 "delitem: manifest keys must be a string.");
350 350 return -1;
351 351 }
352 352 needle.start = PyString_AsString(key);
353 353 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
354 354 &linecmp);
355 355 if (!hit || hit->deleted) {
356 356 PyErr_Format(PyExc_KeyError,
357 357 "Tried to delete nonexistent manifest entry.");
358 358 return -1;
359 359 }
360 360 self->dirty = true;
361 361 hit->deleted = true;
362 362 self->livelines--;
363 363 return 0;
364 364 }
365 365
366 /* Do a binary search for the insertion point for new, creating the
367 * new entry if needed. */
368 static int internalsetitem(lazymanifest *self, line *new) {
369 int start = 0, end = self->numlines;
370 while (start < end) {
371 int pos = start + (end - start) / 2;
372 int c = linecmp(new, self->lines + pos);
373 if (c < 0)
374 end = pos;
375 else if (c > 0)
376 start = pos + 1;
377 else {
378 if (self->lines[pos].deleted)
379 self->livelines++;
380 start = pos;
381 goto finish;
382 }
383 }
384 /* being here means we need to do an insert */
385 if (!realloc_if_full(self)) {
386 PyErr_NoMemory();
387 return -1;
388 }
389 memmove(self->lines + start + 1, self->lines + start,
390 (self->numlines - start) * sizeof(line));
391 self->numlines++;
392 self->livelines++;
393 finish:
394 self->lines[start] = *new;
395 self->dirty = true;
396 return 0;
397 }
398
366 399 static int lazymanifest_setitem(
367 400 lazymanifest *self, PyObject *key, PyObject *value)
368 401 {
369 402 char *path;
370 403 Py_ssize_t plen;
371 404 PyObject *pyhash;
372 405 Py_ssize_t hlen;
373 406 char *hash;
374 407 PyObject *pyflags;
375 408 char *flags;
376 409 Py_ssize_t flen;
377 410 size_t dlen;
378 411 char *dest;
379 412 int i;
380 413 line new;
381 line *hit;
382 414 if (!PyString_Check(key)) {
383 415 PyErr_Format(PyExc_TypeError,
384 416 "setitem: manifest keys must be a string.");
385 417 return -1;
386 418 }
387 419 if (!value) {
388 420 return lazymanifest_delitem(self, key);
389 421 }
390 422 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
391 423 PyErr_Format(PyExc_TypeError,
392 424 "Manifest values must be a tuple of (node, flags).");
393 425 return -1;
394 426 }
395 427 if (PyString_AsStringAndSize(key, &path, &plen) == -1) {
396 428 return -1;
397 429 }
398 430
399 431 pyhash = PyTuple_GetItem(value, 0);
400 432 if (!PyString_Check(pyhash)) {
401 433 PyErr_Format(PyExc_TypeError,
402 434 "node must be a 20-byte string");
403 435 return -1;
404 436 }
405 437 hlen = PyString_Size(pyhash);
406 438 /* Some parts of the codebase try and set 21 or 22
407 439 * byte "hash" values in order to perturb things for
408 440 * status. We have to preserve at least the 21st
409 441 * byte. Sigh. If there's a 22nd byte, we drop it on
410 442 * the floor, which works fine.
411 443 */
412 444 if (hlen != 20 && hlen != 21 && hlen != 22) {
413 445 PyErr_Format(PyExc_TypeError,
414 446 "node must be a 20-byte string");
415 447 return -1;
416 448 }
417 449 hash = PyString_AsString(pyhash);
418 450
419 451 pyflags = PyTuple_GetItem(value, 1);
420 452 if (!PyString_Check(pyflags) || PyString_Size(pyflags) > 1) {
421 453 PyErr_Format(PyExc_TypeError,
422 454 "flags must a 0 or 1 byte string");
423 455 return -1;
424 456 }
425 457 if (PyString_AsStringAndSize(pyflags, &flags, &flen) == -1) {
426 458 return -1;
427 459 }
428 460 /* one null byte and one newline */
429 461 dlen = plen + 41 + flen + 1;
430 462 dest = malloc(dlen);
431 463 if (!dest) {
432 464 PyErr_NoMemory();
433 465 return -1;
434 466 }
435 467 memcpy(dest, path, plen + 1);
436 468 for (i = 0; i < 20; i++) {
437 469 sprintf(dest + plen + 1 + (i * 2), "%02hhx", hash[i]);
438 470 }
439 471 memcpy(dest + plen + 41, flags, flen);
440 472 dest[plen + 41 + flen] = '\n';
441 473 new.start = dest;
442 474 new.len = dlen;
443 475 new.hash_suffix = '\0';
444 476 if (hlen > 20) {
445 477 new.hash_suffix = hash[20];
446 478 }
447 479 new.from_malloc = true; /* is `start` a pointer we allocated? */
448 480 new.deleted = false; /* is this entry deleted? */
449 hit = bsearch(&new, self->lines, self->numlines,
450 sizeof(line), &linecmp);
451 self->dirty = true;
452 if (hit) {
453 /* updating a line we already had */
454 if (hit->from_malloc) {
455 free(hit->start);
456 }
457 if (hit->deleted) {
458 self->livelines++;
459 }
460 *hit = new;
461 } else {
462 /* new line */
463 if (!realloc_if_full(self)) {
464 PyErr_NoMemory();
481 if (internalsetitem(self, &new)) {
465 482 return -1;
466 483 }
467 self->lines[self->numlines++] = new;
468 self->livelines++;
469 /* TODO: do a binary search and insert rather than
470 * append and qsort. Also offer a batch-insert
471 * interface, which should be a nice little
472 * performance win.
473 */
474 qsort(self->lines, self->numlines, sizeof(line), &linecmp);
475 }
476 484 return 0;
477 485 }
478 486
479 487 static PyMappingMethods lazymanifest_mapping_methods = {
480 488 (lenfunc)lazymanifest_size, /* mp_length */
481 489 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
482 490 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
483 491 };
484 492
485 493 /* sequence methods (important or __contains__ builds an iterator */
486 494
487 495 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
488 496 {
489 497 line needle;
490 498 line *hit;
491 499 if (!PyString_Check(key)) {
492 500 /* Our keys are always strings, so if the contains
493 501 * check is for a non-string, just return false. */
494 502 return 0;
495 503 }
496 504 needle.start = PyString_AsString(key);
497 505 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
498 506 &linecmp);
499 507 if (!hit || hit->deleted) {
500 508 return 0;
501 509 }
502 510 return 1;
503 511 }
504 512
505 513 static PySequenceMethods lazymanifest_seq_meths = {
506 514 (lenfunc)lazymanifest_size, /* sq_length */
507 515 0, /* sq_concat */
508 516 0, /* sq_repeat */
509 517 0, /* sq_item */
510 518 0, /* sq_slice */
511 519 0, /* sq_ass_item */
512 520 0, /* sq_ass_slice */
513 521 (objobjproc)lazymanifest_contains, /* sq_contains */
514 522 0, /* sq_inplace_concat */
515 523 0, /* sq_inplace_repeat */
516 524 };
517 525
518 526
519 527 /* Other methods (copy, diff, etc) */
520 528 static PyTypeObject lazymanifestType;
521 529
522 530 /* If the manifest has changes, build the new manifest text and reindex it. */
523 531 static int compact(lazymanifest *self) {
524 532 int i;
525 533 ssize_t need = 0;
526 534 char *data;
527 535 line *src, *dst;
528 536 PyObject *pydata;
529 537 if (!self->dirty)
530 538 return 0;
531 539 for (i = 0; i < self->numlines; i++) {
532 540 if (!self->lines[i].deleted) {
533 541 need += self->lines[i].len;
534 542 }
535 543 }
536 544 pydata = PyString_FromStringAndSize(NULL, need);
537 545 if (!pydata)
538 546 return -1;
539 547 data = PyString_AsString(pydata);
540 548 if (!data) {
541 549 return -1;
542 550 }
543 551 src = self->lines;
544 552 dst = self->lines;
545 553 for (i = 0; i < self->numlines; i++, src++) {
546 554 char *tofree = NULL;
547 555 if (src->from_malloc) {
548 556 tofree = src->start;
549 557 }
550 558 if (!src->deleted) {
551 559 memcpy(data, src->start, src->len);
552 560 *dst = *src;
553 561 dst->start = data;
554 562 dst->from_malloc = false;
555 563 data += dst->len;
556 564 dst++;
557 565 }
558 566 free(tofree);
559 567 }
560 568 Py_DECREF(self->pydata);
561 569 self->pydata = pydata;
562 570 self->numlines = self->livelines;
563 571 self->dirty = false;
564 572 return 0;
565 573 }
566 574
567 575 static PyObject *lazymanifest_text(lazymanifest *self)
568 576 {
569 577 if (compact(self) != 0) {
570 578 PyErr_NoMemory();
571 579 return NULL;
572 580 }
573 581 Py_INCREF(self->pydata);
574 582 return self->pydata;
575 583 }
576 584
577 585 static lazymanifest *lazymanifest_copy(lazymanifest *self)
578 586 {
579 587 lazymanifest *copy = NULL;
580 588 if (compact(self) != 0) {
581 589 goto nomem;
582 590 }
583 591 copy = PyObject_New(lazymanifest, &lazymanifestType);
584 592 if (!copy) {
585 593 goto nomem;
586 594 }
587 595 copy->numlines = self->numlines;
588 596 copy->livelines = self->livelines;
589 597 copy->dirty = false;
590 598 copy->lines = malloc(self->maxlines *sizeof(line));
591 599 if (!copy->lines) {
592 600 goto nomem;
593 601 }
594 602 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
595 603 copy->maxlines = self->maxlines;
596 604 copy->pydata = self->pydata;
597 605 Py_INCREF(copy->pydata);
598 606 return copy;
599 607 nomem:
600 608 PyErr_NoMemory();
601 609 Py_XDECREF(copy);
602 610 return NULL;
603 611 }
604 612
605 613 static lazymanifest *lazymanifest_filtercopy(
606 614 lazymanifest *self, PyObject *matchfn)
607 615 {
608 616 lazymanifest *copy = NULL;
609 617 int i;
610 618 if (!PyCallable_Check(matchfn)) {
611 619 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
612 620 return NULL;
613 621 }
614 622 /* compact ourselves first to avoid double-frees later when we
615 623 * compact tmp so that it doesn't have random pointers to our
616 624 * underlying from_malloc-data (self->pydata is safe) */
617 625 if (compact(self) != 0) {
618 626 goto nomem;
619 627 }
620 628 copy = PyObject_New(lazymanifest, &lazymanifestType);
621 629 copy->dirty = true;
622 630 copy->lines = malloc(self->maxlines * sizeof(line));
623 631 if (!copy->lines) {
624 632 goto nomem;
625 633 }
626 634 copy->maxlines = self->maxlines;
627 635 copy->numlines = 0;
628 636 copy->pydata = self->pydata;
629 637 Py_INCREF(self->pydata);
630 638 for (i = 0; i < self->numlines; i++) {
631 639 PyObject *arg = PyString_FromString(self->lines[i].start);
632 640 PyObject *arglist = PyTuple_Pack(1, arg);
633 641 PyObject *result = PyObject_CallObject(matchfn, arglist);
634 642 Py_DECREF(arglist);
635 643 Py_DECREF(arg);
636 644 /* if the callback raised an exception, just let it
637 645 * through and give up */
638 646 if (!result) {
639 647 free(copy->lines);
640 648 Py_DECREF(self->pydata);
641 649 return NULL;
642 650 }
643 651 if (PyObject_IsTrue(result)) {
644 652 assert(!(self->lines[i].from_malloc));
645 653 copy->lines[copy->numlines++] = self->lines[i];
646 654 }
647 655 Py_DECREF(result);
648 656 }
649 657 copy->livelines = copy->numlines;
650 658 return copy;
651 659 nomem:
652 660 PyErr_NoMemory();
653 661 Py_XDECREF(copy);
654 662 return NULL;
655 663 }
656 664
657 665 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
658 666 {
659 667 lazymanifest *other;
660 668 PyObject *pyclean = NULL;
661 669 bool listclean;
662 670 PyObject *emptyTup = NULL, *ret = NULL;
663 671 PyObject *es;
664 672 int sneedle = 0, oneedle = 0;
665 673 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
666 674 return NULL;
667 675 }
668 676 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
669 677 es = PyString_FromString("");
670 678 if (!es) {
671 679 goto nomem;
672 680 }
673 681 emptyTup = PyTuple_Pack(2, Py_None, es);
674 682 Py_DECREF(es);
675 683 if (!emptyTup) {
676 684 goto nomem;
677 685 }
678 686 ret = PyDict_New();
679 687 if (!ret) {
680 688 goto nomem;
681 689 }
682 690 while (sneedle != self->numlines || oneedle != other->numlines) {
683 691 line *left = self->lines + sneedle;
684 692 line *right = other->lines + oneedle;
685 693 int result;
686 694 PyObject *key;
687 695 PyObject *outer;
688 696 /* If we're looking at a deleted entry and it's not
689 697 * the end of the manifest, just skip it. */
690 698 if (left->deleted && sneedle < self->numlines) {
691 699 sneedle++;
692 700 continue;
693 701 }
694 702 if (right->deleted && oneedle < other->numlines) {
695 703 oneedle++;
696 704 continue;
697 705 }
698 706 /* if we're at the end of either manifest, then we
699 707 * know the remaining items are adds so we can skip
700 708 * the strcmp. */
701 709 if (sneedle == self->numlines) {
702 710 result = 1;
703 711 } else if (oneedle == other->numlines) {
704 712 result = -1;
705 713 } else {
706 714 result = linecmp(left, right);
707 715 }
708 716 key = result <= 0 ?
709 717 PyString_FromString(left->start) :
710 718 PyString_FromString(right->start);
711 719 if (!key)
712 720 goto nomem;
713 721 if (result < 0) {
714 722 PyObject *l = hashflags(left);
715 723 if (!l) {
716 724 goto nomem;
717 725 }
718 726 outer = PyTuple_Pack(2, l, emptyTup);
719 727 Py_DECREF(l);
720 728 if (!outer) {
721 729 goto nomem;
722 730 }
723 731 PyDict_SetItem(ret, key, outer);
724 732 Py_DECREF(outer);
725 733 sneedle++;
726 734 } else if (result > 0) {
727 735 PyObject *r = hashflags(right);
728 736 if (!r) {
729 737 goto nomem;
730 738 }
731 739 outer = PyTuple_Pack(2, emptyTup, r);
732 740 Py_DECREF(r);
733 741 if (!outer) {
734 742 goto nomem;
735 743 }
736 744 PyDict_SetItem(ret, key, outer);
737 745 Py_DECREF(outer);
738 746 oneedle++;
739 747 } else {
740 748 /* file exists in both manifests */
741 749 if (left->len != right->len
742 750 || memcmp(left->start, right->start, left->len)
743 751 || left->hash_suffix != right->hash_suffix) {
744 752 PyObject *l = hashflags(left);
745 753 PyObject *r;
746 754 if (!l) {
747 755 goto nomem;
748 756 }
749 757 r = hashflags(right);
750 758 if (!r) {
751 759 Py_DECREF(l);
752 760 goto nomem;
753 761 }
754 762 outer = PyTuple_Pack(2, l, r);
755 763 Py_DECREF(l);
756 764 Py_DECREF(r);
757 765 if (!outer) {
758 766 goto nomem;
759 767 }
760 768 PyDict_SetItem(ret, key, outer);
761 769 Py_DECREF(outer);
762 770 } else if (listclean) {
763 771 PyDict_SetItem(ret, key, Py_None);
764 772 }
765 773 sneedle++;
766 774 oneedle++;
767 775 }
768 776 Py_DECREF(key);
769 777 }
770 778 Py_DECREF(emptyTup);
771 779 return ret;
772 780 nomem:
773 781 PyErr_NoMemory();
774 782 Py_XDECREF(ret);
775 783 Py_XDECREF(emptyTup);
776 784 return NULL;
777 785 }
778 786
779 787 static PyMethodDef lazymanifest_methods[] = {
780 788 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
781 789 "Make a copy of this lazymanifest."},
782 790 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
783 791 "Make a copy of this manifest filtered by matchfn."},
784 792 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
785 793 "Compare this lazymanifest to another one."},
786 794 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
787 795 "Encode this manifest to text."},
788 796 {NULL},
789 797 };
790 798
791 799 static PyTypeObject lazymanifestType = {
792 800 PyObject_HEAD_INIT(NULL)
793 801 0, /* ob_size */
794 802 "parsers.lazymanifest", /* tp_name */
795 803 sizeof(lazymanifest), /* tp_basicsize */
796 804 0, /* tp_itemsize */
797 805 (destructor)lazymanifest_dealloc, /* tp_dealloc */
798 806 0, /* tp_print */
799 807 0, /* tp_getattr */
800 808 0, /* tp_setattr */
801 809 0, /* tp_compare */
802 810 0, /* tp_repr */
803 811 0, /* tp_as_number */
804 812 &lazymanifest_seq_meths, /* tp_as_sequence */
805 813 &lazymanifest_mapping_methods, /* tp_as_mapping */
806 814 0, /* tp_hash */
807 815 0, /* tp_call */
808 816 0, /* tp_str */
809 817 0, /* tp_getattro */
810 818 0, /* tp_setattro */
811 819 0, /* tp_as_buffer */
812 820 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
813 821 "TODO(augie)", /* tp_doc */
814 822 0, /* tp_traverse */
815 823 0, /* tp_clear */
816 824 0, /* tp_richcompare */
817 825 0, /* tp_weaklistoffset */
818 826 (getiterfunc)lazymanifest_getiter, /* tp_iter */
819 827 0, /* tp_iternext */
820 828 lazymanifest_methods, /* tp_methods */
821 829 0, /* tp_members */
822 830 0, /* tp_getset */
823 831 0, /* tp_base */
824 832 0, /* tp_dict */
825 833 0, /* tp_descr_get */
826 834 0, /* tp_descr_set */
827 835 0, /* tp_dictoffset */
828 836 (initproc)lazymanifest_init, /* tp_init */
829 837 0, /* tp_alloc */
830 838 };
831 839
832 840 void manifest_module_init(PyObject * mod)
833 841 {
834 842 lazymanifestType.tp_new = PyType_GenericNew;
835 843 if (PyType_Ready(&lazymanifestType) < 0)
836 844 return;
837 845 Py_INCREF(&lazymanifestType);
838 846
839 847 PyModule_AddObject(mod, "lazymanifest",
840 848 (PyObject *)&lazymanifestType);
841 849 }
@@ -1,228 +1,232 b''
1 1 import binascii
2 2 import unittest
3 3 import itertools
4 4
5 5 import silenttestrunner
6 6
7 7 from mercurial import manifest as manifestmod
8 8
9 9 HASH_1 = '1' * 40
10 10 HASH_2 = 'f' * 40
11 11 HASH_3 = '1234567890abcdef0987654321deadbeef0fcafe'
12 12 A_SHORT_MANIFEST = (
13 13 'bar/baz/qux.py\0%(hash2)s%(flag2)s\n'
14 14 'foo\0%(hash1)s%(flag1)s\n'
15 15 ) % {'hash1': HASH_1,
16 16 'flag1': '',
17 17 'hash2': HASH_2,
18 18 'flag2': 'l',
19 19 }
20 20
21 21 HUGE_MANIFEST_ENTRIES = 200001
22 22
23 23 A_HUGE_MANIFEST = ''.join(sorted(
24 24 'file%d\0%s%s\n' % (i, h, f) for i, h, f in
25 25 itertools.izip(xrange(200001),
26 26 itertools.cycle((HASH_1, HASH_2)),
27 27 itertools.cycle(('', 'x', 'l')))))
28 28
29 29 class testmanifest(unittest.TestCase):
30 30
31 31 def assertIn(self, thing, container, msg=None):
32 32 # assertIn new in 2.7, use it if available, otherwise polyfill
33 33 sup = getattr(unittest.TestCase, 'assertIn', False)
34 34 if sup:
35 35 return sup(self, thing, container, msg=msg)
36 36 if not msg:
37 37 msg = 'Expected %r in %r' % (thing, container)
38 38 self.assert_(thing in container, msg)
39 39
40 40 def testEmptyManifest(self):
41 41 m = manifestmod._lazymanifest('')
42 42 self.assertEqual(0, len(m))
43 43 self.assertEqual([], list(m))
44 44
45 45 def testManifest(self):
46 46 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
47 47 want = [
48 48 ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
49 49 ('foo', binascii.unhexlify(HASH_1), ''),
50 50 ]
51 51 self.assertEqual(len(want), len(m))
52 52 self.assertEqual(want, list(m))
53 53 self.assertEqual((binascii.unhexlify(HASH_1), ''), m['foo'])
54 54 self.assertRaises(KeyError, lambda : m['wat'])
55 55 self.assertEqual((binascii.unhexlify(HASH_2), 'l'),
56 56 m['bar/baz/qux.py'])
57 57
58 58 def testSetItem(self):
59 59 want = binascii.unhexlify(HASH_1), ''
60 60
61 61 m = manifestmod._lazymanifest('')
62 62 m['a'] = want
63 63 self.assertIn('a', m)
64 64 self.assertEqual(want, m['a'])
65 65 self.assertEqual('a\0' + HASH_1 + '\n', m.text())
66 66
67 67 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
68 68 m['a'] = want
69 69 self.assertEqual(want, m['a'])
70 70 self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST,
71 71 m.text())
72 72 m2 = m.copy()
73 73 del m
74 74 del m2 # make sure we don't double free() anything
75 75
76 76 def testCompaction(self):
77 77 unhex = binascii.unhexlify
78 78 h1, h2 = unhex(HASH_1), unhex(HASH_2)
79 79 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
80 80 m['alpha'] = h1, ''
81 81 m['beta'] = h2, ''
82 82 del m['foo']
83 83 want = 'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
84 84 HASH_1, HASH_2, HASH_2)
85 85 self.assertEqual(want, m.text())
86 86 self.assertEqual(3, len(m))
87 87 self.assertEqual((h1, ''), m['alpha'])
88 88 self.assertEqual((h2, ''), m['beta'])
89 89 self.assertRaises(KeyError, lambda : m['foo'])
90 90 w = [('alpha', h1, ''), ('bar/baz/qux.py', h2, 'l'), ('beta', h2, '')]
91 91 self.assertEqual(w, list(m))
92 92
93 93 def testSetGetNodeSuffix(self):
94 94 clean = manifestmod._lazymanifest(A_SHORT_MANIFEST)
95 95 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
96 96 h, f = m['foo']
97 97 want = h + 'a', f
98 98 # Merge code wants to set 21-byte fake hashes at times
99 99 m['foo'] = want
100 100 self.assertEqual(want, m['foo'])
101 101 self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
102 102 ('foo', binascii.unhexlify(HASH_1) + 'a', '')],
103 103 list(m))
104 104 # Sometimes it even tries a 22-byte fake hash, but we can
105 105 # return 21 and it'll work out
106 106 m['foo'] = want[0] + '+', f
107 107 self.assertEqual(want, m['foo'])
108 108 # make sure the suffix survives a copy
109 109 m2 = m.filtercopy(lambda x: x == 'foo')
110 110 self.assertEqual(want, m2['foo'])
111 111 self.assertEqual(1, len(m2))
112 112 self.assertEqual(('foo\0%s\n' % HASH_1), m2.text())
113 113 m2 = m.copy()
114 114 self.assertEqual(want, m2['foo'])
115 115 # suffix with iteration
116 116 self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
117 117 ('foo', want[0], '')], list(m))
118 118 # shows up in diff
119 119 self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean))
120 120 self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
121 121
122 122 def testFilterCopyException(self):
123 123 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
124 124 def filt(path):
125 125 if path == 'foo':
126 126 assert False
127 127 return True
128 128 self.assertRaises(AssertionError, m.filtercopy, filt)
129 129
130 130 def testRemoveItem(self):
131 131 m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
132 132 del m['foo']
133 133 self.assertRaises(KeyError, lambda : m['foo'])
134 134 self.assertEqual(1, len(m))
135 135 self.assertEqual(1, len(list(m)))
136 # now restore and make sure everything works right
137 m['foo'] = 'a' * 20, ''
138 self.assertEqual(2, len(m))
139 self.assertEqual(2, len(list(m)))
136 140
137 141 def testManifestDiff(self):
138 142 MISSING = (None, '')
139 143 addl = 'z-only-in-left\0' + HASH_1 + '\n'
140 144 addr = 'z-only-in-right\0' + HASH_2 + 'x\n'
141 145 left = manifestmod._lazymanifest(
142 146 A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl)
143 147 right = manifestmod._lazymanifest(A_SHORT_MANIFEST + addr)
144 148 want = {
145 149 'foo': ((binascii.unhexlify(HASH_3), 'x'),
146 150 (binascii.unhexlify(HASH_1), '')),
147 151 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
148 152 'z-only-in-right': (MISSING, (binascii.unhexlify(HASH_2), 'x')),
149 153 }
150 154 self.assertEqual(want, left.diff(right))
151 155
152 156 want = {
153 157 'bar/baz/qux.py': (MISSING, (binascii.unhexlify(HASH_2), 'l')),
154 158 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')),
155 159 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')),
156 160 }
157 161 self.assertEqual(want, manifestmod._lazymanifest('').diff(left))
158 162
159 163 want = {
160 164 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING),
161 165 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING),
162 166 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
163 167 }
164 168 self.assertEqual(want, left.diff(manifestmod._lazymanifest('')))
165 169 copy = right.copy()
166 170 del copy['z-only-in-right']
167 171 del right['foo']
168 172 want = {
169 173 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
170 174 'z-only-in-right': ((binascii.unhexlify(HASH_2), 'x'), MISSING),
171 175 }
172 176 self.assertEqual(want, right.diff(copy))
173 177
174 178 short = manifestmod._lazymanifest(A_SHORT_MANIFEST)
175 179 pruned = short.copy()
176 180 del pruned['foo']
177 181 want = {
178 182 'foo': ((binascii.unhexlify(HASH_1), ''), MISSING),
179 183 }
180 184 self.assertEqual(want, short.diff(pruned))
181 185 want = {
182 186 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
183 187 }
184 188 self.assertEqual(want, pruned.diff(short))
185 189 want = {
186 190 'bar/baz/qux.py': None,
187 191 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')),
188 192 }
189 193 self.assertEqual(want, pruned.diff(short, True))
190 194
191 195 def testReversedLines(self):
192 196 backwards = ''.join(
193 197 l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l)
194 198 try:
195 199 manifestmod._lazymanifest(backwards)
196 200 self.fail('Should have raised ValueError')
197 201 except ValueError, v:
198 202 self.assertIn('Manifest lines not in sorted order.', str(v))
199 203
200 204 def testNoTerminalNewline(self):
201 205 try:
202 206 manifestmod._lazymanifest(A_SHORT_MANIFEST + 'wat')
203 207 self.fail('Should have raised ValueError')
204 208 except ValueError, v:
205 209 self.assertIn('Manifest did not end in a newline.', str(v))
206 210
207 211 def testNoNewLineAtAll(self):
208 212 try:
209 213 manifestmod._lazymanifest('wat')
210 214 self.fail('Should have raised ValueError')
211 215 except ValueError, v:
212 216 self.assertIn('Manifest did not end in a newline.', str(v))
213 217
214 218 def testHugeManifest(self):
215 219 m = manifestmod._lazymanifest(A_HUGE_MANIFEST)
216 220 self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
217 221 self.assertEqual(len(m), len(list(m)))
218 222
219 223 def testIntersectFiles(self):
220 224 m = manifestmod.manifestdict(A_HUGE_MANIFEST)
221 225 m2 = m.intersectfiles(['file1', 'file200', 'file300'])
222 226 w = ('file1\0%sx\n'
223 227 'file200\0%sl\n'
224 228 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1)
225 229 self.assertEqual(w, m2.text())
226 230
227 231 if __name__ == '__main__':
228 232 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now