##// END OF EJS Templates
manifest: fix out-of-bounds read of corrupted manifest entry...
Yuya Nishihara -
r40279:5405cb1a 4.7.2 stable
parent child Browse files
Show More
@@ -1,952 +1,960 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
/* initial capacity, in entries, of the line index built by lazymanifest_init */
#define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 line *lines;
32 32 int numlines; /* number of line entries */
33 33 int livelines; /* number of non-deleted lines */
34 34 int maxlines; /* allocated number of lines */
35 35 bool dirty;
36 36 } lazymanifest;
37 37
/* failure codes returned by find_lines() */
#define MANIFEST_OOM -1        /* the line index could not be allocated */
#define MANIFEST_NOT_SORTED -2 /* a line does not sort after its predecessor */
#define MANIFEST_MALFORMED -3  /* the text does not end in a newline */
41 41
42 42 /* get the length of the path for a line */
43 43 static size_t pathlen(line *l)
44 44 {
45 45 const char *end = memchr(l->start, '\0', l->len);
46 46 return (end) ? (size_t)(end - l->start) : l->len;
47 47 }
48 48
49 49 /* get the node value of a single line */
50 50 static PyObject *nodeof(line *l)
51 51 {
52 52 char *s = l->start;
53 53 ssize_t llen = pathlen(l);
54 PyObject *hash = unhexlify(s + llen + 1, 40);
54 PyObject *hash;
55 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
56 PyErr_SetString(PyExc_ValueError, "manifest line too short");
57 return NULL;
58 }
59 hash = unhexlify(s + llen + 1, 40);
55 60 if (!hash) {
56 61 return NULL;
57 62 }
58 63 if (l->hash_suffix != '\0') {
59 64 char newhash[21];
60 65 memcpy(newhash, PyBytes_AsString(hash), 20);
61 66 Py_DECREF(hash);
62 67 newhash[20] = l->hash_suffix;
63 68 hash = PyBytes_FromStringAndSize(newhash, 21);
64 69 }
65 70 return hash;
66 71 }
67 72
68 73 /* get the node hash and flags of a line as a tuple */
69 74 static PyObject *hashflags(line *l)
70 75 {
71 76 char *s = l->start;
72 77 size_t plen = pathlen(l);
73 78 PyObject *hash = nodeof(l);
74 79
75 80 /* 40 for hash, 1 for null byte, 1 for newline */
76 81 size_t hplen = plen + 42;
77 82 Py_ssize_t flen = l->len - hplen;
78 83 PyObject *flags;
79 84 PyObject *tup;
80 85
81 86 if (!hash)
82 87 return NULL;
83 88 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
84 89 if (!flags) {
85 90 Py_DECREF(hash);
86 91 return NULL;
87 92 }
88 93 tup = PyTuple_Pack(2, hash, flags);
89 94 Py_DECREF(flags);
90 95 Py_DECREF(hash);
91 96 return tup;
92 97 }
93 98
94 99 /* if we're about to run out of space in the line index, add more */
95 100 static bool realloc_if_full(lazymanifest *self)
96 101 {
97 102 if (self->numlines == self->maxlines) {
98 103 self->maxlines *= 2;
99 104 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
100 105 }
101 106 return !!self->lines;
102 107 }
103 108
104 109 /*
105 110 * Find the line boundaries in the manifest that 'data' points to and store
106 111 * information about each line in 'self'.
107 112 */
108 113 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
109 114 {
110 115 char *prev = NULL;
111 116 while (len > 0) {
112 117 line *l;
113 118 char *next = memchr(data, '\n', len);
114 119 if (!next) {
115 120 return MANIFEST_MALFORMED;
116 121 }
117 122 next++; /* advance past newline */
118 123 if (!realloc_if_full(self)) {
119 124 return MANIFEST_OOM; /* no memory */
120 125 }
121 126 if (prev && strcmp(prev, data) > -1) {
122 127 /* This data isn't sorted, so we have to abort. */
123 128 return MANIFEST_NOT_SORTED;
124 129 }
125 130 l = self->lines + ((self->numlines)++);
126 131 l->start = data;
127 132 l->len = next - data;
128 133 l->hash_suffix = '\0';
129 134 l->from_malloc = false;
130 135 l->deleted = false;
131 136 len = len - l->len;
132 137 prev = data;
133 138 data = next;
134 139 }
135 140 self->livelines = self->numlines;
136 141 return 0;
137 142 }
138 143
139 144 static void lazymanifest_init_early(lazymanifest *self)
140 145 {
141 146 self->pydata = NULL;
142 147 self->lines = NULL;
143 148 self->numlines = 0;
144 149 self->maxlines = 0;
145 150 }
146 151
147 152 static int lazymanifest_init(lazymanifest *self, PyObject *args)
148 153 {
149 154 char *data;
150 155 Py_ssize_t len;
151 156 int err, ret;
152 157 PyObject *pydata;
153 158
154 159 lazymanifest_init_early(self);
155 160 if (!PyArg_ParseTuple(args, "S", &pydata)) {
156 161 return -1;
157 162 }
158 163 err = PyBytes_AsStringAndSize(pydata, &data, &len);
159 164
160 165 self->dirty = false;
161 166 if (err == -1)
162 167 return -1;
163 168 self->pydata = pydata;
164 169 Py_INCREF(self->pydata);
165 170 Py_BEGIN_ALLOW_THREADS
166 171 self->lines = malloc(DEFAULT_LINES * sizeof(line));
167 172 self->maxlines = DEFAULT_LINES;
168 173 self->numlines = 0;
169 174 if (!self->lines)
170 175 ret = MANIFEST_OOM;
171 176 else
172 177 ret = find_lines(self, data, len);
173 178 Py_END_ALLOW_THREADS
174 179 switch (ret) {
175 180 case 0:
176 181 break;
177 182 case MANIFEST_OOM:
178 183 PyErr_NoMemory();
179 184 break;
180 185 case MANIFEST_NOT_SORTED:
181 186 PyErr_Format(PyExc_ValueError,
182 187 "Manifest lines not in sorted order.");
183 188 break;
184 189 case MANIFEST_MALFORMED:
185 190 PyErr_Format(PyExc_ValueError,
186 191 "Manifest did not end in a newline.");
187 192 break;
188 193 default:
189 194 PyErr_Format(PyExc_ValueError,
190 195 "Unknown problem parsing manifest.");
191 196 }
192 197 return ret == 0 ? 0 : -1;
193 198 }
194 199
195 200 static void lazymanifest_dealloc(lazymanifest *self)
196 201 {
197 202 /* free any extra lines we had to allocate */
198 203 int i;
199 204 for (i = 0; self->lines && (i < self->numlines); i++) {
200 205 if (self->lines[i].from_malloc) {
201 206 free(self->lines[i].start);
202 207 }
203 208 }
204 209 free(self->lines);
205 210 self->lines = NULL;
206 211 if (self->pydata) {
207 212 Py_DECREF(self->pydata);
208 213 self->pydata = NULL;
209 214 }
210 215 PyObject_Del(self);
211 216 }
212 217
213 218 /* iteration support */
214 219
215 220 typedef struct {
216 221 PyObject_HEAD lazymanifest *m;
217 222 Py_ssize_t pos;
218 223 } lmIter;
219 224
220 225 static void lmiter_dealloc(PyObject *o)
221 226 {
222 227 lmIter *self = (lmIter *)o;
223 228 Py_DECREF(self->m);
224 229 PyObject_Del(self);
225 230 }
226 231
227 232 static line *lmiter_nextline(lmIter *self)
228 233 {
229 234 do {
230 235 self->pos++;
231 236 if (self->pos >= self->m->numlines) {
232 237 return NULL;
233 238 }
234 239 /* skip over deleted manifest entries */
235 240 } while (self->m->lines[self->pos].deleted);
236 241 return self->m->lines + self->pos;
237 242 }
238 243
239 244 static PyObject *lmiter_iterentriesnext(PyObject *o)
240 245 {
241 246 size_t pl;
242 247 line *l;
243 248 Py_ssize_t consumed;
244 249 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
245 250 l = lmiter_nextline((lmIter *)o);
246 251 if (!l) {
247 252 goto done;
248 253 }
249 254 pl = pathlen(l);
250 255 path = PyBytes_FromStringAndSize(l->start, pl);
251 256 hash = nodeof(l);
257 if (!path || !hash) {
258 goto done;
259 }
252 260 consumed = pl + 41;
253 261 flags = PyBytes_FromStringAndSize(l->start + consumed,
254 262 l->len - consumed - 1);
255 if (!path || !hash || !flags) {
263 if (!flags) {
256 264 goto done;
257 265 }
258 266 ret = PyTuple_Pack(3, path, hash, flags);
259 267 done:
260 268 Py_XDECREF(path);
261 269 Py_XDECREF(hash);
262 270 Py_XDECREF(flags);
263 271 return ret;
264 272 }
265 273
266 274 #ifdef IS_PY3K
267 275 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
268 276 #else
269 277 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
270 278 | Py_TPFLAGS_HAVE_ITER
271 279 #endif
272 280
273 281 static PyTypeObject lazymanifestEntriesIterator = {
274 282 PyVarObject_HEAD_INIT(NULL, 0) /* header */
275 283 "parsers.lazymanifest.entriesiterator", /*tp_name */
276 284 sizeof(lmIter), /*tp_basicsize */
277 285 0, /*tp_itemsize */
278 286 lmiter_dealloc, /*tp_dealloc */
279 287 0, /*tp_print */
280 288 0, /*tp_getattr */
281 289 0, /*tp_setattr */
282 290 0, /*tp_compare */
283 291 0, /*tp_repr */
284 292 0, /*tp_as_number */
285 293 0, /*tp_as_sequence */
286 294 0, /*tp_as_mapping */
287 295 0, /*tp_hash */
288 296 0, /*tp_call */
289 297 0, /*tp_str */
290 298 0, /*tp_getattro */
291 299 0, /*tp_setattro */
292 300 0, /*tp_as_buffer */
293 301 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
294 302 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
295 303 0, /* tp_traverse */
296 304 0, /* tp_clear */
297 305 0, /* tp_richcompare */
298 306 0, /* tp_weaklistoffset */
299 307 PyObject_SelfIter, /* tp_iter: __iter__() method */
300 308 lmiter_iterentriesnext, /* tp_iternext: next() method */
301 309 };
302 310
303 311 static PyObject *lmiter_iterkeysnext(PyObject *o)
304 312 {
305 313 size_t pl;
306 314 line *l = lmiter_nextline((lmIter *)o);
307 315 if (!l) {
308 316 return NULL;
309 317 }
310 318 pl = pathlen(l);
311 319 return PyBytes_FromStringAndSize(l->start, pl);
312 320 }
313 321
314 322 #ifdef IS_PY3K
315 323 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
316 324 #else
317 325 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
318 326 | Py_TPFLAGS_HAVE_ITER
319 327 #endif
320 328
321 329 static PyTypeObject lazymanifestKeysIterator = {
322 330 PyVarObject_HEAD_INIT(NULL, 0) /* header */
323 331 "parsers.lazymanifest.keysiterator", /*tp_name */
324 332 sizeof(lmIter), /*tp_basicsize */
325 333 0, /*tp_itemsize */
326 334 lmiter_dealloc, /*tp_dealloc */
327 335 0, /*tp_print */
328 336 0, /*tp_getattr */
329 337 0, /*tp_setattr */
330 338 0, /*tp_compare */
331 339 0, /*tp_repr */
332 340 0, /*tp_as_number */
333 341 0, /*tp_as_sequence */
334 342 0, /*tp_as_mapping */
335 343 0, /*tp_hash */
336 344 0, /*tp_call */
337 345 0, /*tp_str */
338 346 0, /*tp_getattro */
339 347 0, /*tp_setattro */
340 348 0, /*tp_as_buffer */
341 349 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
342 350 "Keys iterator for a lazymanifest.", /* tp_doc */
343 351 0, /* tp_traverse */
344 352 0, /* tp_clear */
345 353 0, /* tp_richcompare */
346 354 0, /* tp_weaklistoffset */
347 355 PyObject_SelfIter, /* tp_iter: __iter__() method */
348 356 lmiter_iterkeysnext, /* tp_iternext: next() method */
349 357 };
350 358
351 359 static lazymanifest *lazymanifest_copy(lazymanifest *self);
352 360
353 361 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
354 362 {
355 363 lmIter *i = NULL;
356 364 lazymanifest *t = lazymanifest_copy(self);
357 365 if (!t) {
358 366 PyErr_NoMemory();
359 367 return NULL;
360 368 }
361 369 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
362 370 if (i) {
363 371 i->m = t;
364 372 i->pos = -1;
365 373 } else {
366 374 Py_DECREF(t);
367 375 PyErr_NoMemory();
368 376 }
369 377 return (PyObject *)i;
370 378 }
371 379
372 380 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
373 381 {
374 382 lmIter *i = NULL;
375 383 lazymanifest *t = lazymanifest_copy(self);
376 384 if (!t) {
377 385 PyErr_NoMemory();
378 386 return NULL;
379 387 }
380 388 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
381 389 if (i) {
382 390 i->m = t;
383 391 i->pos = -1;
384 392 } else {
385 393 Py_DECREF(t);
386 394 PyErr_NoMemory();
387 395 }
388 396 return (PyObject *)i;
389 397 }
390 398
391 399 /* __getitem__ and __setitem__ support */
392 400
393 401 static Py_ssize_t lazymanifest_size(lazymanifest *self)
394 402 {
395 403 return self->livelines;
396 404 }
397 405
398 406 static int linecmp(const void *left, const void *right)
399 407 {
400 408 return strcmp(((const line *)left)->start,
401 409 ((const line *)right)->start);
402 410 }
403 411
404 412 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
405 413 {
406 414 line needle;
407 415 line *hit;
408 416 if (!PyBytes_Check(key)) {
409 417 PyErr_Format(PyExc_TypeError,
410 418 "getitem: manifest keys must be a string.");
411 419 return NULL;
412 420 }
413 421 needle.start = PyBytes_AsString(key);
414 422 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
415 423 &linecmp);
416 424 if (!hit || hit->deleted) {
417 425 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
418 426 return NULL;
419 427 }
420 428 return hashflags(hit);
421 429 }
422 430
423 431 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
424 432 {
425 433 line needle;
426 434 line *hit;
427 435 if (!PyBytes_Check(key)) {
428 436 PyErr_Format(PyExc_TypeError,
429 437 "delitem: manifest keys must be a string.");
430 438 return -1;
431 439 }
432 440 needle.start = PyBytes_AsString(key);
433 441 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
434 442 &linecmp);
435 443 if (!hit || hit->deleted) {
436 444 PyErr_Format(PyExc_KeyError,
437 445 "Tried to delete nonexistent manifest entry.");
438 446 return -1;
439 447 }
440 448 self->dirty = true;
441 449 hit->deleted = true;
442 450 self->livelines--;
443 451 return 0;
444 452 }
445 453
446 454 /* Do a binary search for the insertion point for new, creating the
447 455 * new entry if needed. */
448 456 static int internalsetitem(lazymanifest *self, line *new)
449 457 {
450 458 int start = 0, end = self->numlines;
451 459 while (start < end) {
452 460 int pos = start + (end - start) / 2;
453 461 int c = linecmp(new, self->lines + pos);
454 462 if (c < 0)
455 463 end = pos;
456 464 else if (c > 0)
457 465 start = pos + 1;
458 466 else {
459 467 if (self->lines[pos].deleted)
460 468 self->livelines++;
461 469 if (self->lines[pos].from_malloc)
462 470 free(self->lines[pos].start);
463 471 start = pos;
464 472 goto finish;
465 473 }
466 474 }
467 475 /* being here means we need to do an insert */
468 476 if (!realloc_if_full(self)) {
469 477 PyErr_NoMemory();
470 478 return -1;
471 479 }
472 480 memmove(self->lines + start + 1, self->lines + start,
473 481 (self->numlines - start) * sizeof(line));
474 482 self->numlines++;
475 483 self->livelines++;
476 484 finish:
477 485 self->lines[start] = *new;
478 486 self->dirty = true;
479 487 return 0;
480 488 }
481 489
482 490 static int lazymanifest_setitem(
483 491 lazymanifest *self, PyObject *key, PyObject *value)
484 492 {
485 493 char *path;
486 494 Py_ssize_t plen;
487 495 PyObject *pyhash;
488 496 Py_ssize_t hlen;
489 497 char *hash;
490 498 PyObject *pyflags;
491 499 char *flags;
492 500 Py_ssize_t flen;
493 501 size_t dlen;
494 502 char *dest;
495 503 int i;
496 504 line new;
497 505 if (!PyBytes_Check(key)) {
498 506 PyErr_Format(PyExc_TypeError,
499 507 "setitem: manifest keys must be a string.");
500 508 return -1;
501 509 }
502 510 if (!value) {
503 511 return lazymanifest_delitem(self, key);
504 512 }
505 513 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
506 514 PyErr_Format(PyExc_TypeError,
507 515 "Manifest values must be a tuple of (node, flags).");
508 516 return -1;
509 517 }
510 518 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
511 519 return -1;
512 520 }
513 521
514 522 pyhash = PyTuple_GetItem(value, 0);
515 523 if (!PyBytes_Check(pyhash)) {
516 524 PyErr_Format(PyExc_TypeError,
517 525 "node must be a 20-byte string");
518 526 return -1;
519 527 }
520 528 hlen = PyBytes_Size(pyhash);
521 529 /* Some parts of the codebase try and set 21 or 22
522 530 * byte "hash" values in order to perturb things for
523 531 * status. We have to preserve at least the 21st
524 532 * byte. Sigh. If there's a 22nd byte, we drop it on
525 533 * the floor, which works fine.
526 534 */
527 535 if (hlen != 20 && hlen != 21 && hlen != 22) {
528 536 PyErr_Format(PyExc_TypeError,
529 537 "node must be a 20-byte string");
530 538 return -1;
531 539 }
532 540 hash = PyBytes_AsString(pyhash);
533 541
534 542 pyflags = PyTuple_GetItem(value, 1);
535 543 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
536 544 PyErr_Format(PyExc_TypeError,
537 545 "flags must a 0 or 1 byte string");
538 546 return -1;
539 547 }
540 548 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
541 549 return -1;
542 550 }
543 551 /* one null byte and one newline */
544 552 dlen = plen + 41 + flen + 1;
545 553 dest = malloc(dlen);
546 554 if (!dest) {
547 555 PyErr_NoMemory();
548 556 return -1;
549 557 }
550 558 memcpy(dest, path, plen + 1);
551 559 for (i = 0; i < 20; i++) {
552 560 /* Cast to unsigned, so it will not get sign-extended when promoted
553 561 * to int (as is done when passing to a variadic function)
554 562 */
555 563 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
556 564 }
557 565 memcpy(dest + plen + 41, flags, flen);
558 566 dest[plen + 41 + flen] = '\n';
559 567 new.start = dest;
560 568 new.len = dlen;
561 569 new.hash_suffix = '\0';
562 570 if (hlen > 20) {
563 571 new.hash_suffix = hash[20];
564 572 }
565 573 new.from_malloc = true; /* is `start` a pointer we allocated? */
566 574 new.deleted = false; /* is this entry deleted? */
567 575 if (internalsetitem(self, &new)) {
568 576 return -1;
569 577 }
570 578 return 0;
571 579 }
572 580
573 581 static PyMappingMethods lazymanifest_mapping_methods = {
574 582 (lenfunc)lazymanifest_size, /* mp_length */
575 583 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
576 584 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
577 585 };
578 586
579 587 /* sequence methods (important or __contains__ builds an iterator) */
580 588
581 589 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
582 590 {
583 591 line needle;
584 592 line *hit;
585 593 if (!PyBytes_Check(key)) {
586 594 /* Our keys are always strings, so if the contains
587 595 * check is for a non-string, just return false. */
588 596 return 0;
589 597 }
590 598 needle.start = PyBytes_AsString(key);
591 599 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
592 600 &linecmp);
593 601 if (!hit || hit->deleted) {
594 602 return 0;
595 603 }
596 604 return 1;
597 605 }
598 606
599 607 static PySequenceMethods lazymanifest_seq_meths = {
600 608 (lenfunc)lazymanifest_size, /* sq_length */
601 609 0, /* sq_concat */
602 610 0, /* sq_repeat */
603 611 0, /* sq_item */
604 612 0, /* sq_slice */
605 613 0, /* sq_ass_item */
606 614 0, /* sq_ass_slice */
607 615 (objobjproc)lazymanifest_contains, /* sq_contains */
608 616 0, /* sq_inplace_concat */
609 617 0, /* sq_inplace_repeat */
610 618 };
611 619
612 620
613 621 /* Other methods (copy, diff, etc) */
614 622 static PyTypeObject lazymanifestType;
615 623
616 624 /* If the manifest has changes, build the new manifest text and reindex it. */
617 625 static int compact(lazymanifest *self)
618 626 {
619 627 int i;
620 628 ssize_t need = 0;
621 629 char *data;
622 630 line *src, *dst;
623 631 PyObject *pydata;
624 632 if (!self->dirty)
625 633 return 0;
626 634 for (i = 0; i < self->numlines; i++) {
627 635 if (!self->lines[i].deleted) {
628 636 need += self->lines[i].len;
629 637 }
630 638 }
631 639 pydata = PyBytes_FromStringAndSize(NULL, need);
632 640 if (!pydata)
633 641 return -1;
634 642 data = PyBytes_AsString(pydata);
635 643 if (!data) {
636 644 return -1;
637 645 }
638 646 src = self->lines;
639 647 dst = self->lines;
640 648 for (i = 0; i < self->numlines; i++, src++) {
641 649 char *tofree = NULL;
642 650 if (src->from_malloc) {
643 651 tofree = src->start;
644 652 }
645 653 if (!src->deleted) {
646 654 memcpy(data, src->start, src->len);
647 655 *dst = *src;
648 656 dst->start = data;
649 657 dst->from_malloc = false;
650 658 data += dst->len;
651 659 dst++;
652 660 }
653 661 free(tofree);
654 662 }
655 663 Py_DECREF(self->pydata);
656 664 self->pydata = pydata;
657 665 self->numlines = self->livelines;
658 666 self->dirty = false;
659 667 return 0;
660 668 }
661 669
662 670 static PyObject *lazymanifest_text(lazymanifest *self)
663 671 {
664 672 if (compact(self) != 0) {
665 673 PyErr_NoMemory();
666 674 return NULL;
667 675 }
668 676 Py_INCREF(self->pydata);
669 677 return self->pydata;
670 678 }
671 679
672 680 static lazymanifest *lazymanifest_copy(lazymanifest *self)
673 681 {
674 682 lazymanifest *copy = NULL;
675 683 if (compact(self) != 0) {
676 684 goto nomem;
677 685 }
678 686 copy = PyObject_New(lazymanifest, &lazymanifestType);
679 687 if (!copy) {
680 688 goto nomem;
681 689 }
682 690 lazymanifest_init_early(copy);
683 691 copy->numlines = self->numlines;
684 692 copy->livelines = self->livelines;
685 693 copy->dirty = false;
686 694 copy->lines = malloc(self->maxlines *sizeof(line));
687 695 if (!copy->lines) {
688 696 goto nomem;
689 697 }
690 698 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
691 699 copy->maxlines = self->maxlines;
692 700 copy->pydata = self->pydata;
693 701 Py_INCREF(copy->pydata);
694 702 return copy;
695 703 nomem:
696 704 PyErr_NoMemory();
697 705 Py_XDECREF(copy);
698 706 return NULL;
699 707 }
700 708
701 709 static lazymanifest *lazymanifest_filtercopy(
702 710 lazymanifest *self, PyObject *matchfn)
703 711 {
704 712 lazymanifest *copy = NULL;
705 713 int i;
706 714 if (!PyCallable_Check(matchfn)) {
707 715 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
708 716 return NULL;
709 717 }
710 718 /* compact ourselves first to avoid double-frees later when we
711 719 * compact tmp so that it doesn't have random pointers to our
712 720 * underlying from_malloc-data (self->pydata is safe) */
713 721 if (compact(self) != 0) {
714 722 goto nomem;
715 723 }
716 724 copy = PyObject_New(lazymanifest, &lazymanifestType);
717 725 if (!copy) {
718 726 goto nomem;
719 727 }
720 728 lazymanifest_init_early(copy);
721 729 copy->dirty = true;
722 730 copy->lines = malloc(self->maxlines * sizeof(line));
723 731 if (!copy->lines) {
724 732 goto nomem;
725 733 }
726 734 copy->maxlines = self->maxlines;
727 735 copy->numlines = 0;
728 736 copy->pydata = self->pydata;
729 737 Py_INCREF(copy->pydata);
730 738 for (i = 0; i < self->numlines; i++) {
731 739 PyObject *arglist = NULL, *result = NULL;
732 740 arglist = Py_BuildValue(PY23("(s)", "(y)"),
733 741 self->lines[i].start);
734 742 if (!arglist) {
735 743 goto bail;
736 744 }
737 745 result = PyObject_CallObject(matchfn, arglist);
738 746 Py_DECREF(arglist);
739 747 /* if the callback raised an exception, just let it
740 748 * through and give up */
741 749 if (!result) {
742 750 goto bail;
743 751 }
744 752 if (PyObject_IsTrue(result)) {
745 753 assert(!(self->lines[i].from_malloc));
746 754 copy->lines[copy->numlines++] = self->lines[i];
747 755 }
748 756 Py_DECREF(result);
749 757 }
750 758 copy->livelines = copy->numlines;
751 759 return copy;
752 760 nomem:
753 761 PyErr_NoMemory();
754 762 bail:
755 763 Py_XDECREF(copy);
756 764 return NULL;
757 765 }
758 766
759 767 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
760 768 {
761 769 lazymanifest *other;
762 770 PyObject *pyclean = NULL;
763 771 bool listclean;
764 772 PyObject *emptyTup = NULL, *ret = NULL;
765 773 PyObject *es;
766 774 int sneedle = 0, oneedle = 0;
767 775 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
768 776 return NULL;
769 777 }
770 778 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
771 779 es = PyBytes_FromString("");
772 780 if (!es) {
773 781 goto nomem;
774 782 }
775 783 emptyTup = PyTuple_Pack(2, Py_None, es);
776 784 Py_DECREF(es);
777 785 if (!emptyTup) {
778 786 goto nomem;
779 787 }
780 788 ret = PyDict_New();
781 789 if (!ret) {
782 790 goto nomem;
783 791 }
784 792 while (sneedle != self->numlines || oneedle != other->numlines) {
785 793 line *left = self->lines + sneedle;
786 794 line *right = other->lines + oneedle;
787 795 int result;
788 796 PyObject *key;
789 797 PyObject *outer;
790 798 /* If we're looking at a deleted entry and it's not
791 799 * the end of the manifest, just skip it. */
792 800 if (sneedle < self->numlines && left->deleted) {
793 801 sneedle++;
794 802 continue;
795 803 }
796 804 if (oneedle < other->numlines && right->deleted) {
797 805 oneedle++;
798 806 continue;
799 807 }
800 808 /* if we're at the end of either manifest, then we
801 809 * know the remaining items are adds so we can skip
802 810 * the strcmp. */
803 811 if (sneedle == self->numlines) {
804 812 result = 1;
805 813 } else if (oneedle == other->numlines) {
806 814 result = -1;
807 815 } else {
808 816 result = linecmp(left, right);
809 817 }
810 818 key = result <= 0 ?
811 819 PyBytes_FromString(left->start) :
812 820 PyBytes_FromString(right->start);
813 821 if (!key)
814 822 goto nomem;
815 823 if (result < 0) {
816 824 PyObject *l = hashflags(left);
817 825 if (!l) {
818 826 goto nomem;
819 827 }
820 828 outer = PyTuple_Pack(2, l, emptyTup);
821 829 Py_DECREF(l);
822 830 if (!outer) {
823 831 goto nomem;
824 832 }
825 833 PyDict_SetItem(ret, key, outer);
826 834 Py_DECREF(outer);
827 835 sneedle++;
828 836 } else if (result > 0) {
829 837 PyObject *r = hashflags(right);
830 838 if (!r) {
831 839 goto nomem;
832 840 }
833 841 outer = PyTuple_Pack(2, emptyTup, r);
834 842 Py_DECREF(r);
835 843 if (!outer) {
836 844 goto nomem;
837 845 }
838 846 PyDict_SetItem(ret, key, outer);
839 847 Py_DECREF(outer);
840 848 oneedle++;
841 849 } else {
842 850 /* file exists in both manifests */
843 851 if (left->len != right->len
844 852 || memcmp(left->start, right->start, left->len)
845 853 || left->hash_suffix != right->hash_suffix) {
846 854 PyObject *l = hashflags(left);
847 855 PyObject *r;
848 856 if (!l) {
849 857 goto nomem;
850 858 }
851 859 r = hashflags(right);
852 860 if (!r) {
853 861 Py_DECREF(l);
854 862 goto nomem;
855 863 }
856 864 outer = PyTuple_Pack(2, l, r);
857 865 Py_DECREF(l);
858 866 Py_DECREF(r);
859 867 if (!outer) {
860 868 goto nomem;
861 869 }
862 870 PyDict_SetItem(ret, key, outer);
863 871 Py_DECREF(outer);
864 872 } else if (listclean) {
865 873 PyDict_SetItem(ret, key, Py_None);
866 874 }
867 875 sneedle++;
868 876 oneedle++;
869 877 }
870 878 Py_DECREF(key);
871 879 }
872 880 Py_DECREF(emptyTup);
873 881 return ret;
874 882 nomem:
875 883 PyErr_NoMemory();
876 884 Py_XDECREF(ret);
877 885 Py_XDECREF(emptyTup);
878 886 return NULL;
879 887 }
880 888
881 889 static PyMethodDef lazymanifest_methods[] = {
882 890 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
883 891 "Iterate over file names in this lazymanifest."},
884 892 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
885 893 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
886 894 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
887 895 "Make a copy of this lazymanifest."},
888 896 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
889 897 "Make a copy of this manifest filtered by matchfn."},
890 898 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
891 899 "Compare this lazymanifest to another one."},
892 900 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
893 901 "Encode this manifest to text."},
894 902 {NULL},
895 903 };
896 904
897 905 #ifdef IS_PY3K
898 906 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
899 907 #else
900 908 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
901 909 #endif
902 910
903 911 static PyTypeObject lazymanifestType = {
904 912 PyVarObject_HEAD_INIT(NULL, 0) /* header */
905 913 "parsers.lazymanifest", /* tp_name */
906 914 sizeof(lazymanifest), /* tp_basicsize */
907 915 0, /* tp_itemsize */
908 916 (destructor)lazymanifest_dealloc, /* tp_dealloc */
909 917 0, /* tp_print */
910 918 0, /* tp_getattr */
911 919 0, /* tp_setattr */
912 920 0, /* tp_compare */
913 921 0, /* tp_repr */
914 922 0, /* tp_as_number */
915 923 &lazymanifest_seq_meths, /* tp_as_sequence */
916 924 &lazymanifest_mapping_methods, /* tp_as_mapping */
917 925 0, /* tp_hash */
918 926 0, /* tp_call */
919 927 0, /* tp_str */
920 928 0, /* tp_getattro */
921 929 0, /* tp_setattro */
922 930 0, /* tp_as_buffer */
923 931 LAZYMANIFEST_TPFLAGS, /* tp_flags */
924 932 "TODO(augie)", /* tp_doc */
925 933 0, /* tp_traverse */
926 934 0, /* tp_clear */
927 935 0, /* tp_richcompare */
928 936 0, /* tp_weaklistoffset */
929 937 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
930 938 0, /* tp_iternext */
931 939 lazymanifest_methods, /* tp_methods */
932 940 0, /* tp_members */
933 941 0, /* tp_getset */
934 942 0, /* tp_base */
935 943 0, /* tp_dict */
936 944 0, /* tp_descr_get */
937 945 0, /* tp_descr_set */
938 946 0, /* tp_dictoffset */
939 947 (initproc)lazymanifest_init, /* tp_init */
940 948 0, /* tp_alloc */
941 949 };
942 950
943 951 void manifest_module_init(PyObject * mod)
944 952 {
945 953 lazymanifestType.tp_new = PyType_GenericNew;
946 954 if (PyType_Ready(&lazymanifestType) < 0)
947 955 return;
948 956 Py_INCREF(&lazymanifestType);
949 957
950 958 PyModule_AddObject(mod, "lazymanifest",
951 959 (PyObject *)&lazymanifestType);
952 960 }
General Comments 0
You need to be logged in to leave comments. Login now