##// END OF EJS Templates
manifest: reject lines shorter than 42 bytes, not 22...
Augie Fackler -
r40801:7e6834ad default
parent child Browse files
Show More
@@ -1,987 +1,987 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
/* Number of line-index entries lazymanifest_init() allocates up front. */
#define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 line *lines;
32 32 int numlines; /* number of line entries */
33 33 int livelines; /* number of non-deleted lines */
34 34 int maxlines; /* allocated number of lines */
35 35 bool dirty;
36 36 } lazymanifest;
37 37
/*
 * Negative status codes returned by find_lines(); lazymanifest_init()
 * turns each of them into a Python exception.
 */
/* the line index could not be allocated or grown */
#define MANIFEST_OOM -1
/* a path did not sort strictly after the previous one */
#define MANIFEST_NOT_SORTED -2
/* the data did not end in a newline */
#define MANIFEST_MALFORMED -3
/* a line started with a NUL byte, i.e. had an empty path */
#define MANIFEST_BOGUS_FILENAME -4
/* a line was too short to hold a path, a NUL and a hash */
#define MANIFEST_TOO_SHORT_LINE -5
43 43
44 44 /* get the length of the path for a line */
45 45 static size_t pathlen(line *l)
46 46 {
47 47 const char *end = memchr(l->start, '\0', l->len);
48 48 return (end) ? (size_t)(end - l->start) : l->len;
49 49 }
50 50
51 51 /* get the node value of a single line */
52 52 static PyObject *nodeof(line *l)
53 53 {
54 54 char *s = l->start;
55 55 ssize_t llen = pathlen(l);
56 56 PyObject *hash;
57 57 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
58 58 PyErr_SetString(PyExc_ValueError, "manifest line too short");
59 59 return NULL;
60 60 }
61 61 hash = unhexlify(s + llen + 1, 40);
62 62 if (!hash) {
63 63 return NULL;
64 64 }
65 65 if (l->hash_suffix != '\0') {
66 66 char newhash[21];
67 67 memcpy(newhash, PyBytes_AsString(hash), 20);
68 68 Py_DECREF(hash);
69 69 newhash[20] = l->hash_suffix;
70 70 hash = PyBytes_FromStringAndSize(newhash, 21);
71 71 }
72 72 return hash;
73 73 }
74 74
75 75 /* get the node hash and flags of a line as a tuple */
76 76 static PyObject *hashflags(line *l)
77 77 {
78 78 char *s = l->start;
79 79 size_t plen = pathlen(l);
80 80 PyObject *hash = nodeof(l);
81 81
82 82 /* 40 for hash, 1 for null byte, 1 for newline */
83 83 size_t hplen = plen + 42;
84 84 Py_ssize_t flen = l->len - hplen;
85 85 PyObject *flags;
86 86 PyObject *tup;
87 87
88 88 if (!hash)
89 89 return NULL;
90 90 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
91 91 if (!flags) {
92 92 Py_DECREF(hash);
93 93 return NULL;
94 94 }
95 95 tup = PyTuple_Pack(2, hash, flags);
96 96 Py_DECREF(flags);
97 97 Py_DECREF(hash);
98 98 return tup;
99 99 }
100 100
101 101 /* if we're about to run out of space in the line index, add more */
102 102 static bool realloc_if_full(lazymanifest *self)
103 103 {
104 104 if (self->numlines == self->maxlines) {
105 105 self->maxlines *= 2;
106 106 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
107 107 }
108 108 return !!self->lines;
109 109 }
110 110
111 111 /*
112 112 * Find the line boundaries in the manifest that 'data' points to and store
113 113 * information about each line in 'self'.
114 114 */
115 115 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
116 116 {
117 117 char *prev = NULL;
118 118 while (len > 0) {
119 119 line *l;
120 120 char *next;
121 121 if (*data == '\0') {
122 122 /* It's implausible there's no filename, don't
123 123 * even bother looking for the newline. */
124 124 return MANIFEST_BOGUS_FILENAME;
125 125 }
126 126 next = memchr(data, '\n', len);
127 127 if (!next) {
128 128 return MANIFEST_MALFORMED;
129 129 }
130 if ((next - data) < 22) {
131 /* We should have at least 22 bytes in a line:
130 if ((next - data) < 42) {
131 /* We should have at least 42 bytes in a line:
132 132 1 byte filename
133 133 1 NUL
134 20 bytes of hash
134 40 bytes of hash
135 135 so we can give up here.
136 136 */
137 137 return MANIFEST_TOO_SHORT_LINE;
138 138 }
139 139 next++; /* advance past newline */
140 140 if (prev && strcmp(prev, data) > -1) {
141 141 /* This data isn't sorted, so we have to abort. */
142 142 return MANIFEST_NOT_SORTED;
143 143 }
144 144 if (!realloc_if_full(self)) {
145 145 return MANIFEST_OOM; /* no memory */
146 146 }
147 147 l = self->lines + ((self->numlines)++);
148 148 l->start = data;
149 149 l->len = next - data;
150 150 l->hash_suffix = '\0';
151 151 l->from_malloc = false;
152 152 l->deleted = false;
153 153 len = len - l->len;
154 154 prev = data;
155 155 data = next;
156 156 }
157 157 self->livelines = self->numlines;
158 158 return 0;
159 159 }
160 160
161 161 static void lazymanifest_init_early(lazymanifest *self)
162 162 {
163 163 self->pydata = NULL;
164 164 self->lines = NULL;
165 165 self->numlines = 0;
166 166 self->maxlines = 0;
167 167 }
168 168
169 169 static int lazymanifest_init(lazymanifest *self, PyObject *args)
170 170 {
171 171 char *data;
172 172 Py_ssize_t len;
173 173 int err, ret;
174 174 PyObject *pydata;
175 175
176 176 lazymanifest_init_early(self);
177 177 if (!PyArg_ParseTuple(args, "S", &pydata)) {
178 178 return -1;
179 179 }
180 180 err = PyBytes_AsStringAndSize(pydata, &data, &len);
181 181
182 182 self->dirty = false;
183 183 if (err == -1)
184 184 return -1;
185 185 self->pydata = pydata;
186 186 Py_INCREF(self->pydata);
187 187 Py_BEGIN_ALLOW_THREADS
188 188 self->lines = malloc(DEFAULT_LINES * sizeof(line));
189 189 self->maxlines = DEFAULT_LINES;
190 190 self->numlines = 0;
191 191 if (!self->lines)
192 192 ret = MANIFEST_OOM;
193 193 else
194 194 ret = find_lines(self, data, len);
195 195 Py_END_ALLOW_THREADS
196 196 switch (ret) {
197 197 case 0:
198 198 break;
199 199 case MANIFEST_OOM:
200 200 PyErr_NoMemory();
201 201 break;
202 202 case MANIFEST_NOT_SORTED:
203 203 PyErr_Format(PyExc_ValueError,
204 204 "Manifest lines not in sorted order.");
205 205 break;
206 206 case MANIFEST_MALFORMED:
207 207 PyErr_Format(PyExc_ValueError,
208 208 "Manifest did not end in a newline.");
209 209 break;
210 210 case MANIFEST_BOGUS_FILENAME:
211 211 PyErr_Format(
212 212 PyExc_ValueError,
213 213 "Manifest had an entry with a zero-length filename.");
214 214 break;
215 215 case MANIFEST_TOO_SHORT_LINE:
216 216 PyErr_Format(
217 217 PyExc_ValueError,
218 218 "Manifest had implausibly-short line.");
219 219 break;
220 220 default:
221 221 PyErr_Format(PyExc_ValueError,
222 222 "Unknown problem parsing manifest.");
223 223 }
224 224 return ret == 0 ? 0 : -1;
225 225 }
226 226
227 227 static void lazymanifest_dealloc(lazymanifest *self)
228 228 {
229 229 /* free any extra lines we had to allocate */
230 230 int i;
231 231 for (i = 0; self->lines && (i < self->numlines); i++) {
232 232 if (self->lines[i].from_malloc) {
233 233 free(self->lines[i].start);
234 234 }
235 235 }
236 236 free(self->lines);
237 237 self->lines = NULL;
238 238 if (self->pydata) {
239 239 Py_DECREF(self->pydata);
240 240 self->pydata = NULL;
241 241 }
242 242 PyObject_Del(self);
243 243 }
244 244
245 245 /* iteration support */
246 246
247 247 typedef struct {
248 248 PyObject_HEAD lazymanifest *m;
249 249 Py_ssize_t pos;
250 250 } lmIter;
251 251
252 252 static void lmiter_dealloc(PyObject *o)
253 253 {
254 254 lmIter *self = (lmIter *)o;
255 255 Py_DECREF(self->m);
256 256 PyObject_Del(self);
257 257 }
258 258
259 259 static line *lmiter_nextline(lmIter *self)
260 260 {
261 261 do {
262 262 self->pos++;
263 263 if (self->pos >= self->m->numlines) {
264 264 return NULL;
265 265 }
266 266 /* skip over deleted manifest entries */
267 267 } while (self->m->lines[self->pos].deleted);
268 268 return self->m->lines + self->pos;
269 269 }
270 270
271 271 static PyObject *lmiter_iterentriesnext(PyObject *o)
272 272 {
273 273 size_t pl;
274 274 line *l;
275 275 Py_ssize_t consumed;
276 276 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
277 277 l = lmiter_nextline((lmIter *)o);
278 278 if (!l) {
279 279 goto done;
280 280 }
281 281 pl = pathlen(l);
282 282 path = PyBytes_FromStringAndSize(l->start, pl);
283 283 hash = nodeof(l);
284 284 if (!path || !hash) {
285 285 goto done;
286 286 }
287 287 consumed = pl + 41;
288 288 flags = PyBytes_FromStringAndSize(l->start + consumed,
289 289 l->len - consumed - 1);
290 290 if (!flags) {
291 291 goto done;
292 292 }
293 293 ret = PyTuple_Pack(3, path, hash, flags);
294 294 done:
295 295 Py_XDECREF(path);
296 296 Py_XDECREF(hash);
297 297 Py_XDECREF(flags);
298 298 return ret;
299 299 }
300 300
301 301 #ifdef IS_PY3K
302 302 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
303 303 #else
304 304 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
305 305 | Py_TPFLAGS_HAVE_ITER
306 306 #endif
307 307
308 308 static PyTypeObject lazymanifestEntriesIterator = {
309 309 PyVarObject_HEAD_INIT(NULL, 0) /* header */
310 310 "parsers.lazymanifest.entriesiterator", /*tp_name */
311 311 sizeof(lmIter), /*tp_basicsize */
312 312 0, /*tp_itemsize */
313 313 lmiter_dealloc, /*tp_dealloc */
314 314 0, /*tp_print */
315 315 0, /*tp_getattr */
316 316 0, /*tp_setattr */
317 317 0, /*tp_compare */
318 318 0, /*tp_repr */
319 319 0, /*tp_as_number */
320 320 0, /*tp_as_sequence */
321 321 0, /*tp_as_mapping */
322 322 0, /*tp_hash */
323 323 0, /*tp_call */
324 324 0, /*tp_str */
325 325 0, /*tp_getattro */
326 326 0, /*tp_setattro */
327 327 0, /*tp_as_buffer */
328 328 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
329 329 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
330 330 0, /* tp_traverse */
331 331 0, /* tp_clear */
332 332 0, /* tp_richcompare */
333 333 0, /* tp_weaklistoffset */
334 334 PyObject_SelfIter, /* tp_iter: __iter__() method */
335 335 lmiter_iterentriesnext, /* tp_iternext: next() method */
336 336 };
337 337
338 338 static PyObject *lmiter_iterkeysnext(PyObject *o)
339 339 {
340 340 size_t pl;
341 341 line *l = lmiter_nextline((lmIter *)o);
342 342 if (!l) {
343 343 return NULL;
344 344 }
345 345 pl = pathlen(l);
346 346 return PyBytes_FromStringAndSize(l->start, pl);
347 347 }
348 348
349 349 #ifdef IS_PY3K
350 350 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
351 351 #else
352 352 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
353 353 | Py_TPFLAGS_HAVE_ITER
354 354 #endif
355 355
356 356 static PyTypeObject lazymanifestKeysIterator = {
357 357 PyVarObject_HEAD_INIT(NULL, 0) /* header */
358 358 "parsers.lazymanifest.keysiterator", /*tp_name */
359 359 sizeof(lmIter), /*tp_basicsize */
360 360 0, /*tp_itemsize */
361 361 lmiter_dealloc, /*tp_dealloc */
362 362 0, /*tp_print */
363 363 0, /*tp_getattr */
364 364 0, /*tp_setattr */
365 365 0, /*tp_compare */
366 366 0, /*tp_repr */
367 367 0, /*tp_as_number */
368 368 0, /*tp_as_sequence */
369 369 0, /*tp_as_mapping */
370 370 0, /*tp_hash */
371 371 0, /*tp_call */
372 372 0, /*tp_str */
373 373 0, /*tp_getattro */
374 374 0, /*tp_setattro */
375 375 0, /*tp_as_buffer */
376 376 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
377 377 "Keys iterator for a lazymanifest.", /* tp_doc */
378 378 0, /* tp_traverse */
379 379 0, /* tp_clear */
380 380 0, /* tp_richcompare */
381 381 0, /* tp_weaklistoffset */
382 382 PyObject_SelfIter, /* tp_iter: __iter__() method */
383 383 lmiter_iterkeysnext, /* tp_iternext: next() method */
384 384 };
385 385
386 386 static lazymanifest *lazymanifest_copy(lazymanifest *self);
387 387
388 388 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
389 389 {
390 390 lmIter *i = NULL;
391 391 lazymanifest *t = lazymanifest_copy(self);
392 392 if (!t) {
393 393 PyErr_NoMemory();
394 394 return NULL;
395 395 }
396 396 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
397 397 if (i) {
398 398 i->m = t;
399 399 i->pos = -1;
400 400 } else {
401 401 Py_DECREF(t);
402 402 PyErr_NoMemory();
403 403 }
404 404 return (PyObject *)i;
405 405 }
406 406
407 407 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
408 408 {
409 409 lmIter *i = NULL;
410 410 lazymanifest *t = lazymanifest_copy(self);
411 411 if (!t) {
412 412 PyErr_NoMemory();
413 413 return NULL;
414 414 }
415 415 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
416 416 if (i) {
417 417 i->m = t;
418 418 i->pos = -1;
419 419 } else {
420 420 Py_DECREF(t);
421 421 PyErr_NoMemory();
422 422 }
423 423 return (PyObject *)i;
424 424 }
425 425
426 426 /* __getitem__ and __setitem__ support */
427 427
428 428 static Py_ssize_t lazymanifest_size(lazymanifest *self)
429 429 {
430 430 return self->livelines;
431 431 }
432 432
433 433 static int linecmp(const void *left, const void *right)
434 434 {
435 435 return strcmp(((const line *)left)->start,
436 436 ((const line *)right)->start);
437 437 }
438 438
439 439 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
440 440 {
441 441 line needle;
442 442 line *hit;
443 443 if (!PyBytes_Check(key)) {
444 444 PyErr_Format(PyExc_TypeError,
445 445 "getitem: manifest keys must be a string.");
446 446 return NULL;
447 447 }
448 448 needle.start = PyBytes_AsString(key);
449 449 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
450 450 &linecmp);
451 451 if (!hit || hit->deleted) {
452 452 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
453 453 return NULL;
454 454 }
455 455 return hashflags(hit);
456 456 }
457 457
458 458 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
459 459 {
460 460 line needle;
461 461 line *hit;
462 462 if (!PyBytes_Check(key)) {
463 463 PyErr_Format(PyExc_TypeError,
464 464 "delitem: manifest keys must be a string.");
465 465 return -1;
466 466 }
467 467 needle.start = PyBytes_AsString(key);
468 468 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
469 469 &linecmp);
470 470 if (!hit || hit->deleted) {
471 471 PyErr_Format(PyExc_KeyError,
472 472 "Tried to delete nonexistent manifest entry.");
473 473 return -1;
474 474 }
475 475 self->dirty = true;
476 476 hit->deleted = true;
477 477 self->livelines--;
478 478 return 0;
479 479 }
480 480
481 481 /* Do a binary search for the insertion point for new, creating the
482 482 * new entry if needed. */
483 483 static int internalsetitem(lazymanifest *self, line *new)
484 484 {
485 485 int start = 0, end = self->numlines;
486 486 while (start < end) {
487 487 int pos = start + (end - start) / 2;
488 488 int c = linecmp(new, self->lines + pos);
489 489 if (c < 0)
490 490 end = pos;
491 491 else if (c > 0)
492 492 start = pos + 1;
493 493 else {
494 494 if (self->lines[pos].deleted)
495 495 self->livelines++;
496 496 if (self->lines[pos].from_malloc)
497 497 free(self->lines[pos].start);
498 498 start = pos;
499 499 goto finish;
500 500 }
501 501 }
502 502 /* being here means we need to do an insert */
503 503 if (!realloc_if_full(self)) {
504 504 PyErr_NoMemory();
505 505 return -1;
506 506 }
507 507 memmove(self->lines + start + 1, self->lines + start,
508 508 (self->numlines - start) * sizeof(line));
509 509 self->numlines++;
510 510 self->livelines++;
511 511 finish:
512 512 self->lines[start] = *new;
513 513 self->dirty = true;
514 514 return 0;
515 515 }
516 516
517 517 static int lazymanifest_setitem(
518 518 lazymanifest *self, PyObject *key, PyObject *value)
519 519 {
520 520 char *path;
521 521 Py_ssize_t plen;
522 522 PyObject *pyhash;
523 523 Py_ssize_t hlen;
524 524 char *hash;
525 525 PyObject *pyflags;
526 526 char *flags;
527 527 Py_ssize_t flen;
528 528 size_t dlen;
529 529 char *dest;
530 530 int i;
531 531 line new;
532 532 if (!PyBytes_Check(key)) {
533 533 PyErr_Format(PyExc_TypeError,
534 534 "setitem: manifest keys must be a string.");
535 535 return -1;
536 536 }
537 537 if (!value) {
538 538 return lazymanifest_delitem(self, key);
539 539 }
540 540 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
541 541 PyErr_Format(PyExc_TypeError,
542 542 "Manifest values must be a tuple of (node, flags).");
543 543 return -1;
544 544 }
545 545 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
546 546 return -1;
547 547 }
548 548
549 549 pyhash = PyTuple_GetItem(value, 0);
550 550 if (!PyBytes_Check(pyhash)) {
551 551 PyErr_Format(PyExc_TypeError,
552 552 "node must be a 20-byte string");
553 553 return -1;
554 554 }
555 555 hlen = PyBytes_Size(pyhash);
556 556 /* Some parts of the codebase try and set 21 or 22
557 557 * byte "hash" values in order to perturb things for
558 558 * status. We have to preserve at least the 21st
559 559 * byte. Sigh. If there's a 22nd byte, we drop it on
560 560 * the floor, which works fine.
561 561 */
562 562 if (hlen != 20 && hlen != 21 && hlen != 22) {
563 563 PyErr_Format(PyExc_TypeError,
564 564 "node must be a 20-byte string");
565 565 return -1;
566 566 }
567 567 hash = PyBytes_AsString(pyhash);
568 568
569 569 pyflags = PyTuple_GetItem(value, 1);
570 570 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
571 571 PyErr_Format(PyExc_TypeError,
572 572 "flags must a 0 or 1 byte string");
573 573 return -1;
574 574 }
575 575 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
576 576 return -1;
577 577 }
578 578 /* one null byte and one newline */
579 579 dlen = plen + 41 + flen + 1;
580 580 dest = malloc(dlen);
581 581 if (!dest) {
582 582 PyErr_NoMemory();
583 583 return -1;
584 584 }
585 585 memcpy(dest, path, plen + 1);
586 586 for (i = 0; i < 20; i++) {
587 587 /* Cast to unsigned, so it will not get sign-extended when promoted
588 588 * to int (as is done when passing to a variadic function)
589 589 */
590 590 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
591 591 }
592 592 memcpy(dest + plen + 41, flags, flen);
593 593 dest[plen + 41 + flen] = '\n';
594 594 new.start = dest;
595 595 new.len = dlen;
596 596 new.hash_suffix = '\0';
597 597 if (hlen > 20) {
598 598 new.hash_suffix = hash[20];
599 599 }
600 600 new.from_malloc = true; /* is `start` a pointer we allocated? */
601 601 new.deleted = false; /* is this entry deleted? */
602 602 if (internalsetitem(self, &new)) {
603 603 return -1;
604 604 }
605 605 return 0;
606 606 }
607 607
608 608 static PyMappingMethods lazymanifest_mapping_methods = {
609 609 (lenfunc)lazymanifest_size, /* mp_length */
610 610 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
611 611 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
612 612 };
613 613
614 614 /* sequence methods (important or __contains__ builds an iterator) */
615 615
616 616 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
617 617 {
618 618 line needle;
619 619 line *hit;
620 620 if (!PyBytes_Check(key)) {
621 621 /* Our keys are always strings, so if the contains
622 622 * check is for a non-string, just return false. */
623 623 return 0;
624 624 }
625 625 needle.start = PyBytes_AsString(key);
626 626 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
627 627 &linecmp);
628 628 if (!hit || hit->deleted) {
629 629 return 0;
630 630 }
631 631 return 1;
632 632 }
633 633
634 634 static PySequenceMethods lazymanifest_seq_meths = {
635 635 (lenfunc)lazymanifest_size, /* sq_length */
636 636 0, /* sq_concat */
637 637 0, /* sq_repeat */
638 638 0, /* sq_item */
639 639 0, /* sq_slice */
640 640 0, /* sq_ass_item */
641 641 0, /* sq_ass_slice */
642 642 (objobjproc)lazymanifest_contains, /* sq_contains */
643 643 0, /* sq_inplace_concat */
644 644 0, /* sq_inplace_repeat */
645 645 };
646 646
647 647
648 648 /* Other methods (copy, diff, etc) */
649 649 static PyTypeObject lazymanifestType;
650 650
651 651 /* If the manifest has changes, build the new manifest text and reindex it. */
652 652 static int compact(lazymanifest *self)
653 653 {
654 654 int i;
655 655 ssize_t need = 0;
656 656 char *data;
657 657 line *src, *dst;
658 658 PyObject *pydata;
659 659 if (!self->dirty)
660 660 return 0;
661 661 for (i = 0; i < self->numlines; i++) {
662 662 if (!self->lines[i].deleted) {
663 663 need += self->lines[i].len;
664 664 }
665 665 }
666 666 pydata = PyBytes_FromStringAndSize(NULL, need);
667 667 if (!pydata)
668 668 return -1;
669 669 data = PyBytes_AsString(pydata);
670 670 if (!data) {
671 671 return -1;
672 672 }
673 673 src = self->lines;
674 674 dst = self->lines;
675 675 for (i = 0; i < self->numlines; i++, src++) {
676 676 char *tofree = NULL;
677 677 if (src->from_malloc) {
678 678 tofree = src->start;
679 679 }
680 680 if (!src->deleted) {
681 681 memcpy(data, src->start, src->len);
682 682 *dst = *src;
683 683 dst->start = data;
684 684 dst->from_malloc = false;
685 685 data += dst->len;
686 686 dst++;
687 687 }
688 688 free(tofree);
689 689 }
690 690 Py_DECREF(self->pydata);
691 691 self->pydata = pydata;
692 692 self->numlines = self->livelines;
693 693 self->dirty = false;
694 694 return 0;
695 695 }
696 696
697 697 static PyObject *lazymanifest_text(lazymanifest *self)
698 698 {
699 699 if (compact(self) != 0) {
700 700 PyErr_NoMemory();
701 701 return NULL;
702 702 }
703 703 Py_INCREF(self->pydata);
704 704 return self->pydata;
705 705 }
706 706
707 707 static lazymanifest *lazymanifest_copy(lazymanifest *self)
708 708 {
709 709 lazymanifest *copy = NULL;
710 710 if (compact(self) != 0) {
711 711 goto nomem;
712 712 }
713 713 copy = PyObject_New(lazymanifest, &lazymanifestType);
714 714 if (!copy) {
715 715 goto nomem;
716 716 }
717 717 lazymanifest_init_early(copy);
718 718 copy->numlines = self->numlines;
719 719 copy->livelines = self->livelines;
720 720 copy->dirty = false;
721 721 copy->lines = malloc(self->maxlines *sizeof(line));
722 722 if (!copy->lines) {
723 723 goto nomem;
724 724 }
725 725 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
726 726 copy->maxlines = self->maxlines;
727 727 copy->pydata = self->pydata;
728 728 Py_INCREF(copy->pydata);
729 729 return copy;
730 730 nomem:
731 731 PyErr_NoMemory();
732 732 Py_XDECREF(copy);
733 733 return NULL;
734 734 }
735 735
736 736 static lazymanifest *lazymanifest_filtercopy(
737 737 lazymanifest *self, PyObject *matchfn)
738 738 {
739 739 lazymanifest *copy = NULL;
740 740 int i;
741 741 if (!PyCallable_Check(matchfn)) {
742 742 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
743 743 return NULL;
744 744 }
745 745 /* compact ourselves first to avoid double-frees later when we
746 746 * compact tmp so that it doesn't have random pointers to our
747 747 * underlying from_malloc-data (self->pydata is safe) */
748 748 if (compact(self) != 0) {
749 749 goto nomem;
750 750 }
751 751 copy = PyObject_New(lazymanifest, &lazymanifestType);
752 752 if (!copy) {
753 753 goto nomem;
754 754 }
755 755 lazymanifest_init_early(copy);
756 756 copy->dirty = true;
757 757 copy->lines = malloc(self->maxlines * sizeof(line));
758 758 if (!copy->lines) {
759 759 goto nomem;
760 760 }
761 761 copy->maxlines = self->maxlines;
762 762 copy->numlines = 0;
763 763 copy->pydata = self->pydata;
764 764 Py_INCREF(copy->pydata);
765 765 for (i = 0; i < self->numlines; i++) {
766 766 PyObject *arglist = NULL, *result = NULL;
767 767 arglist = Py_BuildValue(PY23("(s)", "(y)"),
768 768 self->lines[i].start);
769 769 if (!arglist) {
770 770 goto bail;
771 771 }
772 772 result = PyObject_CallObject(matchfn, arglist);
773 773 Py_DECREF(arglist);
774 774 /* if the callback raised an exception, just let it
775 775 * through and give up */
776 776 if (!result) {
777 777 goto bail;
778 778 }
779 779 if (PyObject_IsTrue(result)) {
780 780 assert(!(self->lines[i].from_malloc));
781 781 copy->lines[copy->numlines++] = self->lines[i];
782 782 }
783 783 Py_DECREF(result);
784 784 }
785 785 copy->livelines = copy->numlines;
786 786 return copy;
787 787 nomem:
788 788 PyErr_NoMemory();
789 789 bail:
790 790 Py_XDECREF(copy);
791 791 return NULL;
792 792 }
793 793
794 794 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
795 795 {
796 796 lazymanifest *other;
797 797 PyObject *pyclean = NULL;
798 798 bool listclean;
799 799 PyObject *emptyTup = NULL, *ret = NULL;
800 800 PyObject *es;
801 801 int sneedle = 0, oneedle = 0;
802 802 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
803 803 return NULL;
804 804 }
805 805 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
806 806 es = PyBytes_FromString("");
807 807 if (!es) {
808 808 goto nomem;
809 809 }
810 810 emptyTup = PyTuple_Pack(2, Py_None, es);
811 811 Py_DECREF(es);
812 812 if (!emptyTup) {
813 813 goto nomem;
814 814 }
815 815 ret = PyDict_New();
816 816 if (!ret) {
817 817 goto nomem;
818 818 }
819 819 while (sneedle != self->numlines || oneedle != other->numlines) {
820 820 line *left = self->lines + sneedle;
821 821 line *right = other->lines + oneedle;
822 822 int result;
823 823 PyObject *key;
824 824 PyObject *outer;
825 825 /* If we're looking at a deleted entry and it's not
826 826 * the end of the manifest, just skip it. */
827 827 if (sneedle < self->numlines && left->deleted) {
828 828 sneedle++;
829 829 continue;
830 830 }
831 831 if (oneedle < other->numlines && right->deleted) {
832 832 oneedle++;
833 833 continue;
834 834 }
835 835 /* if we're at the end of either manifest, then we
836 836 * know the remaining items are adds so we can skip
837 837 * the strcmp. */
838 838 if (sneedle == self->numlines) {
839 839 result = 1;
840 840 } else if (oneedle == other->numlines) {
841 841 result = -1;
842 842 } else {
843 843 result = linecmp(left, right);
844 844 }
845 845 key = result <= 0 ?
846 846 PyBytes_FromString(left->start) :
847 847 PyBytes_FromString(right->start);
848 848 if (!key)
849 849 goto nomem;
850 850 if (result < 0) {
851 851 PyObject *l = hashflags(left);
852 852 if (!l) {
853 853 goto nomem;
854 854 }
855 855 outer = PyTuple_Pack(2, l, emptyTup);
856 856 Py_DECREF(l);
857 857 if (!outer) {
858 858 goto nomem;
859 859 }
860 860 PyDict_SetItem(ret, key, outer);
861 861 Py_DECREF(outer);
862 862 sneedle++;
863 863 } else if (result > 0) {
864 864 PyObject *r = hashflags(right);
865 865 if (!r) {
866 866 goto nomem;
867 867 }
868 868 outer = PyTuple_Pack(2, emptyTup, r);
869 869 Py_DECREF(r);
870 870 if (!outer) {
871 871 goto nomem;
872 872 }
873 873 PyDict_SetItem(ret, key, outer);
874 874 Py_DECREF(outer);
875 875 oneedle++;
876 876 } else {
877 877 /* file exists in both manifests */
878 878 if (left->len != right->len
879 879 || memcmp(left->start, right->start, left->len)
880 880 || left->hash_suffix != right->hash_suffix) {
881 881 PyObject *l = hashflags(left);
882 882 PyObject *r;
883 883 if (!l) {
884 884 goto nomem;
885 885 }
886 886 r = hashflags(right);
887 887 if (!r) {
888 888 Py_DECREF(l);
889 889 goto nomem;
890 890 }
891 891 outer = PyTuple_Pack(2, l, r);
892 892 Py_DECREF(l);
893 893 Py_DECREF(r);
894 894 if (!outer) {
895 895 goto nomem;
896 896 }
897 897 PyDict_SetItem(ret, key, outer);
898 898 Py_DECREF(outer);
899 899 } else if (listclean) {
900 900 PyDict_SetItem(ret, key, Py_None);
901 901 }
902 902 sneedle++;
903 903 oneedle++;
904 904 }
905 905 Py_DECREF(key);
906 906 }
907 907 Py_DECREF(emptyTup);
908 908 return ret;
909 909 nomem:
910 910 PyErr_NoMemory();
911 911 Py_XDECREF(ret);
912 912 Py_XDECREF(emptyTup);
913 913 return NULL;
914 914 }
915 915
916 916 static PyMethodDef lazymanifest_methods[] = {
917 917 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
918 918 "Iterate over file names in this lazymanifest."},
919 919 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
920 920 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
921 921 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
922 922 "Make a copy of this lazymanifest."},
923 923 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
924 924 "Make a copy of this manifest filtered by matchfn."},
925 925 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
926 926 "Compare this lazymanifest to another one."},
927 927 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
928 928 "Encode this manifest to text."},
929 929 {NULL},
930 930 };
931 931
932 932 #ifdef IS_PY3K
933 933 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
934 934 #else
935 935 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
936 936 #endif
937 937
938 938 static PyTypeObject lazymanifestType = {
939 939 PyVarObject_HEAD_INIT(NULL, 0) /* header */
940 940 "parsers.lazymanifest", /* tp_name */
941 941 sizeof(lazymanifest), /* tp_basicsize */
942 942 0, /* tp_itemsize */
943 943 (destructor)lazymanifest_dealloc, /* tp_dealloc */
944 944 0, /* tp_print */
945 945 0, /* tp_getattr */
946 946 0, /* tp_setattr */
947 947 0, /* tp_compare */
948 948 0, /* tp_repr */
949 949 0, /* tp_as_number */
950 950 &lazymanifest_seq_meths, /* tp_as_sequence */
951 951 &lazymanifest_mapping_methods, /* tp_as_mapping */
952 952 0, /* tp_hash */
953 953 0, /* tp_call */
954 954 0, /* tp_str */
955 955 0, /* tp_getattro */
956 956 0, /* tp_setattro */
957 957 0, /* tp_as_buffer */
958 958 LAZYMANIFEST_TPFLAGS, /* tp_flags */
959 959 "TODO(augie)", /* tp_doc */
960 960 0, /* tp_traverse */
961 961 0, /* tp_clear */
962 962 0, /* tp_richcompare */
963 963 0, /* tp_weaklistoffset */
964 964 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
965 965 0, /* tp_iternext */
966 966 lazymanifest_methods, /* tp_methods */
967 967 0, /* tp_members */
968 968 0, /* tp_getset */
969 969 0, /* tp_base */
970 970 0, /* tp_dict */
971 971 0, /* tp_descr_get */
972 972 0, /* tp_descr_set */
973 973 0, /* tp_dictoffset */
974 974 (initproc)lazymanifest_init, /* tp_init */
975 975 0, /* tp_alloc */
976 976 };
977 977
978 978 void manifest_module_init(PyObject * mod)
979 979 {
980 980 lazymanifestType.tp_new = PyType_GenericNew;
981 981 if (PyType_Ready(&lazymanifestType) < 0)
982 982 return;
983 983 Py_INCREF(&lazymanifestType);
984 984
985 985 PyModule_AddObject(mod, "lazymanifest",
986 986 (PyObject *)&lazymanifestType);
987 987 }
General Comments 0
You need to be logged in to leave comments. Login now