##// END OF EJS Templates
manifest: start removing 40-byte hash restrictions from C code...
Augie Fackler -
r45192:0b0e72b5 default
parent child Browse files
Show More
@@ -1,987 +1,1006 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
18 18 #define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 line *lines;
32 32 int numlines; /* number of line entries */
33 33 int livelines; /* number of non-deleted lines */
34 34 int maxlines; /* allocated number of lines */
35 35 bool dirty;
36 36 } lazymanifest;
37 37
38 38 #define MANIFEST_OOM -1
39 39 #define MANIFEST_NOT_SORTED -2
40 40 #define MANIFEST_MALFORMED -3
41 41 #define MANIFEST_BOGUS_FILENAME -4
42 42 #define MANIFEST_TOO_SHORT_LINE -5
43 43
44 44 /* get the length of the path for a line */
45 45 static Py_ssize_t pathlen(line *l)
46 46 {
47 47 const char *end = memchr(l->start, '\0', l->len);
48 48 return (end) ? (Py_ssize_t)(end - l->start) : l->len;
49 49 }
50 50
51 51 /* get the node value of a single line */
52 52 static PyObject *nodeof(line *l)
53 53 {
54 54 char *s = l->start;
55 55 Py_ssize_t llen = pathlen(l);
56 Py_ssize_t hlen = l->len - llen - 2;
57 Py_ssize_t hlen_raw = 20;
56 58 PyObject *hash;
57 59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
58 60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
59 61 return NULL;
60 62 }
61 hash = unhexlify(s + llen + 1, 40);
63 switch (hlen) {
64 case 40: /* sha1 */
65 case 41: /* sha1 with cruft for a merge */
66 break;
67 case 64: /* new hash */
68 case 65: /* new hash with cruft for a merge */
69 hlen_raw = 32;
70 break;
71 default:
72 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
73 return NULL;
74 }
75 hash = unhexlify(s + llen + 1, hlen_raw * 2);
62 76 if (!hash) {
63 77 return NULL;
64 78 }
65 79 if (l->hash_suffix != '\0') {
66 char newhash[21];
67 memcpy(newhash, PyBytes_AsString(hash), 20);
80 char newhash[33];
81 memcpy(newhash, PyBytes_AsString(hash), hlen_raw);
68 82 Py_DECREF(hash);
69 newhash[20] = l->hash_suffix;
70 hash = PyBytes_FromStringAndSize(newhash, 21);
83 newhash[hlen_raw] = l->hash_suffix;
84 hash = PyBytes_FromStringAndSize(newhash, hlen_raw+1);
71 85 }
72 86 return hash;
73 87 }
74 88
75 89 /* get the node hash and flags of a line as a tuple */
76 90 static PyObject *hashflags(line *l)
77 91 {
78 92 char *s = l->start;
79 93 Py_ssize_t plen = pathlen(l);
80 94 PyObject *hash = nodeof(l);
81
82 /* 40 for hash, 1 for null byte, 1 for newline */
83 Py_ssize_t hplen = plen + 42;
84 Py_ssize_t flen = l->len - hplen;
95 ssize_t hlen;
96 Py_ssize_t hplen, flen;
85 97 PyObject *flags;
86 98 PyObject *tup;
87 99
88 100 if (!hash)
89 101 return NULL;
102 /* hash is either 20 or 21 bytes for an old hash, so we use a
103 ternary here to get the "real" hexlified sha length. */
104 hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
105 /* 1 for null byte, 1 for newline */
106 hplen = plen + hlen + 2;
107 flen = l->len - hplen;
108
90 109 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
91 110 if (!flags) {
92 111 Py_DECREF(hash);
93 112 return NULL;
94 113 }
95 114 tup = PyTuple_Pack(2, hash, flags);
96 115 Py_DECREF(flags);
97 116 Py_DECREF(hash);
98 117 return tup;
99 118 }
100 119
101 120 /* if we're about to run out of space in the line index, add more */
102 121 static bool realloc_if_full(lazymanifest *self)
103 122 {
104 123 if (self->numlines == self->maxlines) {
105 124 self->maxlines *= 2;
106 125 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
107 126 }
108 127 return !!self->lines;
109 128 }
110 129
111 130 /*
112 131 * Find the line boundaries in the manifest that 'data' points to and store
113 132 * information about each line in 'self'.
114 133 */
115 134 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
116 135 {
117 136 char *prev = NULL;
118 137 while (len > 0) {
119 138 line *l;
120 139 char *next;
121 140 if (*data == '\0') {
122 141 /* It's implausible there's no filename, don't
123 142 * even bother looking for the newline. */
124 143 return MANIFEST_BOGUS_FILENAME;
125 144 }
126 145 next = memchr(data, '\n', len);
127 146 if (!next) {
128 147 return MANIFEST_MALFORMED;
129 148 }
130 149 if ((next - data) < 42) {
131 150 /* We should have at least 42 bytes in a line:
132 151 1 byte filename
133 152 1 NUL
134 153 40 bytes of hash
135 154 so we can give up here.
136 155 */
137 156 return MANIFEST_TOO_SHORT_LINE;
138 157 }
139 158 next++; /* advance past newline */
140 159 if (prev && strcmp(prev, data) > -1) {
141 160 /* This data isn't sorted, so we have to abort. */
142 161 return MANIFEST_NOT_SORTED;
143 162 }
144 163 if (!realloc_if_full(self)) {
145 164 return MANIFEST_OOM; /* no memory */
146 165 }
147 166 l = self->lines + ((self->numlines)++);
148 167 l->start = data;
149 168 l->len = next - data;
150 169 l->hash_suffix = '\0';
151 170 l->from_malloc = false;
152 171 l->deleted = false;
153 172 len = len - l->len;
154 173 prev = data;
155 174 data = next;
156 175 }
157 176 self->livelines = self->numlines;
158 177 return 0;
159 178 }
160 179
161 180 static void lazymanifest_init_early(lazymanifest *self)
162 181 {
163 182 self->pydata = NULL;
164 183 self->lines = NULL;
165 184 self->numlines = 0;
166 185 self->maxlines = 0;
167 186 }
168 187
169 188 static int lazymanifest_init(lazymanifest *self, PyObject *args)
170 189 {
171 190 char *data;
172 191 Py_ssize_t len;
173 192 int err, ret;
174 193 PyObject *pydata;
175 194
176 195 lazymanifest_init_early(self);
177 196 if (!PyArg_ParseTuple(args, "S", &pydata)) {
178 197 return -1;
179 198 }
180 199 err = PyBytes_AsStringAndSize(pydata, &data, &len);
181 200
182 201 self->dirty = false;
183 202 if (err == -1)
184 203 return -1;
185 204 self->pydata = pydata;
186 205 Py_INCREF(self->pydata);
187 206 Py_BEGIN_ALLOW_THREADS
188 207 self->lines = malloc(DEFAULT_LINES * sizeof(line));
189 208 self->maxlines = DEFAULT_LINES;
190 209 self->numlines = 0;
191 210 if (!self->lines)
192 211 ret = MANIFEST_OOM;
193 212 else
194 213 ret = find_lines(self, data, len);
195 214 Py_END_ALLOW_THREADS
196 215 switch (ret) {
197 216 case 0:
198 217 break;
199 218 case MANIFEST_OOM:
200 219 PyErr_NoMemory();
201 220 break;
202 221 case MANIFEST_NOT_SORTED:
203 222 PyErr_Format(PyExc_ValueError,
204 223 "Manifest lines not in sorted order.");
205 224 break;
206 225 case MANIFEST_MALFORMED:
207 226 PyErr_Format(PyExc_ValueError,
208 227 "Manifest did not end in a newline.");
209 228 break;
210 229 case MANIFEST_BOGUS_FILENAME:
211 230 PyErr_Format(
212 231 PyExc_ValueError,
213 232 "Manifest had an entry with a zero-length filename.");
214 233 break;
215 234 case MANIFEST_TOO_SHORT_LINE:
216 235 PyErr_Format(
217 236 PyExc_ValueError,
218 237 "Manifest had implausibly-short line.");
219 238 break;
220 239 default:
221 240 PyErr_Format(PyExc_ValueError,
222 241 "Unknown problem parsing manifest.");
223 242 }
224 243 return ret == 0 ? 0 : -1;
225 244 }
226 245
227 246 static void lazymanifest_dealloc(lazymanifest *self)
228 247 {
229 248 /* free any extra lines we had to allocate */
230 249 int i;
231 250 for (i = 0; self->lines && (i < self->numlines); i++) {
232 251 if (self->lines[i].from_malloc) {
233 252 free(self->lines[i].start);
234 253 }
235 254 }
236 255 free(self->lines);
237 256 self->lines = NULL;
238 257 if (self->pydata) {
239 258 Py_DECREF(self->pydata);
240 259 self->pydata = NULL;
241 260 }
242 261 PyObject_Del(self);
243 262 }
244 263
245 264 /* iteration support */
246 265
247 266 typedef struct {
248 267 PyObject_HEAD lazymanifest *m;
249 268 Py_ssize_t pos;
250 269 } lmIter;
251 270
252 271 static void lmiter_dealloc(PyObject *o)
253 272 {
254 273 lmIter *self = (lmIter *)o;
255 274 Py_DECREF(self->m);
256 275 PyObject_Del(self);
257 276 }
258 277
259 278 static line *lmiter_nextline(lmIter *self)
260 279 {
261 280 do {
262 281 self->pos++;
263 282 if (self->pos >= self->m->numlines) {
264 283 return NULL;
265 284 }
266 285 /* skip over deleted manifest entries */
267 286 } while (self->m->lines[self->pos].deleted);
268 287 return self->m->lines + self->pos;
269 288 }
270 289
271 290 static PyObject *lmiter_iterentriesnext(PyObject *o)
272 291 {
273 292 Py_ssize_t pl;
274 293 line *l;
275 294 Py_ssize_t consumed;
276 295 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
277 296 l = lmiter_nextline((lmIter *)o);
278 297 if (!l) {
279 298 goto done;
280 299 }
281 300 pl = pathlen(l);
282 301 path = PyBytes_FromStringAndSize(l->start, pl);
283 302 hash = nodeof(l);
284 303 if (!path || !hash) {
285 304 goto done;
286 305 }
287 306 consumed = pl + 41;
288 307 flags = PyBytes_FromStringAndSize(l->start + consumed,
289 308 l->len - consumed - 1);
290 309 if (!flags) {
291 310 goto done;
292 311 }
293 312 ret = PyTuple_Pack(3, path, hash, flags);
294 313 done:
295 314 Py_XDECREF(path);
296 315 Py_XDECREF(hash);
297 316 Py_XDECREF(flags);
298 317 return ret;
299 318 }
300 319
301 320 #ifdef IS_PY3K
302 321 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
303 322 #else
304 323 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
305 324 | Py_TPFLAGS_HAVE_ITER
306 325 #endif
307 326
308 327 static PyTypeObject lazymanifestEntriesIterator = {
309 328 PyVarObject_HEAD_INIT(NULL, 0) /* header */
310 329 "parsers.lazymanifest.entriesiterator", /*tp_name */
311 330 sizeof(lmIter), /*tp_basicsize */
312 331 0, /*tp_itemsize */
313 332 lmiter_dealloc, /*tp_dealloc */
314 333 0, /*tp_print */
315 334 0, /*tp_getattr */
316 335 0, /*tp_setattr */
317 336 0, /*tp_compare */
318 337 0, /*tp_repr */
319 338 0, /*tp_as_number */
320 339 0, /*tp_as_sequence */
321 340 0, /*tp_as_mapping */
322 341 0, /*tp_hash */
323 342 0, /*tp_call */
324 343 0, /*tp_str */
325 344 0, /*tp_getattro */
326 345 0, /*tp_setattro */
327 346 0, /*tp_as_buffer */
328 347 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
329 348 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
330 349 0, /* tp_traverse */
331 350 0, /* tp_clear */
332 351 0, /* tp_richcompare */
333 352 0, /* tp_weaklistoffset */
334 353 PyObject_SelfIter, /* tp_iter: __iter__() method */
335 354 lmiter_iterentriesnext, /* tp_iternext: next() method */
336 355 };
337 356
338 357 static PyObject *lmiter_iterkeysnext(PyObject *o)
339 358 {
340 359 Py_ssize_t pl;
341 360 line *l = lmiter_nextline((lmIter *)o);
342 361 if (!l) {
343 362 return NULL;
344 363 }
345 364 pl = pathlen(l);
346 365 return PyBytes_FromStringAndSize(l->start, pl);
347 366 }
348 367
349 368 #ifdef IS_PY3K
350 369 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
351 370 #else
352 371 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
353 372 | Py_TPFLAGS_HAVE_ITER
354 373 #endif
355 374
356 375 static PyTypeObject lazymanifestKeysIterator = {
357 376 PyVarObject_HEAD_INIT(NULL, 0) /* header */
358 377 "parsers.lazymanifest.keysiterator", /*tp_name */
359 378 sizeof(lmIter), /*tp_basicsize */
360 379 0, /*tp_itemsize */
361 380 lmiter_dealloc, /*tp_dealloc */
362 381 0, /*tp_print */
363 382 0, /*tp_getattr */
364 383 0, /*tp_setattr */
365 384 0, /*tp_compare */
366 385 0, /*tp_repr */
367 386 0, /*tp_as_number */
368 387 0, /*tp_as_sequence */
369 388 0, /*tp_as_mapping */
370 389 0, /*tp_hash */
371 390 0, /*tp_call */
372 391 0, /*tp_str */
373 392 0, /*tp_getattro */
374 393 0, /*tp_setattro */
375 394 0, /*tp_as_buffer */
376 395 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
377 396 "Keys iterator for a lazymanifest.", /* tp_doc */
378 397 0, /* tp_traverse */
379 398 0, /* tp_clear */
380 399 0, /* tp_richcompare */
381 400 0, /* tp_weaklistoffset */
382 401 PyObject_SelfIter, /* tp_iter: __iter__() method */
383 402 lmiter_iterkeysnext, /* tp_iternext: next() method */
384 403 };
385 404
386 405 static lazymanifest *lazymanifest_copy(lazymanifest *self);
387 406
388 407 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
389 408 {
390 409 lmIter *i = NULL;
391 410 lazymanifest *t = lazymanifest_copy(self);
392 411 if (!t) {
393 412 PyErr_NoMemory();
394 413 return NULL;
395 414 }
396 415 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
397 416 if (i) {
398 417 i->m = t;
399 418 i->pos = -1;
400 419 } else {
401 420 Py_DECREF(t);
402 421 PyErr_NoMemory();
403 422 }
404 423 return (PyObject *)i;
405 424 }
406 425
407 426 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
408 427 {
409 428 lmIter *i = NULL;
410 429 lazymanifest *t = lazymanifest_copy(self);
411 430 if (!t) {
412 431 PyErr_NoMemory();
413 432 return NULL;
414 433 }
415 434 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
416 435 if (i) {
417 436 i->m = t;
418 437 i->pos = -1;
419 438 } else {
420 439 Py_DECREF(t);
421 440 PyErr_NoMemory();
422 441 }
423 442 return (PyObject *)i;
424 443 }
425 444
426 445 /* __getitem__ and __setitem__ support */
427 446
428 447 static Py_ssize_t lazymanifest_size(lazymanifest *self)
429 448 {
430 449 return self->livelines;
431 450 }
432 451
433 452 static int linecmp(const void *left, const void *right)
434 453 {
435 454 return strcmp(((const line *)left)->start,
436 455 ((const line *)right)->start);
437 456 }
438 457
439 458 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
440 459 {
441 460 line needle;
442 461 line *hit;
443 462 if (!PyBytes_Check(key)) {
444 463 PyErr_Format(PyExc_TypeError,
445 464 "getitem: manifest keys must be a string.");
446 465 return NULL;
447 466 }
448 467 needle.start = PyBytes_AsString(key);
449 468 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
450 469 &linecmp);
451 470 if (!hit || hit->deleted) {
452 471 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
453 472 return NULL;
454 473 }
455 474 return hashflags(hit);
456 475 }
457 476
458 477 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
459 478 {
460 479 line needle;
461 480 line *hit;
462 481 if (!PyBytes_Check(key)) {
463 482 PyErr_Format(PyExc_TypeError,
464 483 "delitem: manifest keys must be a string.");
465 484 return -1;
466 485 }
467 486 needle.start = PyBytes_AsString(key);
468 487 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
469 488 &linecmp);
470 489 if (!hit || hit->deleted) {
471 490 PyErr_Format(PyExc_KeyError,
472 491 "Tried to delete nonexistent manifest entry.");
473 492 return -1;
474 493 }
475 494 self->dirty = true;
476 495 hit->deleted = true;
477 496 self->livelines--;
478 497 return 0;
479 498 }
480 499
481 500 /* Do a binary search for the insertion point for new, creating the
482 501 * new entry if needed. */
483 502 static int internalsetitem(lazymanifest *self, line *new)
484 503 {
485 504 int start = 0, end = self->numlines;
486 505 while (start < end) {
487 506 int pos = start + (end - start) / 2;
488 507 int c = linecmp(new, self->lines + pos);
489 508 if (c < 0)
490 509 end = pos;
491 510 else if (c > 0)
492 511 start = pos + 1;
493 512 else {
494 513 if (self->lines[pos].deleted)
495 514 self->livelines++;
496 515 if (self->lines[pos].from_malloc)
497 516 free(self->lines[pos].start);
498 517 start = pos;
499 518 goto finish;
500 519 }
501 520 }
502 521 /* being here means we need to do an insert */
503 522 if (!realloc_if_full(self)) {
504 523 PyErr_NoMemory();
505 524 return -1;
506 525 }
507 526 memmove(self->lines + start + 1, self->lines + start,
508 527 (self->numlines - start) * sizeof(line));
509 528 self->numlines++;
510 529 self->livelines++;
511 530 finish:
512 531 self->lines[start] = *new;
513 532 self->dirty = true;
514 533 return 0;
515 534 }
516 535
517 536 static int lazymanifest_setitem(
518 537 lazymanifest *self, PyObject *key, PyObject *value)
519 538 {
520 539 char *path;
521 540 Py_ssize_t plen;
522 541 PyObject *pyhash;
523 542 Py_ssize_t hlen;
524 543 char *hash;
525 544 PyObject *pyflags;
526 545 char *flags;
527 546 Py_ssize_t flen;
528 547 Py_ssize_t dlen;
529 548 char *dest;
530 549 int i;
531 550 line new;
532 551 if (!PyBytes_Check(key)) {
533 552 PyErr_Format(PyExc_TypeError,
534 553 "setitem: manifest keys must be a string.");
535 554 return -1;
536 555 }
537 556 if (!value) {
538 557 return lazymanifest_delitem(self, key);
539 558 }
540 559 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
541 560 PyErr_Format(PyExc_TypeError,
542 561 "Manifest values must be a tuple of (node, flags).");
543 562 return -1;
544 563 }
545 564 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
546 565 return -1;
547 566 }
548 567
549 568 pyhash = PyTuple_GetItem(value, 0);
550 569 if (!PyBytes_Check(pyhash)) {
551 570 PyErr_Format(PyExc_TypeError,
552 571 "node must be a 20-byte string");
553 572 return -1;
554 573 }
555 574 hlen = PyBytes_Size(pyhash);
556 575 /* Some parts of the codebase try and set 21 or 22
557 576 * byte "hash" values in order to perturb things for
558 577 * status. We have to preserve at least the 21st
559 578 * byte. Sigh. If there's a 22nd byte, we drop it on
560 579 * the floor, which works fine.
561 580 */
562 581 if (hlen != 20 && hlen != 21 && hlen != 22) {
563 582 PyErr_Format(PyExc_TypeError,
564 583 "node must be a 20-byte string");
565 584 return -1;
566 585 }
567 586 hash = PyBytes_AsString(pyhash);
568 587
569 588 pyflags = PyTuple_GetItem(value, 1);
570 589 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
571 590 PyErr_Format(PyExc_TypeError,
572 591 "flags must a 0 or 1 byte string");
573 592 return -1;
574 593 }
575 594 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
576 595 return -1;
577 596 }
578 597 /* one null byte and one newline */
579 598 dlen = plen + 41 + flen + 1;
580 599 dest = malloc(dlen);
581 600 if (!dest) {
582 601 PyErr_NoMemory();
583 602 return -1;
584 603 }
585 604 memcpy(dest, path, plen + 1);
586 605 for (i = 0; i < 20; i++) {
587 606 /* Cast to unsigned, so it will not get sign-extended when promoted
588 607 * to int (as is done when passing to a variadic function)
589 608 */
590 609 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
591 610 }
592 611 memcpy(dest + plen + 41, flags, flen);
593 612 dest[plen + 41 + flen] = '\n';
594 613 new.start = dest;
595 614 new.len = dlen;
596 615 new.hash_suffix = '\0';
597 616 if (hlen > 20) {
598 617 new.hash_suffix = hash[20];
599 618 }
600 619 new.from_malloc = true; /* is `start` a pointer we allocated? */
601 620 new.deleted = false; /* is this entry deleted? */
602 621 if (internalsetitem(self, &new)) {
603 622 return -1;
604 623 }
605 624 return 0;
606 625 }
607 626
608 627 static PyMappingMethods lazymanifest_mapping_methods = {
609 628 (lenfunc)lazymanifest_size, /* mp_length */
610 629 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
611 630 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
612 631 };
613 632
614 633 /* sequence methods (important or __contains__ builds an iterator) */
615 634
616 635 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
617 636 {
618 637 line needle;
619 638 line *hit;
620 639 if (!PyBytes_Check(key)) {
621 640 /* Our keys are always strings, so if the contains
622 641 * check is for a non-string, just return false. */
623 642 return 0;
624 643 }
625 644 needle.start = PyBytes_AsString(key);
626 645 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
627 646 &linecmp);
628 647 if (!hit || hit->deleted) {
629 648 return 0;
630 649 }
631 650 return 1;
632 651 }
633 652
634 653 static PySequenceMethods lazymanifest_seq_meths = {
635 654 (lenfunc)lazymanifest_size, /* sq_length */
636 655 0, /* sq_concat */
637 656 0, /* sq_repeat */
638 657 0, /* sq_item */
639 658 0, /* sq_slice */
640 659 0, /* sq_ass_item */
641 660 0, /* sq_ass_slice */
642 661 (objobjproc)lazymanifest_contains, /* sq_contains */
643 662 0, /* sq_inplace_concat */
644 663 0, /* sq_inplace_repeat */
645 664 };
646 665
647 666
648 667 /* Other methods (copy, diff, etc) */
649 668 static PyTypeObject lazymanifestType;
650 669
651 670 /* If the manifest has changes, build the new manifest text and reindex it. */
652 671 static int compact(lazymanifest *self)
653 672 {
654 673 int i;
655 674 ssize_t need = 0;
656 675 char *data;
657 676 line *src, *dst;
658 677 PyObject *pydata;
659 678 if (!self->dirty)
660 679 return 0;
661 680 for (i = 0; i < self->numlines; i++) {
662 681 if (!self->lines[i].deleted) {
663 682 need += self->lines[i].len;
664 683 }
665 684 }
666 685 pydata = PyBytes_FromStringAndSize(NULL, need);
667 686 if (!pydata)
668 687 return -1;
669 688 data = PyBytes_AsString(pydata);
670 689 if (!data) {
671 690 return -1;
672 691 }
673 692 src = self->lines;
674 693 dst = self->lines;
675 694 for (i = 0; i < self->numlines; i++, src++) {
676 695 char *tofree = NULL;
677 696 if (src->from_malloc) {
678 697 tofree = src->start;
679 698 }
680 699 if (!src->deleted) {
681 700 memcpy(data, src->start, src->len);
682 701 *dst = *src;
683 702 dst->start = data;
684 703 dst->from_malloc = false;
685 704 data += dst->len;
686 705 dst++;
687 706 }
688 707 free(tofree);
689 708 }
690 709 Py_DECREF(self->pydata);
691 710 self->pydata = pydata;
692 711 self->numlines = self->livelines;
693 712 self->dirty = false;
694 713 return 0;
695 714 }
696 715
697 716 static PyObject *lazymanifest_text(lazymanifest *self)
698 717 {
699 718 if (compact(self) != 0) {
700 719 PyErr_NoMemory();
701 720 return NULL;
702 721 }
703 722 Py_INCREF(self->pydata);
704 723 return self->pydata;
705 724 }
706 725
707 726 static lazymanifest *lazymanifest_copy(lazymanifest *self)
708 727 {
709 728 lazymanifest *copy = NULL;
710 729 if (compact(self) != 0) {
711 730 goto nomem;
712 731 }
713 732 copy = PyObject_New(lazymanifest, &lazymanifestType);
714 733 if (!copy) {
715 734 goto nomem;
716 735 }
717 736 lazymanifest_init_early(copy);
718 737 copy->numlines = self->numlines;
719 738 copy->livelines = self->livelines;
720 739 copy->dirty = false;
721 740 copy->lines = malloc(self->maxlines *sizeof(line));
722 741 if (!copy->lines) {
723 742 goto nomem;
724 743 }
725 744 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
726 745 copy->maxlines = self->maxlines;
727 746 copy->pydata = self->pydata;
728 747 Py_INCREF(copy->pydata);
729 748 return copy;
730 749 nomem:
731 750 PyErr_NoMemory();
732 751 Py_XDECREF(copy);
733 752 return NULL;
734 753 }
735 754
736 755 static lazymanifest *lazymanifest_filtercopy(
737 756 lazymanifest *self, PyObject *matchfn)
738 757 {
739 758 lazymanifest *copy = NULL;
740 759 int i;
741 760 if (!PyCallable_Check(matchfn)) {
742 761 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
743 762 return NULL;
744 763 }
745 764 /* compact ourselves first to avoid double-frees later when we
746 765 * compact tmp so that it doesn't have random pointers to our
747 766 * underlying from_malloc-data (self->pydata is safe) */
748 767 if (compact(self) != 0) {
749 768 goto nomem;
750 769 }
751 770 copy = PyObject_New(lazymanifest, &lazymanifestType);
752 771 if (!copy) {
753 772 goto nomem;
754 773 }
755 774 lazymanifest_init_early(copy);
756 775 copy->dirty = true;
757 776 copy->lines = malloc(self->maxlines * sizeof(line));
758 777 if (!copy->lines) {
759 778 goto nomem;
760 779 }
761 780 copy->maxlines = self->maxlines;
762 781 copy->numlines = 0;
763 782 copy->pydata = self->pydata;
764 783 Py_INCREF(copy->pydata);
765 784 for (i = 0; i < self->numlines; i++) {
766 785 PyObject *arglist = NULL, *result = NULL;
767 786 arglist = Py_BuildValue(PY23("(s)", "(y)"),
768 787 self->lines[i].start);
769 788 if (!arglist) {
770 789 goto bail;
771 790 }
772 791 result = PyObject_CallObject(matchfn, arglist);
773 792 Py_DECREF(arglist);
774 793 /* if the callback raised an exception, just let it
775 794 * through and give up */
776 795 if (!result) {
777 796 goto bail;
778 797 }
779 798 if (PyObject_IsTrue(result)) {
780 799 assert(!(self->lines[i].from_malloc));
781 800 copy->lines[copy->numlines++] = self->lines[i];
782 801 }
783 802 Py_DECREF(result);
784 803 }
785 804 copy->livelines = copy->numlines;
786 805 return copy;
787 806 nomem:
788 807 PyErr_NoMemory();
789 808 bail:
790 809 Py_XDECREF(copy);
791 810 return NULL;
792 811 }
793 812
794 813 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
795 814 {
796 815 lazymanifest *other;
797 816 PyObject *pyclean = NULL;
798 817 bool listclean;
799 818 PyObject *emptyTup = NULL, *ret = NULL;
800 819 PyObject *es;
801 820 int sneedle = 0, oneedle = 0;
802 821 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
803 822 return NULL;
804 823 }
805 824 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
806 825 es = PyBytes_FromString("");
807 826 if (!es) {
808 827 goto nomem;
809 828 }
810 829 emptyTup = PyTuple_Pack(2, Py_None, es);
811 830 Py_DECREF(es);
812 831 if (!emptyTup) {
813 832 goto nomem;
814 833 }
815 834 ret = PyDict_New();
816 835 if (!ret) {
817 836 goto nomem;
818 837 }
819 838 while (sneedle != self->numlines || oneedle != other->numlines) {
820 839 line *left = self->lines + sneedle;
821 840 line *right = other->lines + oneedle;
822 841 int result;
823 842 PyObject *key;
824 843 PyObject *outer;
825 844 /* If we're looking at a deleted entry and it's not
826 845 * the end of the manifest, just skip it. */
827 846 if (sneedle < self->numlines && left->deleted) {
828 847 sneedle++;
829 848 continue;
830 849 }
831 850 if (oneedle < other->numlines && right->deleted) {
832 851 oneedle++;
833 852 continue;
834 853 }
835 854 /* if we're at the end of either manifest, then we
836 855 * know the remaining items are adds so we can skip
837 856 * the strcmp. */
838 857 if (sneedle == self->numlines) {
839 858 result = 1;
840 859 } else if (oneedle == other->numlines) {
841 860 result = -1;
842 861 } else {
843 862 result = linecmp(left, right);
844 863 }
845 864 key = result <= 0 ?
846 865 PyBytes_FromString(left->start) :
847 866 PyBytes_FromString(right->start);
848 867 if (!key)
849 868 goto nomem;
850 869 if (result < 0) {
851 870 PyObject *l = hashflags(left);
852 871 if (!l) {
853 872 goto nomem;
854 873 }
855 874 outer = PyTuple_Pack(2, l, emptyTup);
856 875 Py_DECREF(l);
857 876 if (!outer) {
858 877 goto nomem;
859 878 }
860 879 PyDict_SetItem(ret, key, outer);
861 880 Py_DECREF(outer);
862 881 sneedle++;
863 882 } else if (result > 0) {
864 883 PyObject *r = hashflags(right);
865 884 if (!r) {
866 885 goto nomem;
867 886 }
868 887 outer = PyTuple_Pack(2, emptyTup, r);
869 888 Py_DECREF(r);
870 889 if (!outer) {
871 890 goto nomem;
872 891 }
873 892 PyDict_SetItem(ret, key, outer);
874 893 Py_DECREF(outer);
875 894 oneedle++;
876 895 } else {
877 896 /* file exists in both manifests */
878 897 if (left->len != right->len
879 898 || memcmp(left->start, right->start, left->len)
880 899 || left->hash_suffix != right->hash_suffix) {
881 900 PyObject *l = hashflags(left);
882 901 PyObject *r;
883 902 if (!l) {
884 903 goto nomem;
885 904 }
886 905 r = hashflags(right);
887 906 if (!r) {
888 907 Py_DECREF(l);
889 908 goto nomem;
890 909 }
891 910 outer = PyTuple_Pack(2, l, r);
892 911 Py_DECREF(l);
893 912 Py_DECREF(r);
894 913 if (!outer) {
895 914 goto nomem;
896 915 }
897 916 PyDict_SetItem(ret, key, outer);
898 917 Py_DECREF(outer);
899 918 } else if (listclean) {
900 919 PyDict_SetItem(ret, key, Py_None);
901 920 }
902 921 sneedle++;
903 922 oneedle++;
904 923 }
905 924 Py_DECREF(key);
906 925 }
907 926 Py_DECREF(emptyTup);
908 927 return ret;
909 928 nomem:
910 929 PyErr_NoMemory();
911 930 Py_XDECREF(ret);
912 931 Py_XDECREF(emptyTup);
913 932 return NULL;
914 933 }
915 934
916 935 static PyMethodDef lazymanifest_methods[] = {
917 936 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
918 937 "Iterate over file names in this lazymanifest."},
919 938 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
920 939 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
921 940 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
922 941 "Make a copy of this lazymanifest."},
923 942 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
924 943 "Make a copy of this manifest filtered by matchfn."},
925 944 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
926 945 "Compare this lazymanifest to another one."},
927 946 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
928 947 "Encode this manifest to text."},
929 948 {NULL},
930 949 };
931 950
932 951 #ifdef IS_PY3K
933 952 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
934 953 #else
935 954 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
936 955 #endif
937 956
938 957 static PyTypeObject lazymanifestType = {
939 958 PyVarObject_HEAD_INIT(NULL, 0) /* header */
940 959 "parsers.lazymanifest", /* tp_name */
941 960 sizeof(lazymanifest), /* tp_basicsize */
942 961 0, /* tp_itemsize */
943 962 (destructor)lazymanifest_dealloc, /* tp_dealloc */
944 963 0, /* tp_print */
945 964 0, /* tp_getattr */
946 965 0, /* tp_setattr */
947 966 0, /* tp_compare */
948 967 0, /* tp_repr */
949 968 0, /* tp_as_number */
950 969 &lazymanifest_seq_meths, /* tp_as_sequence */
951 970 &lazymanifest_mapping_methods, /* tp_as_mapping */
952 971 0, /* tp_hash */
953 972 0, /* tp_call */
954 973 0, /* tp_str */
955 974 0, /* tp_getattro */
956 975 0, /* tp_setattro */
957 976 0, /* tp_as_buffer */
958 977 LAZYMANIFEST_TPFLAGS, /* tp_flags */
959 978 "TODO(augie)", /* tp_doc */
960 979 0, /* tp_traverse */
961 980 0, /* tp_clear */
962 981 0, /* tp_richcompare */
963 982 0, /* tp_weaklistoffset */
964 983 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
965 984 0, /* tp_iternext */
966 985 lazymanifest_methods, /* tp_methods */
967 986 0, /* tp_members */
968 987 0, /* tp_getset */
969 988 0, /* tp_base */
970 989 0, /* tp_dict */
971 990 0, /* tp_descr_get */
972 991 0, /* tp_descr_set */
973 992 0, /* tp_dictoffset */
974 993 (initproc)lazymanifest_init, /* tp_init */
975 994 0, /* tp_alloc */
976 995 };
977 996
978 997 void manifest_module_init(PyObject * mod)
979 998 {
980 999 lazymanifestType.tp_new = PyType_GenericNew;
981 1000 if (PyType_Ready(&lazymanifestType) < 0)
982 1001 return;
983 1002 Py_INCREF(&lazymanifestType);
984 1003
985 1004 PyModule_AddObject(mod, "lazymanifest",
986 1005 (PyObject *)&lazymanifestType);
987 1006 }
General Comments 0
You need to be logged in to leave comments. Login now