##// END OF EJS Templates
manifest: make sure there's a filename before bothering to look for newline...
Augie Fackler -
r40635:9eeda719 default
parent child Browse files
Show More
@@ -1,960 +1,972
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
18 18 #define DEFAULT_LINES 100000
19 19
/*
 * Index entry for one manifest line. The code in this file reads a line as:
 * path, '\0', 40 hex digits of hash, optional flags, '\n'.
 */
typedef struct {
	char *start; /* first byte of the line, i.e. the start of the path */
	Py_ssize_t len; /* length of line including terminal newline */
	char hash_suffix; /* extra 21st hash byte, '\0' when there is none */
	bool from_malloc; /* true when `start` was malloc()ed by this file */
	bool deleted; /* entry was removed but is still present in the index */
} line;
27 27
/* Python object wrapping manifest text plus a sorted index of its lines. */
typedef struct {
	PyObject_HEAD
	PyObject *pydata; /* bytes object that non-malloc()ed lines point into */
	line *lines; /* malloc()ed array of line entries */
	int numlines; /* number of line entries */
	int livelines; /* number of non-deleted lines */
	int maxlines; /* allocated number of lines */
	bool dirty; /* set by item assignment/deletion, cleared by compact() */
} lazymanifest;
37 37
/*
 * Error codes produced while indexing manifest text; lazymanifest_init()
 * translates each of them into a Python exception.
 */
#define MANIFEST_OOM -1
#define MANIFEST_NOT_SORTED -2
#define MANIFEST_MALFORMED -3
#define MANIFEST_BOGUS_FILENAME -4
41 42
42 43 /* get the length of the path for a line */
43 44 static size_t pathlen(line *l)
44 45 {
45 46 const char *end = memchr(l->start, '\0', l->len);
46 47 return (end) ? (size_t)(end - l->start) : l->len;
47 48 }
48 49
49 50 /* get the node value of a single line */
50 51 static PyObject *nodeof(line *l)
51 52 {
52 53 char *s = l->start;
53 54 ssize_t llen = pathlen(l);
54 55 PyObject *hash;
55 56 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
56 57 PyErr_SetString(PyExc_ValueError, "manifest line too short");
57 58 return NULL;
58 59 }
59 60 hash = unhexlify(s + llen + 1, 40);
60 61 if (!hash) {
61 62 return NULL;
62 63 }
63 64 if (l->hash_suffix != '\0') {
64 65 char newhash[21];
65 66 memcpy(newhash, PyBytes_AsString(hash), 20);
66 67 Py_DECREF(hash);
67 68 newhash[20] = l->hash_suffix;
68 69 hash = PyBytes_FromStringAndSize(newhash, 21);
69 70 }
70 71 return hash;
71 72 }
72 73
73 74 /* get the node hash and flags of a line as a tuple */
74 75 static PyObject *hashflags(line *l)
75 76 {
76 77 char *s = l->start;
77 78 size_t plen = pathlen(l);
78 79 PyObject *hash = nodeof(l);
79 80
80 81 /* 40 for hash, 1 for null byte, 1 for newline */
81 82 size_t hplen = plen + 42;
82 83 Py_ssize_t flen = l->len - hplen;
83 84 PyObject *flags;
84 85 PyObject *tup;
85 86
86 87 if (!hash)
87 88 return NULL;
88 89 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
89 90 if (!flags) {
90 91 Py_DECREF(hash);
91 92 return NULL;
92 93 }
93 94 tup = PyTuple_Pack(2, hash, flags);
94 95 Py_DECREF(flags);
95 96 Py_DECREF(hash);
96 97 return tup;
97 98 }
98 99
99 100 /* if we're about to run out of space in the line index, add more */
100 101 static bool realloc_if_full(lazymanifest *self)
101 102 {
102 103 if (self->numlines == self->maxlines) {
103 104 self->maxlines *= 2;
104 105 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
105 106 }
106 107 return !!self->lines;
107 108 }
108 109
109 110 /*
110 111 * Find the line boundaries in the manifest that 'data' points to and store
111 112 * information about each line in 'self'.
112 113 */
113 114 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
114 115 {
115 116 char *prev = NULL;
116 117 while (len > 0) {
117 118 line *l;
118 char *next = memchr(data, '\n', len);
119 char *next;
120 if (*data == '\0') {
121 /* It's implausible there's no filename, don't
122 * even bother looking for the newline. */
123 return MANIFEST_BOGUS_FILENAME;
124 }
125 next = memchr(data, '\n', len);
119 126 if (!next) {
120 127 return MANIFEST_MALFORMED;
121 128 }
122 129 next++; /* advance past newline */
123 130 if (!realloc_if_full(self)) {
124 131 return MANIFEST_OOM; /* no memory */
125 132 }
126 133 if (prev && strcmp(prev, data) > -1) {
127 134 /* This data isn't sorted, so we have to abort. */
128 135 return MANIFEST_NOT_SORTED;
129 136 }
130 137 l = self->lines + ((self->numlines)++);
131 138 l->start = data;
132 139 l->len = next - data;
133 140 l->hash_suffix = '\0';
134 141 l->from_malloc = false;
135 142 l->deleted = false;
136 143 len = len - l->len;
137 144 prev = data;
138 145 data = next;
139 146 }
140 147 self->livelines = self->numlines;
141 148 return 0;
142 149 }
143 150
144 151 static void lazymanifest_init_early(lazymanifest *self)
145 152 {
146 153 self->pydata = NULL;
147 154 self->lines = NULL;
148 155 self->numlines = 0;
149 156 self->maxlines = 0;
150 157 }
151 158
152 159 static int lazymanifest_init(lazymanifest *self, PyObject *args)
153 160 {
154 161 char *data;
155 162 Py_ssize_t len;
156 163 int err, ret;
157 164 PyObject *pydata;
158 165
159 166 lazymanifest_init_early(self);
160 167 if (!PyArg_ParseTuple(args, "S", &pydata)) {
161 168 return -1;
162 169 }
163 170 err = PyBytes_AsStringAndSize(pydata, &data, &len);
164 171
165 172 self->dirty = false;
166 173 if (err == -1)
167 174 return -1;
168 175 self->pydata = pydata;
169 176 Py_INCREF(self->pydata);
170 177 Py_BEGIN_ALLOW_THREADS
171 178 self->lines = malloc(DEFAULT_LINES * sizeof(line));
172 179 self->maxlines = DEFAULT_LINES;
173 180 self->numlines = 0;
174 181 if (!self->lines)
175 182 ret = MANIFEST_OOM;
176 183 else
177 184 ret = find_lines(self, data, len);
178 185 Py_END_ALLOW_THREADS
179 186 switch (ret) {
180 187 case 0:
181 188 break;
182 189 case MANIFEST_OOM:
183 190 PyErr_NoMemory();
184 191 break;
185 192 case MANIFEST_NOT_SORTED:
186 193 PyErr_Format(PyExc_ValueError,
187 194 "Manifest lines not in sorted order.");
188 195 break;
189 196 case MANIFEST_MALFORMED:
190 197 PyErr_Format(PyExc_ValueError,
191 198 "Manifest did not end in a newline.");
192 199 break;
200 case MANIFEST_BOGUS_FILENAME:
201 PyErr_Format(
202 PyExc_ValueError,
203 "Manifest had an entry with a zero-length filename.");
204 break;
193 205 default:
194 206 PyErr_Format(PyExc_ValueError,
195 207 "Unknown problem parsing manifest.");
196 208 }
197 209 return ret == 0 ? 0 : -1;
198 210 }
199 211
200 212 static void lazymanifest_dealloc(lazymanifest *self)
201 213 {
202 214 /* free any extra lines we had to allocate */
203 215 int i;
204 216 for (i = 0; self->lines && (i < self->numlines); i++) {
205 217 if (self->lines[i].from_malloc) {
206 218 free(self->lines[i].start);
207 219 }
208 220 }
209 221 free(self->lines);
210 222 self->lines = NULL;
211 223 if (self->pydata) {
212 224 Py_DECREF(self->pydata);
213 225 self->pydata = NULL;
214 226 }
215 227 PyObject_Del(self);
216 228 }
217 229
218 230 /* iteration support */
219 231
/* State shared by the keys iterator and the entries iterator. */
typedef struct {
	PyObject_HEAD lazymanifest *m; /* manifest walked; released in lmiter_dealloc() */
	Py_ssize_t pos; /* index of the line returned last; creators set it to -1 */
} lmIter;
224 236
225 237 static void lmiter_dealloc(PyObject *o)
226 238 {
227 239 lmIter *self = (lmIter *)o;
228 240 Py_DECREF(self->m);
229 241 PyObject_Del(self);
230 242 }
231 243
232 244 static line *lmiter_nextline(lmIter *self)
233 245 {
234 246 do {
235 247 self->pos++;
236 248 if (self->pos >= self->m->numlines) {
237 249 return NULL;
238 250 }
239 251 /* skip over deleted manifest entries */
240 252 } while (self->m->lines[self->pos].deleted);
241 253 return self->m->lines + self->pos;
242 254 }
243 255
244 256 static PyObject *lmiter_iterentriesnext(PyObject *o)
245 257 {
246 258 size_t pl;
247 259 line *l;
248 260 Py_ssize_t consumed;
249 261 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
250 262 l = lmiter_nextline((lmIter *)o);
251 263 if (!l) {
252 264 goto done;
253 265 }
254 266 pl = pathlen(l);
255 267 path = PyBytes_FromStringAndSize(l->start, pl);
256 268 hash = nodeof(l);
257 269 if (!path || !hash) {
258 270 goto done;
259 271 }
260 272 consumed = pl + 41;
261 273 flags = PyBytes_FromStringAndSize(l->start + consumed,
262 274 l->len - consumed - 1);
263 275 if (!flags) {
264 276 goto done;
265 277 }
266 278 ret = PyTuple_Pack(3, path, hash, flags);
267 279 done:
268 280 Py_XDECREF(path);
269 281 Py_XDECREF(hash);
270 282 Py_XDECREF(flags);
271 283 return ret;
272 284 }
273 285
/* Type flags for the entries iterator; builds without IS_PY3K additionally
 * set Py_TPFLAGS_HAVE_ITER. */
#ifdef IS_PY3K
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
	| Py_TPFLAGS_HAVE_ITER
#endif

/*
 * Type object of the iterator created by lazymanifest_getentriesiter().
 * The initializer is positional: each value fills the slot named in the
 * comment beside it, so the order must not change.
 */
static PyTypeObject lazymanifestEntriesIterator = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest.entriesiterator", /*tp_name */
	sizeof(lmIter), /*tp_basicsize */
	0, /*tp_itemsize */
	lmiter_dealloc, /*tp_dealloc */
	0, /*tp_print */
	0, /*tp_getattr */
	0, /*tp_setattr */
	0, /*tp_compare */
	0, /*tp_repr */
	0, /*tp_as_number */
	0, /*tp_as_sequence */
	0, /*tp_as_mapping */
	0, /*tp_hash */
	0, /*tp_call */
	0, /*tp_str */
	0, /*tp_getattro */
	0, /*tp_setattro */
	0, /*tp_as_buffer */
	LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
	"Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
	0, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	PyObject_SelfIter, /* tp_iter: __iter__() method */
	lmiter_iterentriesnext, /* tp_iternext: next() method */
};
310 322
311 323 static PyObject *lmiter_iterkeysnext(PyObject *o)
312 324 {
313 325 size_t pl;
314 326 line *l = lmiter_nextline((lmIter *)o);
315 327 if (!l) {
316 328 return NULL;
317 329 }
318 330 pl = pathlen(l);
319 331 return PyBytes_FromStringAndSize(l->start, pl);
320 332 }
321 333
/* Type flags for the keys iterator; builds without IS_PY3K additionally
 * set Py_TPFLAGS_HAVE_ITER. */
#ifdef IS_PY3K
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
	| Py_TPFLAGS_HAVE_ITER
#endif

/*
 * Type object of the iterator created by lazymanifest_getkeysiter().
 * The initializer is positional: each value fills the slot named in the
 * comment beside it, so the order must not change.
 */
static PyTypeObject lazymanifestKeysIterator = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest.keysiterator", /*tp_name */
	sizeof(lmIter), /*tp_basicsize */
	0, /*tp_itemsize */
	lmiter_dealloc, /*tp_dealloc */
	0, /*tp_print */
	0, /*tp_getattr */
	0, /*tp_setattr */
	0, /*tp_compare */
	0, /*tp_repr */
	0, /*tp_as_number */
	0, /*tp_as_sequence */
	0, /*tp_as_mapping */
	0, /*tp_hash */
	0, /*tp_call */
	0, /*tp_str */
	0, /*tp_getattro */
	0, /*tp_setattro */
	0, /*tp_as_buffer */
	LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
	"Keys iterator for a lazymanifest.", /* tp_doc */
	0, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	PyObject_SelfIter, /* tp_iter: __iter__() method */
	lmiter_iterkeysnext, /* tp_iternext: next() method */
};
358 370
359 371 static lazymanifest *lazymanifest_copy(lazymanifest *self);
360 372
361 373 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
362 374 {
363 375 lmIter *i = NULL;
364 376 lazymanifest *t = lazymanifest_copy(self);
365 377 if (!t) {
366 378 PyErr_NoMemory();
367 379 return NULL;
368 380 }
369 381 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
370 382 if (i) {
371 383 i->m = t;
372 384 i->pos = -1;
373 385 } else {
374 386 Py_DECREF(t);
375 387 PyErr_NoMemory();
376 388 }
377 389 return (PyObject *)i;
378 390 }
379 391
380 392 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
381 393 {
382 394 lmIter *i = NULL;
383 395 lazymanifest *t = lazymanifest_copy(self);
384 396 if (!t) {
385 397 PyErr_NoMemory();
386 398 return NULL;
387 399 }
388 400 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
389 401 if (i) {
390 402 i->m = t;
391 403 i->pos = -1;
392 404 } else {
393 405 Py_DECREF(t);
394 406 PyErr_NoMemory();
395 407 }
396 408 return (PyObject *)i;
397 409 }
398 410
399 411 /* __getitem__ and __setitem__ support */
400 412
401 413 static Py_ssize_t lazymanifest_size(lazymanifest *self)
402 414 {
403 415 return self->livelines;
404 416 }
405 417
406 418 static int linecmp(const void *left, const void *right)
407 419 {
408 420 return strcmp(((const line *)left)->start,
409 421 ((const line *)right)->start);
410 422 }
411 423
412 424 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
413 425 {
414 426 line needle;
415 427 line *hit;
416 428 if (!PyBytes_Check(key)) {
417 429 PyErr_Format(PyExc_TypeError,
418 430 "getitem: manifest keys must be a string.");
419 431 return NULL;
420 432 }
421 433 needle.start = PyBytes_AsString(key);
422 434 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
423 435 &linecmp);
424 436 if (!hit || hit->deleted) {
425 437 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
426 438 return NULL;
427 439 }
428 440 return hashflags(hit);
429 441 }
430 442
431 443 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
432 444 {
433 445 line needle;
434 446 line *hit;
435 447 if (!PyBytes_Check(key)) {
436 448 PyErr_Format(PyExc_TypeError,
437 449 "delitem: manifest keys must be a string.");
438 450 return -1;
439 451 }
440 452 needle.start = PyBytes_AsString(key);
441 453 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
442 454 &linecmp);
443 455 if (!hit || hit->deleted) {
444 456 PyErr_Format(PyExc_KeyError,
445 457 "Tried to delete nonexistent manifest entry.");
446 458 return -1;
447 459 }
448 460 self->dirty = true;
449 461 hit->deleted = true;
450 462 self->livelines--;
451 463 return 0;
452 464 }
453 465
454 466 /* Do a binary search for the insertion point for new, creating the
455 467 * new entry if needed. */
456 468 static int internalsetitem(lazymanifest *self, line *new)
457 469 {
458 470 int start = 0, end = self->numlines;
459 471 while (start < end) {
460 472 int pos = start + (end - start) / 2;
461 473 int c = linecmp(new, self->lines + pos);
462 474 if (c < 0)
463 475 end = pos;
464 476 else if (c > 0)
465 477 start = pos + 1;
466 478 else {
467 479 if (self->lines[pos].deleted)
468 480 self->livelines++;
469 481 if (self->lines[pos].from_malloc)
470 482 free(self->lines[pos].start);
471 483 start = pos;
472 484 goto finish;
473 485 }
474 486 }
475 487 /* being here means we need to do an insert */
476 488 if (!realloc_if_full(self)) {
477 489 PyErr_NoMemory();
478 490 return -1;
479 491 }
480 492 memmove(self->lines + start + 1, self->lines + start,
481 493 (self->numlines - start) * sizeof(line));
482 494 self->numlines++;
483 495 self->livelines++;
484 496 finish:
485 497 self->lines[start] = *new;
486 498 self->dirty = true;
487 499 return 0;
488 500 }
489 501
490 502 static int lazymanifest_setitem(
491 503 lazymanifest *self, PyObject *key, PyObject *value)
492 504 {
493 505 char *path;
494 506 Py_ssize_t plen;
495 507 PyObject *pyhash;
496 508 Py_ssize_t hlen;
497 509 char *hash;
498 510 PyObject *pyflags;
499 511 char *flags;
500 512 Py_ssize_t flen;
501 513 size_t dlen;
502 514 char *dest;
503 515 int i;
504 516 line new;
505 517 if (!PyBytes_Check(key)) {
506 518 PyErr_Format(PyExc_TypeError,
507 519 "setitem: manifest keys must be a string.");
508 520 return -1;
509 521 }
510 522 if (!value) {
511 523 return lazymanifest_delitem(self, key);
512 524 }
513 525 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
514 526 PyErr_Format(PyExc_TypeError,
515 527 "Manifest values must be a tuple of (node, flags).");
516 528 return -1;
517 529 }
518 530 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
519 531 return -1;
520 532 }
521 533
522 534 pyhash = PyTuple_GetItem(value, 0);
523 535 if (!PyBytes_Check(pyhash)) {
524 536 PyErr_Format(PyExc_TypeError,
525 537 "node must be a 20-byte string");
526 538 return -1;
527 539 }
528 540 hlen = PyBytes_Size(pyhash);
529 541 /* Some parts of the codebase try and set 21 or 22
530 542 * byte "hash" values in order to perturb things for
531 543 * status. We have to preserve at least the 21st
532 544 * byte. Sigh. If there's a 22nd byte, we drop it on
533 545 * the floor, which works fine.
534 546 */
535 547 if (hlen != 20 && hlen != 21 && hlen != 22) {
536 548 PyErr_Format(PyExc_TypeError,
537 549 "node must be a 20-byte string");
538 550 return -1;
539 551 }
540 552 hash = PyBytes_AsString(pyhash);
541 553
542 554 pyflags = PyTuple_GetItem(value, 1);
543 555 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
544 556 PyErr_Format(PyExc_TypeError,
545 557 "flags must a 0 or 1 byte string");
546 558 return -1;
547 559 }
548 560 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
549 561 return -1;
550 562 }
551 563 /* one null byte and one newline */
552 564 dlen = plen + 41 + flen + 1;
553 565 dest = malloc(dlen);
554 566 if (!dest) {
555 567 PyErr_NoMemory();
556 568 return -1;
557 569 }
558 570 memcpy(dest, path, plen + 1);
559 571 for (i = 0; i < 20; i++) {
560 572 /* Cast to unsigned, so it will not get sign-extended when promoted
561 573 * to int (as is done when passing to a variadic function)
562 574 */
563 575 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
564 576 }
565 577 memcpy(dest + plen + 41, flags, flen);
566 578 dest[plen + 41 + flen] = '\n';
567 579 new.start = dest;
568 580 new.len = dlen;
569 581 new.hash_suffix = '\0';
570 582 if (hlen > 20) {
571 583 new.hash_suffix = hash[20];
572 584 }
573 585 new.from_malloc = true; /* is `start` a pointer we allocated? */
574 586 new.deleted = false; /* is this entry deleted? */
575 587 if (internalsetitem(self, &new)) {
576 588 return -1;
577 589 }
578 590 return 0;
579 591 }
580 592
/* Mapping protocol slots: len(m), m[key], and m[key] = value / del m[key]
 * (lazymanifest_setitem() handles deletion when its value is NULL). */
static PyMappingMethods lazymanifest_mapping_methods = {
	(lenfunc)lazymanifest_size, /* mp_length */
	(binaryfunc)lazymanifest_getitem, /* mp_subscript */
	(objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
};
586 598
587 599 /* sequence methods (important or __contains__ builds an iterator) */
588 600
589 601 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
590 602 {
591 603 line needle;
592 604 line *hit;
593 605 if (!PyBytes_Check(key)) {
594 606 /* Our keys are always strings, so if the contains
595 607 * check is for a non-string, just return false. */
596 608 return 0;
597 609 }
598 610 needle.start = PyBytes_AsString(key);
599 611 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
600 612 &linecmp);
601 613 if (!hit || hit->deleted) {
602 614 return 0;
603 615 }
604 616 return 1;
605 617 }
606 618
/* Sequence protocol slots; only the length and the `in` operator are
 * provided. The initializer is positional, so the order must not change. */
static PySequenceMethods lazymanifest_seq_meths = {
	(lenfunc)lazymanifest_size, /* sq_length */
	0, /* sq_concat */
	0, /* sq_repeat */
	0, /* sq_item */
	0, /* sq_slice */
	0, /* sq_ass_item */
	0, /* sq_ass_slice */
	(objobjproc)lazymanifest_contains, /* sq_contains */
	0, /* sq_inplace_concat */
	0, /* sq_inplace_repeat */
};
619 631
620 632
621 633 /* Other methods (copy, diff, etc) */
622 634 static PyTypeObject lazymanifestType;
623 635
624 636 /* If the manifest has changes, build the new manifest text and reindex it. */
625 637 static int compact(lazymanifest *self)
626 638 {
627 639 int i;
628 640 ssize_t need = 0;
629 641 char *data;
630 642 line *src, *dst;
631 643 PyObject *pydata;
632 644 if (!self->dirty)
633 645 return 0;
634 646 for (i = 0; i < self->numlines; i++) {
635 647 if (!self->lines[i].deleted) {
636 648 need += self->lines[i].len;
637 649 }
638 650 }
639 651 pydata = PyBytes_FromStringAndSize(NULL, need);
640 652 if (!pydata)
641 653 return -1;
642 654 data = PyBytes_AsString(pydata);
643 655 if (!data) {
644 656 return -1;
645 657 }
646 658 src = self->lines;
647 659 dst = self->lines;
648 660 for (i = 0; i < self->numlines; i++, src++) {
649 661 char *tofree = NULL;
650 662 if (src->from_malloc) {
651 663 tofree = src->start;
652 664 }
653 665 if (!src->deleted) {
654 666 memcpy(data, src->start, src->len);
655 667 *dst = *src;
656 668 dst->start = data;
657 669 dst->from_malloc = false;
658 670 data += dst->len;
659 671 dst++;
660 672 }
661 673 free(tofree);
662 674 }
663 675 Py_DECREF(self->pydata);
664 676 self->pydata = pydata;
665 677 self->numlines = self->livelines;
666 678 self->dirty = false;
667 679 return 0;
668 680 }
669 681
670 682 static PyObject *lazymanifest_text(lazymanifest *self)
671 683 {
672 684 if (compact(self) != 0) {
673 685 PyErr_NoMemory();
674 686 return NULL;
675 687 }
676 688 Py_INCREF(self->pydata);
677 689 return self->pydata;
678 690 }
679 691
680 692 static lazymanifest *lazymanifest_copy(lazymanifest *self)
681 693 {
682 694 lazymanifest *copy = NULL;
683 695 if (compact(self) != 0) {
684 696 goto nomem;
685 697 }
686 698 copy = PyObject_New(lazymanifest, &lazymanifestType);
687 699 if (!copy) {
688 700 goto nomem;
689 701 }
690 702 lazymanifest_init_early(copy);
691 703 copy->numlines = self->numlines;
692 704 copy->livelines = self->livelines;
693 705 copy->dirty = false;
694 706 copy->lines = malloc(self->maxlines *sizeof(line));
695 707 if (!copy->lines) {
696 708 goto nomem;
697 709 }
698 710 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
699 711 copy->maxlines = self->maxlines;
700 712 copy->pydata = self->pydata;
701 713 Py_INCREF(copy->pydata);
702 714 return copy;
703 715 nomem:
704 716 PyErr_NoMemory();
705 717 Py_XDECREF(copy);
706 718 return NULL;
707 719 }
708 720
709 721 static lazymanifest *lazymanifest_filtercopy(
710 722 lazymanifest *self, PyObject *matchfn)
711 723 {
712 724 lazymanifest *copy = NULL;
713 725 int i;
714 726 if (!PyCallable_Check(matchfn)) {
715 727 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
716 728 return NULL;
717 729 }
718 730 /* compact ourselves first to avoid double-frees later when we
719 731 * compact tmp so that it doesn't have random pointers to our
720 732 * underlying from_malloc-data (self->pydata is safe) */
721 733 if (compact(self) != 0) {
722 734 goto nomem;
723 735 }
724 736 copy = PyObject_New(lazymanifest, &lazymanifestType);
725 737 if (!copy) {
726 738 goto nomem;
727 739 }
728 740 lazymanifest_init_early(copy);
729 741 copy->dirty = true;
730 742 copy->lines = malloc(self->maxlines * sizeof(line));
731 743 if (!copy->lines) {
732 744 goto nomem;
733 745 }
734 746 copy->maxlines = self->maxlines;
735 747 copy->numlines = 0;
736 748 copy->pydata = self->pydata;
737 749 Py_INCREF(copy->pydata);
738 750 for (i = 0; i < self->numlines; i++) {
739 751 PyObject *arglist = NULL, *result = NULL;
740 752 arglist = Py_BuildValue(PY23("(s)", "(y)"),
741 753 self->lines[i].start);
742 754 if (!arglist) {
743 755 goto bail;
744 756 }
745 757 result = PyObject_CallObject(matchfn, arglist);
746 758 Py_DECREF(arglist);
747 759 /* if the callback raised an exception, just let it
748 760 * through and give up */
749 761 if (!result) {
750 762 goto bail;
751 763 }
752 764 if (PyObject_IsTrue(result)) {
753 765 assert(!(self->lines[i].from_malloc));
754 766 copy->lines[copy->numlines++] = self->lines[i];
755 767 }
756 768 Py_DECREF(result);
757 769 }
758 770 copy->livelines = copy->numlines;
759 771 return copy;
760 772 nomem:
761 773 PyErr_NoMemory();
762 774 bail:
763 775 Py_XDECREF(copy);
764 776 return NULL;
765 777 }
766 778
767 779 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
768 780 {
769 781 lazymanifest *other;
770 782 PyObject *pyclean = NULL;
771 783 bool listclean;
772 784 PyObject *emptyTup = NULL, *ret = NULL;
773 785 PyObject *es;
774 786 int sneedle = 0, oneedle = 0;
775 787 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
776 788 return NULL;
777 789 }
778 790 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
779 791 es = PyBytes_FromString("");
780 792 if (!es) {
781 793 goto nomem;
782 794 }
783 795 emptyTup = PyTuple_Pack(2, Py_None, es);
784 796 Py_DECREF(es);
785 797 if (!emptyTup) {
786 798 goto nomem;
787 799 }
788 800 ret = PyDict_New();
789 801 if (!ret) {
790 802 goto nomem;
791 803 }
792 804 while (sneedle != self->numlines || oneedle != other->numlines) {
793 805 line *left = self->lines + sneedle;
794 806 line *right = other->lines + oneedle;
795 807 int result;
796 808 PyObject *key;
797 809 PyObject *outer;
798 810 /* If we're looking at a deleted entry and it's not
799 811 * the end of the manifest, just skip it. */
800 812 if (sneedle < self->numlines && left->deleted) {
801 813 sneedle++;
802 814 continue;
803 815 }
804 816 if (oneedle < other->numlines && right->deleted) {
805 817 oneedle++;
806 818 continue;
807 819 }
808 820 /* if we're at the end of either manifest, then we
809 821 * know the remaining items are adds so we can skip
810 822 * the strcmp. */
811 823 if (sneedle == self->numlines) {
812 824 result = 1;
813 825 } else if (oneedle == other->numlines) {
814 826 result = -1;
815 827 } else {
816 828 result = linecmp(left, right);
817 829 }
818 830 key = result <= 0 ?
819 831 PyBytes_FromString(left->start) :
820 832 PyBytes_FromString(right->start);
821 833 if (!key)
822 834 goto nomem;
823 835 if (result < 0) {
824 836 PyObject *l = hashflags(left);
825 837 if (!l) {
826 838 goto nomem;
827 839 }
828 840 outer = PyTuple_Pack(2, l, emptyTup);
829 841 Py_DECREF(l);
830 842 if (!outer) {
831 843 goto nomem;
832 844 }
833 845 PyDict_SetItem(ret, key, outer);
834 846 Py_DECREF(outer);
835 847 sneedle++;
836 848 } else if (result > 0) {
837 849 PyObject *r = hashflags(right);
838 850 if (!r) {
839 851 goto nomem;
840 852 }
841 853 outer = PyTuple_Pack(2, emptyTup, r);
842 854 Py_DECREF(r);
843 855 if (!outer) {
844 856 goto nomem;
845 857 }
846 858 PyDict_SetItem(ret, key, outer);
847 859 Py_DECREF(outer);
848 860 oneedle++;
849 861 } else {
850 862 /* file exists in both manifests */
851 863 if (left->len != right->len
852 864 || memcmp(left->start, right->start, left->len)
853 865 || left->hash_suffix != right->hash_suffix) {
854 866 PyObject *l = hashflags(left);
855 867 PyObject *r;
856 868 if (!l) {
857 869 goto nomem;
858 870 }
859 871 r = hashflags(right);
860 872 if (!r) {
861 873 Py_DECREF(l);
862 874 goto nomem;
863 875 }
864 876 outer = PyTuple_Pack(2, l, r);
865 877 Py_DECREF(l);
866 878 Py_DECREF(r);
867 879 if (!outer) {
868 880 goto nomem;
869 881 }
870 882 PyDict_SetItem(ret, key, outer);
871 883 Py_DECREF(outer);
872 884 } else if (listclean) {
873 885 PyDict_SetItem(ret, key, Py_None);
874 886 }
875 887 sneedle++;
876 888 oneedle++;
877 889 }
878 890 Py_DECREF(key);
879 891 }
880 892 Py_DECREF(emptyTup);
881 893 return ret;
882 894 nomem:
883 895 PyErr_NoMemory();
884 896 Py_XDECREF(ret);
885 897 Py_XDECREF(emptyTup);
886 898 return NULL;
887 899 }
888 900
/* Methods exposed on lazymanifest objects. Each entry gives the Python
 * name, the C implementation, its calling convention and its docstring;
 * the {NULL} entry terminates the table. */
static PyMethodDef lazymanifest_methods[] = {
	{"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
	 "Iterate over file names in this lazymanifest."},
	{"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
	 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
	{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
	 "Make a copy of this lazymanifest."},
	{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
	 "Make a copy of this manifest filtered by matchfn."},
	{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
	 "Compare this lazymanifest to another one."},
	{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
	 "Encode this manifest to text."},
	{NULL},
};
904 916
/* Type flags for lazymanifest; builds without IS_PY3K additionally set
 * Py_TPFLAGS_HAVE_SEQUENCE_IN. */
#ifdef IS_PY3K
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
#else
#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
#endif

/*
 * Type object for lazymanifest. The initializer is positional: each value
 * fills the slot named in the comment beside it, so the order must not
 * change. tp_new is not listed here; manifest_module_init() assigns it.
 */
static PyTypeObject lazymanifestType = {
	PyVarObject_HEAD_INIT(NULL, 0) /* header */
	"parsers.lazymanifest", /* tp_name */
	sizeof(lazymanifest), /* tp_basicsize */
	0, /* tp_itemsize */
	(destructor)lazymanifest_dealloc, /* tp_dealloc */
	0, /* tp_print */
	0, /* tp_getattr */
	0, /* tp_setattr */
	0, /* tp_compare */
	0, /* tp_repr */
	0, /* tp_as_number */
	&lazymanifest_seq_meths, /* tp_as_sequence */
	&lazymanifest_mapping_methods, /* tp_as_mapping */
	0, /* tp_hash */
	0, /* tp_call */
	0, /* tp_str */
	0, /* tp_getattro */
	0, /* tp_setattro */
	0, /* tp_as_buffer */
	LAZYMANIFEST_TPFLAGS, /* tp_flags */
	"TODO(augie)", /* tp_doc */
	0, /* tp_traverse */
	0, /* tp_clear */
	0, /* tp_richcompare */
	0, /* tp_weaklistoffset */
	(getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
	0, /* tp_iternext */
	lazymanifest_methods, /* tp_methods */
	0, /* tp_members */
	0, /* tp_getset */
	0, /* tp_base */
	0, /* tp_dict */
	0, /* tp_descr_get */
	0, /* tp_descr_set */
	0, /* tp_dictoffset */
	(initproc)lazymanifest_init, /* tp_init */
	0, /* tp_alloc */
};
950 962
951 963 void manifest_module_init(PyObject * mod)
952 964 {
953 965 lazymanifestType.tp_new = PyType_GenericNew;
954 966 if (PyType_Ready(&lazymanifestType) < 0)
955 967 return;
956 968 Py_INCREF(&lazymanifestType);
957 969
958 970 PyModule_AddObject(mod, "lazymanifest",
959 971 (PyObject *)&lazymanifestType);
960 972 }
@@ -1,422 +1,446
1 1 from __future__ import absolute_import
2 2
3 3 import binascii
4 4 import itertools
5 5 import silenttestrunner
6 6 import unittest
7 import zlib
7 8
8 9 from mercurial import (
9 10 manifest as manifestmod,
10 11 match as matchmod,
11 12 )
12 13
# Shared manifest fixtures. Each manifest line has the form
# ``<path>\0<40 hex digits><optional flag>\n``.

# NOTE: the name is misspelled ("EMTPY") but is kept because the test
# classes in this module refer to it by this spelling.
EMTPY_MANIFEST = b''

# Hex node ids used in the fixtures, plus their 20-byte binary forms.
HASH_1 = b'1' * 40
BIN_HASH_1 = binascii.unhexlify(HASH_1)
HASH_2 = b'f' * 40
BIN_HASH_2 = binascii.unhexlify(HASH_2)
HASH_3 = b'1234567890abcdef0987654321deadbeef0fcafe'
BIN_HASH_3 = binascii.unhexlify(HASH_3)

# Two entries: one flagged 'l', one without a flag.
A_SHORT_MANIFEST = (
    b'bar/baz/qux.py\0%(hash2)s%(flag2)s\n'
    b'foo\0%(hash1)s%(flag1)s\n'
) % {b'hash1': HASH_1,
     b'flag1': b'',
     b'hash2': HASH_2,
     b'flag2': b'l',
     }

# Eighteen entries spread over nested directories, used by the matcher tests.
A_DEEPER_MANIFEST = (
    b'a/b/c/bar.py\0%(hash3)s%(flag1)s\n'
    b'a/b/c/bar.txt\0%(hash1)s%(flag1)s\n'
    b'a/b/c/foo.py\0%(hash3)s%(flag1)s\n'
    b'a/b/c/foo.txt\0%(hash2)s%(flag2)s\n'
    b'a/b/d/baz.py\0%(hash3)s%(flag1)s\n'
    b'a/b/d/qux.py\0%(hash1)s%(flag2)s\n'
    b'a/b/d/ten.txt\0%(hash3)s%(flag2)s\n'
    b'a/b/dog.py\0%(hash3)s%(flag1)s\n'
    b'a/b/fish.py\0%(hash2)s%(flag1)s\n'
    b'a/c/london.py\0%(hash3)s%(flag2)s\n'
    b'a/c/paper.txt\0%(hash2)s%(flag2)s\n'
    b'a/c/paris.py\0%(hash2)s%(flag1)s\n'
    b'a/d/apple.py\0%(hash3)s%(flag1)s\n'
    b'a/d/pizza.py\0%(hash3)s%(flag2)s\n'
    b'a/green.py\0%(hash1)s%(flag2)s\n'
    b'a/purple.py\0%(hash2)s%(flag1)s\n'
    b'app.py\0%(hash3)s%(flag1)s\n'
    b'readme.txt\0%(hash2)s%(flag1)s\n'
) % {b'hash1': HASH_1,
     b'flag1': b'',
     b'hash2': HASH_2,
     b'flag2': b'l',
     b'hash3': HASH_3,
     }

HUGE_MANIFEST_ENTRIES = 200001

# Python 2/3 compatibility shims. xrange is a builtin on Python 2, so it
# never appears in globals(); probe the name itself instead so the lazy
# Python 2 xrange is kept when it exists.
izip = getattr(itertools, 'izip', zip)
try:
    xrange
except NameError:
    xrange = range

# HUGE_MANIFEST_ENTRIES lines named file0..fileN, sorted bytewise, cycling
# through two hashes and three flag values.
A_HUGE_MANIFEST = b''.join(sorted(
    b'file%d\0%s%s\n' % (i, h, f) for i, h, f in
    izip(xrange(HUGE_MANIFEST_ENTRIES),
         itertools.cycle((HASH_1, HASH_2)),
         itertools.cycle((b'', b'x', b'l')))))
67 68
class basemanifesttests(object):
    # Parser-agnostic test cases. Concrete test classes mix this in next to
    # unittest.TestCase and implement parsemanifest() to pick the manifest
    # implementation under test.

    def parsemanifest(self, text):
        raise NotImplementedError('parsemanifest not implemented by test case')

    def _assertparsefails(self, text, message):
        # Parsing ``text`` must raise ValueError, and ``message`` must appear
        # in the stringified exception.
        with self.assertRaises(ValueError) as cm:
            self.parsemanifest(text)
        self.assertIn(message, str(cm.exception))

    def testEmptyManifest(self):
        m = self.parsemanifest(EMTPY_MANIFEST)
        self.assertEqual(0, len(m))
        self.assertEqual([], list(m))

    def testManifest(self):
        m = self.parsemanifest(A_SHORT_MANIFEST)
        self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m))
        self.assertEqual(BIN_HASH_2, m[b'bar/baz/qux.py'])
        self.assertEqual(b'l', m.flags(b'bar/baz/qux.py'))
        self.assertEqual(BIN_HASH_1, m[b'foo'])
        self.assertEqual(b'', m.flags(b'foo'))
        with self.assertRaises(KeyError):
            m[b'wat']

    def testSetItem(self):
        want = BIN_HASH_1

        m = self.parsemanifest(EMTPY_MANIFEST)
        m[b'a'] = want
        self.assertIn(b'a', m)
        self.assertEqual(want, m[b'a'])
        self.assertEqual(b'a\0' + HASH_1 + b'\n', m.text())

        m = self.parsemanifest(A_SHORT_MANIFEST)
        m[b'a'] = want
        self.assertEqual(want, m[b'a'])
        self.assertEqual(b'a\0' + HASH_1 + b'\n' + A_SHORT_MANIFEST,
                         m.text())

    def testSetFlag(self):
        want = b'x'

        m = self.parsemanifest(EMTPY_MANIFEST)
        # first add a file; a file-less flag makes no sense
        m[b'a'] = BIN_HASH_1
        m.setflag(b'a', want)
        self.assertEqual(want, m.flags(b'a'))
        self.assertEqual(b'a\0' + HASH_1 + want + b'\n', m.text())

        m = self.parsemanifest(A_SHORT_MANIFEST)
        # first add a file; a file-less flag makes no sense
        m[b'a'] = BIN_HASH_1
        m.setflag(b'a', want)
        self.assertEqual(want, m.flags(b'a'))
        self.assertEqual(b'a\0' + HASH_1 + want + b'\n' + A_SHORT_MANIFEST,
                         m.text())

    def testCopy(self):
        m = self.parsemanifest(A_SHORT_MANIFEST)
        m[b'a'] = BIN_HASH_1
        m2 = m.copy()
        del m
        del m2 # make sure we don't double free() anything

    def testCompaction(self):
        unhex = binascii.unhexlify
        h1, h2 = unhex(HASH_1), unhex(HASH_2)
        m = self.parsemanifest(A_SHORT_MANIFEST)
        m[b'alpha'] = h1
        m[b'beta'] = h2
        del m[b'foo']
        want = b'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % (
            HASH_1, HASH_2, HASH_2)
        self.assertEqual(want, m.text())
        self.assertEqual(3, len(m))
        self.assertEqual([b'alpha', b'bar/baz/qux.py', b'beta'], list(m))
        self.assertEqual(h1, m[b'alpha'])
        self.assertEqual(h2, m[b'bar/baz/qux.py'])
        self.assertEqual(h2, m[b'beta'])
        self.assertEqual(b'', m.flags(b'alpha'))
        self.assertEqual(b'l', m.flags(b'bar/baz/qux.py'))
        self.assertEqual(b'', m.flags(b'beta'))
        with self.assertRaises(KeyError):
            m[b'foo']

    def testSetGetNodeSuffix(self):
        clean = self.parsemanifest(A_SHORT_MANIFEST)
        m = self.parsemanifest(A_SHORT_MANIFEST)
        h = m[b'foo']
        f = m.flags(b'foo')
        want = h + b'a'
        # Merge code wants to set 21-byte fake hashes at times
        m[b'foo'] = want
        self.assertEqual(want, m[b'foo'])
        self.assertEqual([(b'bar/baz/qux.py', BIN_HASH_2),
                          (b'foo', BIN_HASH_1 + b'a')],
                         list(m.items()))
        # Sometimes it even tries a 22-byte fake hash, but we can
        # return 21 and it'll work out
        m[b'foo'] = want + b'+'
        self.assertEqual(want, m[b'foo'])
        # make sure the suffix survives a copy
        match = matchmod.match(b'', b'', [b're:foo'])
        m2 = m.matches(match)
        self.assertEqual(want, m2[b'foo'])
        self.assertEqual(1, len(m2))
        m2 = m.copy()
        self.assertEqual(want, m2[b'foo'])
        # suffix with iteration
        self.assertEqual([(b'bar/baz/qux.py', BIN_HASH_2),
                          (b'foo', want)],
                         list(m.items()))

        # shows up in diff
        self.assertEqual({b'foo': ((want, f), (h, b''))}, m.diff(clean))
        self.assertEqual({b'foo': ((h, b''), (want, f))}, clean.diff(m))

    def testMatchException(self):
        m = self.parsemanifest(A_SHORT_MANIFEST)
        match = matchmod.match(b'', b'', [b're:.*'])
        def filt(path):
            if path == b'foo':
                # Raised explicitly rather than via ``assert False`` so the
                # callback still fails when Python runs with -O.
                raise AssertionError()
            return True
        match.matchfn = filt
        with self.assertRaises(AssertionError):
            m.matches(match)

    def testRemoveItem(self):
        m = self.parsemanifest(A_SHORT_MANIFEST)
        del m[b'foo']
        with self.assertRaises(KeyError):
            m[b'foo']
        self.assertEqual(1, len(m))
        self.assertEqual(1, len(list(m)))
        # now restore and make sure everything works right
        m[b'foo'] = b'a' * 20
        self.assertEqual(2, len(m))
        self.assertEqual(2, len(list(m)))

    def testManifestDiff(self):
        MISSING = (None, b'')
        addl = b'z-only-in-left\0' + HASH_1 + b'\n'
        addr = b'z-only-in-right\0' + HASH_2 + b'x\n'
        left = self.parsemanifest(
            A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + b'x') + addl)
        right = self.parsemanifest(A_SHORT_MANIFEST + addr)
        want = {
            b'foo': ((BIN_HASH_3, b'x'),
                     (BIN_HASH_1, b'')),
            b'z-only-in-left': ((BIN_HASH_1, b''), MISSING),
            b'z-only-in-right': (MISSING, (BIN_HASH_2, b'x')),
        }
        self.assertEqual(want, left.diff(right))

        want = {
            b'bar/baz/qux.py': (MISSING, (BIN_HASH_2, b'l')),
            b'foo': (MISSING, (BIN_HASH_3, b'x')),
            b'z-only-in-left': (MISSING, (BIN_HASH_1, b'')),
        }
        self.assertEqual(want, self.parsemanifest(EMTPY_MANIFEST).diff(left))

        want = {
            b'bar/baz/qux.py': ((BIN_HASH_2, b'l'), MISSING),
            b'foo': ((BIN_HASH_3, b'x'), MISSING),
            b'z-only-in-left': ((BIN_HASH_1, b''), MISSING),
        }
        self.assertEqual(want, left.diff(self.parsemanifest(EMTPY_MANIFEST)))
        copy = right.copy()
        del copy[b'z-only-in-right']
        del right[b'foo']
        want = {
            b'foo': (MISSING, (BIN_HASH_1, b'')),
            b'z-only-in-right': ((BIN_HASH_2, b'x'), MISSING),
        }
        self.assertEqual(want, right.diff(copy))

        short = self.parsemanifest(A_SHORT_MANIFEST)
        pruned = short.copy()
        del pruned[b'foo']
        want = {
            b'foo': ((BIN_HASH_1, b''), MISSING),
        }
        self.assertEqual(want, short.diff(pruned))
        want = {
            b'foo': (MISSING, (BIN_HASH_1, b'')),
        }
        self.assertEqual(want, pruned.diff(short))
        want = {
            b'bar/baz/qux.py': None,
            b'foo': (MISSING, (BIN_HASH_1, b'')),
        }
        self.assertEqual(want, pruned.diff(short, clean=True))

    def testReversedLines(self):
        backwards = b''.join(
            l + b'\n' for l in reversed(A_SHORT_MANIFEST.split(b'\n')) if l)
        self._assertparsefails(backwards,
                               'Manifest lines not in sorted order.')

    def testNoTerminalNewline(self):
        self._assertparsefails(A_SHORT_MANIFEST + b'wat',
                               'Manifest did not end in a newline.')

    def testNoNewLineAtAll(self):
        self._assertparsefails(b'wat',
                               'Manifest did not end in a newline.')

    def testHugeManifest(self):
        m = self.parsemanifest(A_HUGE_MANIFEST)
        self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
        self.assertEqual(len(m), len(list(m)))

    def testMatchesMetadata(self):
        '''Tests matches() for a few specific files to make sure that both
        the set of files as well as their flags and nodeids are correct in
        the resulting manifest.'''
        m = self.parsemanifest(A_HUGE_MANIFEST)

        match = matchmod.match(b'/', b'',
                [b'file1', b'file200', b'file300'], exact=True)
        m2 = m.matches(match)

        w = (b'file1\0%sx\n'
             b'file200\0%sl\n'
             b'file300\0%s\n') % (HASH_2, HASH_1, HASH_1)
        self.assertEqual(w, m2.text())

    def testMatchesNonexistentFile(self):
        '''Tests matches() for a small set of specific files, including one
        nonexistent file to make sure it only matches against existing files.
        '''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'',
                [b'a/b/c/bar.txt', b'a/b/d/qux.py',
                 b'readme.txt', b'nonexistent'],
                exact=True)
        m2 = m.matches(match)

        self.assertEqual(
                [b'a/b/c/bar.txt', b'a/b/d/qux.py', b'readme.txt'],
                m2.keys())

    def testMatchesNonexistentDirectory(self):
        '''Tests matches() for a relpath match on a directory that doesn't
        actually exist.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'', [b'a/f'], default=b'relpath')
        m2 = m.matches(match)

        self.assertEqual([], m2.keys())

    def testMatchesExactLarge(self):
        '''Tests matches() for files matching a large list of exact files.
        '''
        m = self.parsemanifest(A_HUGE_MANIFEST)

        flist = m.keys()[80:300]
        match = matchmod.match(b'/', b'', flist, exact=True)
        m2 = m.matches(match)

        self.assertEqual(flist, m2.keys())

    def testMatchesFull(self):
        '''Tests matches() for what should be a full match.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'', [b''])
        m2 = m.matches(match)

        self.assertEqual(m.keys(), m2.keys())

    def testMatchesDirectory(self):
        '''Tests matches() on a relpath match on a directory, which should
        match against all files within said directory.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'', [b'a/b'], default=b'relpath')
        m2 = m.matches(match)

        self.assertEqual([
            b'a/b/c/bar.py', b'a/b/c/bar.txt', b'a/b/c/foo.py',
            b'a/b/c/foo.txt',
            b'a/b/d/baz.py', b'a/b/d/qux.py', b'a/b/d/ten.txt', b'a/b/dog.py',
            b'a/b/fish.py'], m2.keys())

    def testMatchesExactPath(self):
        '''Tests matches() on an exact match on a directory, which should
        result in an empty manifest because you can't perform an exact match
        against a directory.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'', [b'a/b'], exact=True)
        m2 = m.matches(match)

        self.assertEqual([], m2.keys())

    def testMatchesCwd(self):
        '''Tests matches() on a relpath match with the current directory ('.')
        when not in the root directory.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'a/b', [b'.'], default=b'relpath')
        m2 = m.matches(match)

        self.assertEqual([
            b'a/b/c/bar.py', b'a/b/c/bar.txt', b'a/b/c/foo.py',
            b'a/b/c/foo.txt', b'a/b/d/baz.py', b'a/b/d/qux.py',
            b'a/b/d/ten.txt', b'a/b/dog.py', b'a/b/fish.py'], m2.keys())

    def testMatchesWithPattern(self):
        '''Tests matches() for files matching a pattern that reside
        deeper than the specified directory.'''
        m = self.parsemanifest(A_DEEPER_MANIFEST)

        match = matchmod.match(b'/', b'', [b'a/b/*/*.txt'])
        m2 = m.matches(match)

        self.assertEqual(
                [b'a/b/c/bar.txt', b'a/b/c/foo.txt', b'a/b/d/ten.txt'],
                m2.keys())
395 396
class testmanifestdict(unittest.TestCase, basemanifesttests):
    # Runs the shared cases against manifestmod.manifestdict.
    def parsemanifest(self, text):
        return manifestmod.manifestdict(text)

    def testObviouslyBogusManifest(self):
        # This is a 163k manifest that came from oss-fuzz. It was a
        # timeout there, but when run normally it doesn't seem to
        # present any particular slowness.
        #
        # The payload must be a bytes literal: zlib.decompress() rejects
        # native str on Python 3.
        data = zlib.decompress(
            b'x\x9c\xed\xce;\n\x83\x00\x10\x04\xd0\x8deNa\x93~\xf1\x03\xc9q\xf4'
            b'\x14\xeaU\xbdB\xda\xd4\xe6Cj\xc1FA\xde+\x86\xe9f\xa2\xfci\xbb\xfb'
            b'\xa3\xef\xea\xba\xca\x7fk\x86q\x9a\xc6\xc8\xcc&\xb3\xcf\xf8\xb8|#'
            b'\x8a9\x00\xd8\xe6v\xf4\x01N\xe1\n\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\xc0\x8aey\x1d}\x01\xd8\xe0\xb9\xf3\xde\x1b\xcf\x17'
            b'\xac\xbe')
        with self.assertRaises(ValueError):
            self.parsemanifest(data)
423
class testtreemanifest(unittest.TestCase, basemanifesttests):
    # Runs the shared cases against manifestmod.treemanifest (rooted at the
    # empty directory b'') and adds a subtree-walking test.
    def parsemanifest(self, text):
        return manifestmod.treemanifest(b'', text)

    def testWalkSubtrees(self):
        tree = self.parsemanifest(A_DEEPER_MANIFEST)

        # Without a matcher every directory of the fixture is expected.
        walked = [subtree._dir for subtree in tree.walksubtrees()]
        everydir = [
            b'', b'a/', b'a/c/', b'a/d/', b'a/b/', b'a/b/c/', b'a/b/d/']
        self.assertEqual(sorted(everydir), sorted(walked))

        # With a path: matcher only a/b/ and the directories below it
        # are expected.
        underab = matchmod.match(b'/', b'', [b'path:a/b/'])
        walked = [subtree._dir
                  for subtree in tree.walksubtrees(matcher=underab)]
        self.assertEqual(
            sorted([b'a/b/', b'a/b/c/', b'a/b/d/']),
            sorted(walked))
420 444
# When executed as a script, pass this module's name to silenttestrunner.main().
421 445 if __name__ == '__main__':
422 446 silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now