##// END OF EJS Templates
parsers: backout version mismatch detection from 21dafd8546d1...
Matt Mackall -
r20159:96b2dd77 default
parent child Browse files
Show More
@@ -1,1988 +1,1965 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
/*
 * Lookup table mapping a byte value to the hex digit it encodes
 * (0-15 for '0'-'9', 'A'-'F' and 'a'-'f'), or -1 for any other byte.
 * The table is only ever read, so it is declared const.
 */
static const int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
35 35
36 36 static inline int hexdigit(const char *p, Py_ssize_t off)
37 37 {
38 38 int8_t val = hextable[(unsigned char)p[off]];
39 39
40 40 if (val >= 0) {
41 41 return val;
42 42 }
43 43
44 44 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
45 45 return 0;
46 46 }
47 47
48 48 /*
49 49 * Turn a hex-encoded string into binary.
50 50 */
51 51 static PyObject *unhexlify(const char *str, int len)
52 52 {
53 53 PyObject *ret;
54 54 char *d;
55 55 int i;
56 56
57 57 ret = PyBytes_FromStringAndSize(NULL, len / 2);
58 58
59 59 if (!ret)
60 60 return NULL;
61 61
62 62 d = PyBytes_AsString(ret);
63 63
64 64 for (i = 0; i < len;) {
65 65 int hi = hexdigit(str, i++);
66 66 int lo = hexdigit(str, i++);
67 67 *d++ = (hi << 4) | lo;
68 68 }
69 69
70 70 return ret;
71 71 }
72 72
73 73 /*
74 74 * This code assumes that a manifest is stitched together with newline
75 75 * ('\n') characters.
76 76 */
77 77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 78 {
79 79 PyObject *mfdict, *fdict;
80 80 char *str, *start, *end;
81 81 int len;
82 82
83 83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 84 &PyDict_Type, &mfdict,
85 85 &PyDict_Type, &fdict,
86 86 &str, &len))
87 87 goto quit;
88 88
89 89 start = str;
90 90 end = str + len;
91 91 while (start < end) {
92 92 PyObject *file = NULL, *node = NULL;
93 93 PyObject *flags = NULL;
94 94 char *zero = NULL, *newline = NULL;
95 95 ptrdiff_t nlen;
96 96
97 97 zero = memchr(start, '\0', end - start);
98 98 if (!zero) {
99 99 PyErr_SetString(PyExc_ValueError,
100 100 "manifest entry has no separator");
101 101 goto quit;
102 102 }
103 103
104 104 newline = memchr(zero + 1, '\n', end - (zero + 1));
105 105 if (!newline) {
106 106 PyErr_SetString(PyExc_ValueError,
107 107 "manifest contains trailing garbage");
108 108 goto quit;
109 109 }
110 110
111 111 file = PyBytes_FromStringAndSize(start, zero - start);
112 112
113 113 if (!file)
114 114 goto bail;
115 115
116 116 nlen = newline - zero - 1;
117 117
118 118 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
119 119 if (!node)
120 120 goto bail;
121 121
122 122 if (nlen > 40) {
123 123 flags = PyBytes_FromStringAndSize(zero + 41,
124 124 nlen - 40);
125 125 if (!flags)
126 126 goto bail;
127 127
128 128 if (PyDict_SetItem(fdict, file, flags) == -1)
129 129 goto bail;
130 130 }
131 131
132 132 if (PyDict_SetItem(mfdict, file, node) == -1)
133 133 goto bail;
134 134
135 135 start = newline + 1;
136 136
137 137 Py_XDECREF(flags);
138 138 Py_XDECREF(node);
139 139 Py_XDECREF(file);
140 140 continue;
141 141 bail:
142 142 Py_XDECREF(flags);
143 143 Py_XDECREF(node);
144 144 Py_XDECREF(file);
145 145 goto quit;
146 146 }
147 147
148 148 Py_INCREF(Py_None);
149 149 return Py_None;
150 150 quit:
151 151 return NULL;
152 152 }
153 153
154 154 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
155 155 {
156 156 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
157 157 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
158 158 char state, *str, *cur, *end, *cpos;
159 159 int mode, size, mtime;
160 160 unsigned int flen;
161 161 int len;
162 162
163 163 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
164 164 &PyDict_Type, &dmap,
165 165 &PyDict_Type, &cmap,
166 166 &str, &len))
167 167 goto quit;
168 168
169 169 /* read parents */
170 170 if (len < 40)
171 171 goto quit;
172 172
173 173 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
174 174 if (!parents)
175 175 goto quit;
176 176
177 177 /* read filenames */
178 178 cur = str + 40;
179 179 end = str + len;
180 180
181 181 while (cur < end - 17) {
182 182 /* unpack header */
183 183 state = *cur;
184 184 mode = getbe32(cur + 1);
185 185 size = getbe32(cur + 5);
186 186 mtime = getbe32(cur + 9);
187 187 flen = getbe32(cur + 13);
188 188 cur += 17;
189 189 if (cur + flen > end || cur + flen < cur) {
190 190 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
191 191 goto quit;
192 192 }
193 193
194 194 entry = Py_BuildValue("ciii", state, mode, size, mtime);
195 195 if (!entry)
196 196 goto quit;
197 197 PyObject_GC_UnTrack(entry); /* don't waste time with this */
198 198
199 199 cpos = memchr(cur, 0, flen);
200 200 if (cpos) {
201 201 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
202 202 cname = PyBytes_FromStringAndSize(cpos + 1,
203 203 flen - (cpos - cur) - 1);
204 204 if (!fname || !cname ||
205 205 PyDict_SetItem(cmap, fname, cname) == -1 ||
206 206 PyDict_SetItem(dmap, fname, entry) == -1)
207 207 goto quit;
208 208 Py_DECREF(cname);
209 209 } else {
210 210 fname = PyBytes_FromStringAndSize(cur, flen);
211 211 if (!fname ||
212 212 PyDict_SetItem(dmap, fname, entry) == -1)
213 213 goto quit;
214 214 }
215 215 cur += flen;
216 216 Py_DECREF(fname);
217 217 Py_DECREF(entry);
218 218 fname = cname = entry = NULL;
219 219 }
220 220
221 221 ret = parents;
222 222 Py_INCREF(ret);
223 223 quit:
224 224 Py_XDECREF(fname);
225 225 Py_XDECREF(cname);
226 226 Py_XDECREF(entry);
227 227 Py_XDECREF(parents);
228 228 return ret;
229 229 }
230 230
231 231 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
232 232 {
233 233 PyObject *o = PyTuple_GET_ITEM(tuple, off);
234 234 long val;
235 235
236 236 if (PyInt_Check(o))
237 237 val = PyInt_AS_LONG(o);
238 238 else if (PyLong_Check(o)) {
239 239 val = PyLong_AsLong(o);
240 240 if (val == -1 && PyErr_Occurred())
241 241 return -1;
242 242 } else {
243 243 PyErr_SetString(PyExc_TypeError, "expected an int or long");
244 244 return -1;
245 245 }
246 246 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
247 247 PyErr_SetString(PyExc_OverflowError,
248 248 "Python value to large to convert to uint32_t");
249 249 return -1;
250 250 }
251 251 *v = (uint32_t)val;
252 252 return 0;
253 253 }
254 254
255 255 static PyObject *dirstate_unset;
256 256
257 257 /*
258 258 * Efficiently pack a dirstate object into its on-disk format.
259 259 */
260 260 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
261 261 {
262 262 PyObject *packobj = NULL;
263 263 PyObject *map, *copymap, *pl;
264 264 Py_ssize_t nbytes, pos, l;
265 265 PyObject *k, *v, *pn;
266 266 char *p, *s;
267 267 double now;
268 268
269 269 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
270 270 &PyDict_Type, &map, &PyDict_Type, &copymap,
271 271 &pl, &now))
272 272 return NULL;
273 273
274 274 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
275 275 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
276 276 return NULL;
277 277 }
278 278
279 279 /* Figure out how much we need to allocate. */
280 280 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
281 281 PyObject *c;
282 282 if (!PyString_Check(k)) {
283 283 PyErr_SetString(PyExc_TypeError, "expected string key");
284 284 goto bail;
285 285 }
286 286 nbytes += PyString_GET_SIZE(k) + 17;
287 287 c = PyDict_GetItem(copymap, k);
288 288 if (c) {
289 289 if (!PyString_Check(c)) {
290 290 PyErr_SetString(PyExc_TypeError,
291 291 "expected string key");
292 292 goto bail;
293 293 }
294 294 nbytes += PyString_GET_SIZE(c) + 1;
295 295 }
296 296 }
297 297
298 298 packobj = PyString_FromStringAndSize(NULL, nbytes);
299 299 if (packobj == NULL)
300 300 goto bail;
301 301
302 302 p = PyString_AS_STRING(packobj);
303 303
304 304 pn = PySequence_ITEM(pl, 0);
305 305 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
306 306 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
307 307 goto bail;
308 308 }
309 309 memcpy(p, s, l);
310 310 p += 20;
311 311 pn = PySequence_ITEM(pl, 1);
312 312 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
313 313 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
314 314 goto bail;
315 315 }
316 316 memcpy(p, s, l);
317 317 p += 20;
318 318
319 319 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
320 320 uint32_t mode, size, mtime;
321 321 Py_ssize_t len, l;
322 322 PyObject *o;
323 323 char *s, *t;
324 324
325 325 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
326 326 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
327 327 goto bail;
328 328 }
329 329 o = PyTuple_GET_ITEM(v, 0);
330 330 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
331 331 PyErr_SetString(PyExc_TypeError, "expected one byte");
332 332 goto bail;
333 333 }
334 334 *p++ = *s;
335 335 if (getintat(v, 1, &mode) == -1)
336 336 goto bail;
337 337 if (getintat(v, 2, &size) == -1)
338 338 goto bail;
339 339 if (getintat(v, 3, &mtime) == -1)
340 340 goto bail;
341 341 if (*s == 'n' && mtime == (uint32_t)now) {
342 342 /* See pure/parsers.py:pack_dirstate for why we do
343 343 * this. */
344 344 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
345 345 goto bail;
346 346 mtime = -1;
347 347 }
348 348 putbe32(mode, p);
349 349 putbe32(size, p + 4);
350 350 putbe32(mtime, p + 8);
351 351 t = p + 12;
352 352 p += 16;
353 353 len = PyString_GET_SIZE(k);
354 354 memcpy(p, PyString_AS_STRING(k), len);
355 355 p += len;
356 356 o = PyDict_GetItem(copymap, k);
357 357 if (o) {
358 358 *p++ = '\0';
359 359 l = PyString_GET_SIZE(o);
360 360 memcpy(p, PyString_AS_STRING(o), l);
361 361 p += l;
362 362 len += l + 1;
363 363 }
364 364 putbe32((uint32_t)len, t);
365 365 }
366 366
367 367 pos = p - PyString_AS_STRING(packobj);
368 368 if (pos != nbytes) {
369 369 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
370 370 (long)pos, (long)nbytes);
371 371 goto bail;
372 372 }
373 373
374 374 return packobj;
375 375 bail:
376 376 Py_XDECREF(packobj);
377 377 return NULL;
378 378 }
379 379
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each trie node has one slot per nybble value (0-15):
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;
390 390
/*
 * This class has two behaviours.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	nodetree *nt;          /* base-16 trie */
	int ntlength;          /* # nodes in use */
	int ntcapacity;        /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero: records are located through the
	                        * offsets table instead of a fixed stride */
} indexObject;
423 423
424 424 static Py_ssize_t index_length(const indexObject *self)
425 425 {
426 426 if (self->added == NULL)
427 427 return self->length;
428 428 return self->length + PyList_GET_SIZE(self->added);
429 429 }
430 430
/* entry handed out for the last (sentinel) position of an index */
static PyObject *nullentry;
/* the null node id; static storage, hence all zero bytes */
static const char nullid[20];

static long inline_scan(indexObject *self, const char **offsets);

/*
 * Py_BuildValue format for an index entry tuple. The first field is a
 * 64-bit value: where long is 32 bits wide it needs 'K' (unsigned long
 * long), otherwise 'k' (unsigned long) is used.
 */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
444 444
445 445 /*
446 446 * Return a pointer to the beginning of a RevlogNG record.
447 447 */
448 448 static const char *index_deref(indexObject *self, Py_ssize_t pos)
449 449 {
450 450 if (self->inlined && pos > 0) {
451 451 if (self->offsets == NULL) {
452 452 self->offsets = malloc(self->raw_length *
453 453 sizeof(*self->offsets));
454 454 if (self->offsets == NULL)
455 455 return (const char *)PyErr_NoMemory();
456 456 inline_scan(self, self->offsets);
457 457 }
458 458 return self->offsets[pos];
459 459 }
460 460
461 461 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
462 462 }
463 463
464 464 /*
465 465 * RevlogNG format (all in big endian, data may be inlined):
466 466 * 6 bytes: offset
467 467 * 2 bytes: flags
468 468 * 4 bytes: compressed length
469 469 * 4 bytes: uncompressed length
470 470 * 4 bytes: base revision
471 471 * 4 bytes: link revision
472 472 * 4 bytes: parent 1 revision
473 473 * 4 bytes: parent 2 revision
474 474 * 32 bytes: nodeid (only 20 bytes used)
475 475 */
476 476 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
477 477 {
478 478 uint64_t offset_flags;
479 479 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
480 480 const char *c_node_id;
481 481 const char *data;
482 482 Py_ssize_t length = index_length(self);
483 483 PyObject *entry;
484 484
485 485 if (pos < 0)
486 486 pos += length;
487 487
488 488 if (pos < 0 || pos >= length) {
489 489 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
490 490 return NULL;
491 491 }
492 492
493 493 if (pos == length - 1) {
494 494 Py_INCREF(nullentry);
495 495 return nullentry;
496 496 }
497 497
498 498 if (pos >= self->length - 1) {
499 499 PyObject *obj;
500 500 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
501 501 Py_INCREF(obj);
502 502 return obj;
503 503 }
504 504
505 505 if (self->cache) {
506 506 if (self->cache[pos]) {
507 507 Py_INCREF(self->cache[pos]);
508 508 return self->cache[pos];
509 509 }
510 510 } else {
511 511 self->cache = calloc(self->raw_length, sizeof(PyObject *));
512 512 if (self->cache == NULL)
513 513 return PyErr_NoMemory();
514 514 }
515 515
516 516 data = index_deref(self, pos);
517 517 if (data == NULL)
518 518 return NULL;
519 519
520 520 offset_flags = getbe32(data + 4);
521 521 if (pos == 0) /* mask out version number for the first entry */
522 522 offset_flags &= 0xFFFF;
523 523 else {
524 524 uint32_t offset_high = getbe32(data);
525 525 offset_flags |= ((uint64_t)offset_high) << 32;
526 526 }
527 527
528 528 comp_len = getbe32(data + 8);
529 529 uncomp_len = getbe32(data + 12);
530 530 base_rev = getbe32(data + 16);
531 531 link_rev = getbe32(data + 20);
532 532 parent_1 = getbe32(data + 24);
533 533 parent_2 = getbe32(data + 28);
534 534 c_node_id = data + 32;
535 535
536 536 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
537 537 uncomp_len, base_rev, link_rev,
538 538 parent_1, parent_2, c_node_id, 20);
539 539
540 540 if (entry) {
541 541 PyObject_GC_UnTrack(entry);
542 542 Py_INCREF(entry);
543 543 }
544 544
545 545 self->cache[pos] = entry;
546 546
547 547 return entry;
548 548 }
549 549
550 550 /*
551 551 * Return the 20-byte SHA of the node corresponding to the given rev.
552 552 */
553 553 static const char *index_node(indexObject *self, Py_ssize_t pos)
554 554 {
555 555 Py_ssize_t length = index_length(self);
556 556 const char *data;
557 557
558 558 if (pos == length - 1 || pos == INT_MAX)
559 559 return nullid;
560 560
561 561 if (pos >= length)
562 562 return NULL;
563 563
564 564 if (pos >= self->length - 1) {
565 565 PyObject *tuple, *str;
566 566 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
567 567 str = PyTuple_GetItem(tuple, 7);
568 568 return str ? PyString_AS_STRING(str) : NULL;
569 569 }
570 570
571 571 data = index_deref(self, pos);
572 572 return data ? data + 32 : NULL;
573 573 }
574 574
575 575 static int nt_insert(indexObject *self, const char *node, int rev);
576 576
577 577 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
578 578 {
579 579 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
580 580 return -1;
581 581 if (*nodelen == 20)
582 582 return 0;
583 583 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
584 584 return -1;
585 585 }
586 586
587 587 static PyObject *index_insert(indexObject *self, PyObject *args)
588 588 {
589 589 PyObject *obj;
590 590 char *node;
591 591 long offset;
592 592 Py_ssize_t len, nodelen;
593 593
594 594 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
595 595 return NULL;
596 596
597 597 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
598 598 PyErr_SetString(PyExc_TypeError, "8-tuple required");
599 599 return NULL;
600 600 }
601 601
602 602 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
603 603 return NULL;
604 604
605 605 len = index_length(self);
606 606
607 607 if (offset < 0)
608 608 offset += len;
609 609
610 610 if (offset != len - 1) {
611 611 PyErr_SetString(PyExc_IndexError,
612 612 "insert only supported at index -1");
613 613 return NULL;
614 614 }
615 615
616 616 if (offset > INT_MAX) {
617 617 PyErr_SetString(PyExc_ValueError,
618 618 "currently only 2**31 revs supported");
619 619 return NULL;
620 620 }
621 621
622 622 if (self->added == NULL) {
623 623 self->added = PyList_New(0);
624 624 if (self->added == NULL)
625 625 return NULL;
626 626 }
627 627
628 628 if (PyList_Append(self->added, obj) == -1)
629 629 return NULL;
630 630
631 631 if (self->nt)
632 632 nt_insert(self, node, (int)offset);
633 633
634 634 Py_CLEAR(self->headrevs);
635 635 Py_RETURN_NONE;
636 636 }
637 637
638 638 static void _index_clearcaches(indexObject *self)
639 639 {
640 640 if (self->cache) {
641 641 Py_ssize_t i;
642 642
643 643 for (i = 0; i < self->raw_length; i++)
644 644 Py_CLEAR(self->cache[i]);
645 645 free(self->cache);
646 646 self->cache = NULL;
647 647 }
648 648 if (self->offsets) {
649 649 free(self->offsets);
650 650 self->offsets = NULL;
651 651 }
652 652 if (self->nt) {
653 653 free(self->nt);
654 654 self->nt = NULL;
655 655 }
656 656 Py_CLEAR(self->headrevs);
657 657 }
658 658
659 659 static PyObject *index_clearcaches(indexObject *self)
660 660 {
661 661 _index_clearcaches(self);
662 662 self->ntlength = self->ntcapacity = 0;
663 663 self->ntdepth = self->ntsplits = 0;
664 664 self->ntrev = -1;
665 665 self->ntlookups = self->ntmisses = 0;
666 666 Py_RETURN_NONE;
667 667 }
668 668
669 669 static PyObject *index_stats(indexObject *self)
670 670 {
671 671 PyObject *obj = PyDict_New();
672 672
673 673 if (obj == NULL)
674 674 return NULL;
675 675
676 676 #define istat(__n, __d) \
677 677 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
678 678 goto bail;
679 679
680 680 if (self->added) {
681 681 Py_ssize_t len = PyList_GET_SIZE(self->added);
682 682 if (PyDict_SetItemString(obj, "index entries added",
683 683 PyInt_FromSsize_t(len)) == -1)
684 684 goto bail;
685 685 }
686 686
687 687 if (self->raw_length != self->length - 1)
688 688 istat(raw_length, "revs on disk");
689 689 istat(length, "revs in memory");
690 690 istat(ntcapacity, "node trie capacity");
691 691 istat(ntdepth, "node trie depth");
692 692 istat(ntlength, "node trie count");
693 693 istat(ntlookups, "node trie lookups");
694 694 istat(ntmisses, "node trie misses");
695 695 istat(ntrev, "node trie last rev scanned");
696 696 istat(ntsplits, "node trie splits");
697 697
698 698 #undef istat
699 699
700 700 return obj;
701 701
702 702 bail:
703 703 Py_XDECREF(obj);
704 704 return NULL;
705 705 }
706 706
707 707 /*
708 708 * When we cache a list, we want to be sure the caller can't mutate
709 709 * the cached copy.
710 710 */
711 711 static PyObject *list_copy(PyObject *list)
712 712 {
713 713 Py_ssize_t len = PyList_GET_SIZE(list);
714 714 PyObject *newlist = PyList_New(len);
715 715 Py_ssize_t i;
716 716
717 717 if (newlist == NULL)
718 718 return NULL;
719 719
720 720 for (i = 0; i < len; i++) {
721 721 PyObject *obj = PyList_GET_ITEM(list, i);
722 722 Py_INCREF(obj);
723 723 PyList_SET_ITEM(newlist, i, obj);
724 724 }
725 725
726 726 return newlist;
727 727 }
728 728
729 729 static PyObject *index_headrevs(indexObject *self)
730 730 {
731 731 Py_ssize_t i, len, addlen;
732 732 char *nothead = NULL;
733 733 PyObject *heads;
734 734
735 735 if (self->headrevs)
736 736 return list_copy(self->headrevs);
737 737
738 738 len = index_length(self) - 1;
739 739 heads = PyList_New(0);
740 740 if (heads == NULL)
741 741 goto bail;
742 742 if (len == 0) {
743 743 PyObject *nullid = PyInt_FromLong(-1);
744 744 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
745 745 Py_XDECREF(nullid);
746 746 goto bail;
747 747 }
748 748 goto done;
749 749 }
750 750
751 751 nothead = calloc(len, 1);
752 752 if (nothead == NULL)
753 753 goto bail;
754 754
755 755 for (i = 0; i < self->raw_length; i++) {
756 756 const char *data = index_deref(self, i);
757 757 int parent_1 = getbe32(data + 24);
758 758 int parent_2 = getbe32(data + 28);
759 759 if (parent_1 >= 0)
760 760 nothead[parent_1] = 1;
761 761 if (parent_2 >= 0)
762 762 nothead[parent_2] = 1;
763 763 }
764 764
765 765 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
766 766
767 767 for (i = 0; i < addlen; i++) {
768 768 PyObject *rev = PyList_GET_ITEM(self->added, i);
769 769 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
770 770 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
771 771 long parent_1, parent_2;
772 772
773 773 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
774 774 PyErr_SetString(PyExc_TypeError,
775 775 "revlog parents are invalid");
776 776 goto bail;
777 777 }
778 778 parent_1 = PyInt_AS_LONG(p1);
779 779 parent_2 = PyInt_AS_LONG(p2);
780 780 if (parent_1 >= 0)
781 781 nothead[parent_1] = 1;
782 782 if (parent_2 >= 0)
783 783 nothead[parent_2] = 1;
784 784 }
785 785
786 786 for (i = 0; i < len; i++) {
787 787 PyObject *head;
788 788
789 789 if (nothead[i])
790 790 continue;
791 791 head = PyInt_FromLong(i);
792 792 if (head == NULL || PyList_Append(heads, head) == -1) {
793 793 Py_XDECREF(head);
794 794 goto bail;
795 795 }
796 796 }
797 797
798 798 done:
799 799 self->headrevs = heads;
800 800 free(nothead);
801 801 return list_copy(self->headrevs);
802 802 bail:
803 803 Py_XDECREF(heads);
804 804 free(nothead);
805 805 return NULL;
806 806 }
807 807
808 808 static inline int nt_level(const char *node, Py_ssize_t level)
809 809 {
810 810 int v = node[level>>1];
811 811 if (!(level & 1))
812 812 v >>= 4;
813 813 return v & 0xf;
814 814 }
815 815
816 816 /*
817 817 * Return values:
818 818 *
819 819 * -4: match is ambiguous (multiple candidates)
820 820 * -2: not found
821 821 * rest: valid rev
822 822 */
823 823 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
824 824 int hex)
825 825 {
826 826 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
827 827 int level, maxlevel, off;
828 828
829 829 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
830 830 return -1;
831 831
832 832 if (self->nt == NULL)
833 833 return -2;
834 834
835 835 if (hex)
836 836 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
837 837 else
838 838 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
839 839
840 840 for (level = off = 0; level < maxlevel; level++) {
841 841 int k = getnybble(node, level);
842 842 nodetree *n = &self->nt[off];
843 843 int v = n->children[k];
844 844
845 845 if (v < 0) {
846 846 const char *n;
847 847 Py_ssize_t i;
848 848
849 849 v = -v - 1;
850 850 n = index_node(self, v);
851 851 if (n == NULL)
852 852 return -2;
853 853 for (i = level; i < maxlevel; i++)
854 854 if (getnybble(node, i) != nt_level(n, i))
855 855 return -2;
856 856 return v;
857 857 }
858 858 if (v == 0)
859 859 return -2;
860 860 off = v;
861 861 }
862 862 /* multiple matches against an ambiguous prefix */
863 863 return -4;
864 864 }
865 865
866 866 static int nt_new(indexObject *self)
867 867 {
868 868 if (self->ntlength == self->ntcapacity) {
869 869 self->ntcapacity *= 2;
870 870 self->nt = realloc(self->nt,
871 871 self->ntcapacity * sizeof(nodetree));
872 872 if (self->nt == NULL) {
873 873 PyErr_SetString(PyExc_MemoryError, "out of memory");
874 874 return -1;
875 875 }
876 876 memset(&self->nt[self->ntlength], 0,
877 877 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
878 878 }
879 879 return self->ntlength++;
880 880 }
881 881
882 882 static int nt_insert(indexObject *self, const char *node, int rev)
883 883 {
884 884 int level = 0;
885 885 int off = 0;
886 886
887 887 while (level < 40) {
888 888 int k = nt_level(node, level);
889 889 nodetree *n;
890 890 int v;
891 891
892 892 n = &self->nt[off];
893 893 v = n->children[k];
894 894
895 895 if (v == 0) {
896 896 n->children[k] = -rev - 1;
897 897 return 0;
898 898 }
899 899 if (v < 0) {
900 900 const char *oldnode = index_node(self, -v - 1);
901 901 int noff;
902 902
903 903 if (!oldnode || !memcmp(oldnode, node, 20)) {
904 904 n->children[k] = -rev - 1;
905 905 return 0;
906 906 }
907 907 noff = nt_new(self);
908 908 if (noff == -1)
909 909 return -1;
910 910 /* self->nt may have been changed by realloc */
911 911 self->nt[off].children[k] = noff;
912 912 off = noff;
913 913 n = &self->nt[off];
914 914 n->children[nt_level(oldnode, ++level)] = v;
915 915 if (level > self->ntdepth)
916 916 self->ntdepth = level;
917 917 self->ntsplits += 1;
918 918 } else {
919 919 level += 1;
920 920 off = v;
921 921 }
922 922 }
923 923
924 924 return -1;
925 925 }
926 926
927 927 static int nt_init(indexObject *self)
928 928 {
929 929 if (self->nt == NULL) {
930 930 if (self->raw_length > INT_MAX) {
931 931 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
932 932 return -1;
933 933 }
934 934 self->ntcapacity = self->raw_length < 4
935 935 ? 4 : (int)self->raw_length / 2;
936 936
937 937 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
938 938 if (self->nt == NULL) {
939 939 PyErr_NoMemory();
940 940 return -1;
941 941 }
942 942 self->ntlength = 1;
943 943 self->ntrev = (int)index_length(self) - 1;
944 944 self->ntlookups = 1;
945 945 self->ntmisses = 0;
946 946 if (nt_insert(self, nullid, INT_MAX) == -1)
947 947 return -1;
948 948 }
949 949 return 0;
950 950 }
951 951
952 952 /*
953 953 * Return values:
954 954 *
955 955 * -3: error (exception set)
956 956 * -2: not found (no exception set)
957 957 * rest: valid rev
958 958 */
959 959 static int index_find_node(indexObject *self,
960 960 const char *node, Py_ssize_t nodelen)
961 961 {
962 962 int rev;
963 963
964 964 self->ntlookups++;
965 965 rev = nt_find(self, node, nodelen, 0);
966 966 if (rev >= -1)
967 967 return rev;
968 968
969 969 if (nt_init(self) == -1)
970 970 return -3;
971 971
972 972 /*
973 973 * For the first handful of lookups, we scan the entire index,
974 974 * and cache only the matching nodes. This optimizes for cases
975 975 * like "hg tip", where only a few nodes are accessed.
976 976 *
977 977 * After that, we cache every node we visit, using a single
978 978 * scan amortized over multiple lookups. This gives the best
979 979 * bulk performance, e.g. for "hg log".
980 980 */
981 981 if (self->ntmisses++ < 4) {
982 982 for (rev = self->ntrev - 1; rev >= 0; rev--) {
983 983 const char *n = index_node(self, rev);
984 984 if (n == NULL)
985 985 return -2;
986 986 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
987 987 if (nt_insert(self, n, rev) == -1)
988 988 return -3;
989 989 break;
990 990 }
991 991 }
992 992 } else {
993 993 for (rev = self->ntrev - 1; rev >= 0; rev--) {
994 994 const char *n = index_node(self, rev);
995 995 if (n == NULL) {
996 996 self->ntrev = rev + 1;
997 997 return -2;
998 998 }
999 999 if (nt_insert(self, n, rev) == -1) {
1000 1000 self->ntrev = rev + 1;
1001 1001 return -3;
1002 1002 }
1003 1003 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1004 1004 break;
1005 1005 }
1006 1006 }
1007 1007 self->ntrev = rev;
1008 1008 }
1009 1009
1010 1010 if (rev >= 0)
1011 1011 return rev;
1012 1012 return -2;
1013 1013 }
1014 1014
1015 1015 static PyObject *raise_revlog_error(void)
1016 1016 {
1017 1017 static PyObject *errclass;
1018 1018 PyObject *mod = NULL, *errobj;
1019 1019
1020 1020 if (errclass == NULL) {
1021 1021 PyObject *dict;
1022 1022
1023 1023 mod = PyImport_ImportModule("mercurial.error");
1024 1024 if (mod == NULL)
1025 1025 goto classfail;
1026 1026
1027 1027 dict = PyModule_GetDict(mod);
1028 1028 if (dict == NULL)
1029 1029 goto classfail;
1030 1030
1031 1031 errclass = PyDict_GetItemString(dict, "RevlogError");
1032 1032 if (errclass == NULL) {
1033 1033 PyErr_SetString(PyExc_SystemError,
1034 1034 "could not find RevlogError");
1035 1035 goto classfail;
1036 1036 }
1037 1037 Py_INCREF(errclass);
1038 1038 }
1039 1039
1040 1040 errobj = PyObject_CallFunction(errclass, NULL);
1041 1041 if (errobj == NULL)
1042 1042 return NULL;
1043 1043 PyErr_SetObject(errclass, errobj);
1044 1044 return errobj;
1045 1045
1046 1046 classfail:
1047 1047 Py_XDECREF(mod);
1048 1048 return NULL;
1049 1049 }
1050 1050
1051 1051 static PyObject *index_getitem(indexObject *self, PyObject *value)
1052 1052 {
1053 1053 char *node;
1054 1054 Py_ssize_t nodelen;
1055 1055 int rev;
1056 1056
1057 1057 if (PyInt_Check(value))
1058 1058 return index_get(self, PyInt_AS_LONG(value));
1059 1059
1060 1060 if (node_check(value, &node, &nodelen) == -1)
1061 1061 return NULL;
1062 1062 rev = index_find_node(self, node, nodelen);
1063 1063 if (rev >= -1)
1064 1064 return PyInt_FromLong(rev);
1065 1065 if (rev == -2)
1066 1066 raise_revlog_error();
1067 1067 return NULL;
1068 1068 }
1069 1069
1070 1070 static int nt_partialmatch(indexObject *self, const char *node,
1071 1071 Py_ssize_t nodelen)
1072 1072 {
1073 1073 int rev;
1074 1074
1075 1075 if (nt_init(self) == -1)
1076 1076 return -3;
1077 1077
1078 1078 if (self->ntrev > 0) {
1079 1079 /* ensure that the radix tree is fully populated */
1080 1080 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1081 1081 const char *n = index_node(self, rev);
1082 1082 if (n == NULL)
1083 1083 return -2;
1084 1084 if (nt_insert(self, n, rev) == -1)
1085 1085 return -3;
1086 1086 }
1087 1087 self->ntrev = rev;
1088 1088 }
1089 1089
1090 1090 return nt_find(self, node, nodelen, 1);
1091 1091 }
1092 1092
1093 1093 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1094 1094 {
1095 1095 const char *fullnode;
1096 1096 int nodelen;
1097 1097 char *node;
1098 1098 int rev, i;
1099 1099
1100 1100 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1101 1101 return NULL;
1102 1102
1103 1103 if (nodelen < 4) {
1104 1104 PyErr_SetString(PyExc_ValueError, "key too short");
1105 1105 return NULL;
1106 1106 }
1107 1107
1108 1108 if (nodelen > 40) {
1109 1109 PyErr_SetString(PyExc_ValueError, "key too long");
1110 1110 return NULL;
1111 1111 }
1112 1112
1113 1113 for (i = 0; i < nodelen; i++)
1114 1114 hexdigit(node, i);
1115 1115 if (PyErr_Occurred()) {
1116 1116 /* input contains non-hex characters */
1117 1117 PyErr_Clear();
1118 1118 Py_RETURN_NONE;
1119 1119 }
1120 1120
1121 1121 rev = nt_partialmatch(self, node, nodelen);
1122 1122
1123 1123 switch (rev) {
1124 1124 case -4:
1125 1125 raise_revlog_error();
1126 1126 case -3:
1127 1127 return NULL;
1128 1128 case -2:
1129 1129 Py_RETURN_NONE;
1130 1130 case -1:
1131 1131 return PyString_FromStringAndSize(nullid, 20);
1132 1132 }
1133 1133
1134 1134 fullnode = index_node(self, rev);
1135 1135 if (fullnode == NULL) {
1136 1136 PyErr_Format(PyExc_IndexError,
1137 1137 "could not access rev %d", rev);
1138 1138 return NULL;
1139 1139 }
1140 1140 return PyString_FromStringAndSize(fullnode, 20);
1141 1141 }
1142 1142
1143 1143 static PyObject *index_m_get(indexObject *self, PyObject *args)
1144 1144 {
1145 1145 Py_ssize_t nodelen;
1146 1146 PyObject *val;
1147 1147 char *node;
1148 1148 int rev;
1149 1149
1150 1150 if (!PyArg_ParseTuple(args, "O", &val))
1151 1151 return NULL;
1152 1152 if (node_check(val, &node, &nodelen) == -1)
1153 1153 return NULL;
1154 1154 rev = index_find_node(self, node, nodelen);
1155 1155 if (rev == -3)
1156 1156 return NULL;
1157 1157 if (rev == -2)
1158 1158 Py_RETURN_NONE;
1159 1159 return PyInt_FromLong(rev);
1160 1160 }
1161 1161
1162 1162 static int index_contains(indexObject *self, PyObject *value)
1163 1163 {
1164 1164 char *node;
1165 1165 Py_ssize_t nodelen;
1166 1166
1167 1167 if (PyInt_Check(value)) {
1168 1168 long rev = PyInt_AS_LONG(value);
1169 1169 return rev >= -1 && rev < index_length(self);
1170 1170 }
1171 1171
1172 1172 if (node_check(value, &node, &nodelen) == -1)
1173 1173 return -1;
1174 1174
1175 1175 switch (index_find_node(self, node, nodelen)) {
1176 1176 case -3:
1177 1177 return -1;
1178 1178 case -2:
1179 1179 return 0;
1180 1180 default:
1181 1181 return 1;
1182 1182 }
1183 1183 }
1184 1184
1185 1185 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1186 1186 {
1187 1187 if (rev >= self->length - 1) {
1188 1188 PyObject *tuple = PyList_GET_ITEM(self->added,
1189 1189 rev - self->length + 1);
1190 1190 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1191 1191 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1192 1192 } else {
1193 1193 const char *data = index_deref(self, rev);
1194 1194 ps[0] = getbe32(data + 24);
1195 1195 ps[1] = getbe32(data + 28);
1196 1196 }
1197 1197 }
1198 1198
1199 1199 typedef uint64_t bitmask;
1200 1200
1201 1201 /*
1202 1202 * Given a disjoint set of revs, return all candidates for the
1203 1203 * greatest common ancestor. In revset notation, this is the set
1204 1204 * "heads(::a and ::b and ...)"
1205 1205 */
1206 1206 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1207 1207 int revcount)
1208 1208 {
1209 1209 const bitmask allseen = (1ull << revcount) - 1;
1210 1210 const bitmask poison = 1ull << revcount;
1211 1211 PyObject *gca = PyList_New(0);
1212 1212 int i, v, interesting, left;
1213 1213 int maxrev = -1;
1214 1214 long sp;
1215 1215 bitmask *seen;
1216 1216
1217 1217 if (gca == NULL)
1218 1218 return PyErr_NoMemory();
1219 1219
1220 1220 for (i = 0; i < revcount; i++) {
1221 1221 if (revs[i] > maxrev)
1222 1222 maxrev = revs[i];
1223 1223 }
1224 1224
1225 1225 seen = calloc(sizeof(*seen), maxrev + 1);
1226 1226 if (seen == NULL) {
1227 1227 Py_DECREF(gca);
1228 1228 return PyErr_NoMemory();
1229 1229 }
1230 1230
1231 1231 for (i = 0; i < revcount; i++)
1232 1232 seen[revs[i]] = 1ull << i;
1233 1233
1234 1234 interesting = left = revcount;
1235 1235
1236 1236 for (v = maxrev; v >= 0 && interesting; v--) {
1237 1237 long sv = seen[v];
1238 1238 int parents[2];
1239 1239
1240 1240 if (!sv)
1241 1241 continue;
1242 1242
1243 1243 if (sv < poison) {
1244 1244 interesting -= 1;
1245 1245 if (sv == allseen) {
1246 1246 PyObject *obj = PyInt_FromLong(v);
1247 1247 if (obj == NULL)
1248 1248 goto bail;
1249 1249 if (PyList_Append(gca, obj) == -1) {
1250 1250 Py_DECREF(obj);
1251 1251 goto bail;
1252 1252 }
1253 1253 sv |= poison;
1254 1254 for (i = 0; i < revcount; i++) {
1255 1255 if (revs[i] == v) {
1256 1256 if (--left <= 1)
1257 1257 goto done;
1258 1258 break;
1259 1259 }
1260 1260 }
1261 1261 }
1262 1262 }
1263 1263 index_get_parents(self, v, parents);
1264 1264
1265 1265 for (i = 0; i < 2; i++) {
1266 1266 int p = parents[i];
1267 1267 if (p == -1)
1268 1268 continue;
1269 1269 sp = seen[p];
1270 1270 if (sv < poison) {
1271 1271 if (sp == 0) {
1272 1272 seen[p] = sv;
1273 1273 interesting++;
1274 1274 }
1275 1275 else if (sp != sv)
1276 1276 seen[p] |= sv;
1277 1277 } else {
1278 1278 if (sp && sp < poison)
1279 1279 interesting--;
1280 1280 seen[p] = sv;
1281 1281 }
1282 1282 }
1283 1283 }
1284 1284
1285 1285 done:
1286 1286 free(seen);
1287 1287 return gca;
1288 1288 bail:
1289 1289 free(seen);
1290 1290 Py_XDECREF(gca);
1291 1291 return NULL;
1292 1292 }
1293 1293
1294 1294 /*
1295 1295 * Given a disjoint set of revs, return the subset with the longest
1296 1296 * path to the root.
1297 1297 */
1298 1298 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1299 1299 {
1300 1300 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1301 1301 static const Py_ssize_t capacity = 24;
1302 1302 int *depth, *interesting = NULL;
1303 1303 int i, j, v, ninteresting;
1304 1304 PyObject *dict = NULL, *keys;
1305 1305 long *seen = NULL;
1306 1306 int maxrev = -1;
1307 1307 long final;
1308 1308
1309 1309 if (revcount > capacity) {
1310 1310 PyErr_Format(PyExc_OverflowError,
1311 1311 "bitset size (%ld) > capacity (%ld)",
1312 1312 (long)revcount, (long)capacity);
1313 1313 return NULL;
1314 1314 }
1315 1315
1316 1316 for (i = 0; i < revcount; i++) {
1317 1317 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1318 1318 if (n > maxrev)
1319 1319 maxrev = n;
1320 1320 }
1321 1321
1322 1322 depth = calloc(sizeof(*depth), maxrev + 1);
1323 1323 if (depth == NULL)
1324 1324 return PyErr_NoMemory();
1325 1325
1326 1326 seen = calloc(sizeof(*seen), maxrev + 1);
1327 1327 if (seen == NULL) {
1328 1328 PyErr_NoMemory();
1329 1329 goto bail;
1330 1330 }
1331 1331
1332 1332 interesting = calloc(sizeof(*interesting), 2 << revcount);
1333 1333 if (interesting == NULL) {
1334 1334 PyErr_NoMemory();
1335 1335 goto bail;
1336 1336 }
1337 1337
1338 1338 if (PyList_Sort(revs) == -1)
1339 1339 goto bail;
1340 1340
1341 1341 for (i = 0; i < revcount; i++) {
1342 1342 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1343 1343 long b = 1l << i;
1344 1344 depth[n] = 1;
1345 1345 seen[n] = b;
1346 1346 interesting[b] = 1;
1347 1347 }
1348 1348
1349 1349 ninteresting = (int)revcount;
1350 1350
1351 1351 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1352 1352 int dv = depth[v];
1353 1353 int parents[2];
1354 1354 long sv;
1355 1355
1356 1356 if (dv == 0)
1357 1357 continue;
1358 1358
1359 1359 sv = seen[v];
1360 1360 index_get_parents(self, v, parents);
1361 1361
1362 1362 for (i = 0; i < 2; i++) {
1363 1363 int p = parents[i];
1364 1364 long nsp, sp;
1365 1365 int dp;
1366 1366
1367 1367 if (p == -1)
1368 1368 continue;
1369 1369
1370 1370 dp = depth[p];
1371 1371 nsp = sp = seen[p];
1372 1372 if (dp <= dv) {
1373 1373 depth[p] = dv + 1;
1374 1374 if (sp != sv) {
1375 1375 interesting[sv] += 1;
1376 1376 nsp = seen[p] = sv;
1377 1377 if (sp) {
1378 1378 interesting[sp] -= 1;
1379 1379 if (interesting[sp] == 0)
1380 1380 ninteresting -= 1;
1381 1381 }
1382 1382 }
1383 1383 }
1384 1384 else if (dv == dp - 1) {
1385 1385 nsp = sp | sv;
1386 1386 if (nsp == sp)
1387 1387 continue;
1388 1388 seen[p] = nsp;
1389 1389 interesting[sp] -= 1;
1390 1390 if (interesting[sp] == 0 && interesting[nsp] > 0)
1391 1391 ninteresting -= 1;
1392 1392 interesting[nsp] += 1;
1393 1393 }
1394 1394 }
1395 1395 interesting[sv] -= 1;
1396 1396 if (interesting[sv] == 0)
1397 1397 ninteresting -= 1;
1398 1398 }
1399 1399
1400 1400 final = 0;
1401 1401 j = ninteresting;
1402 1402 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1403 1403 if (interesting[i] == 0)
1404 1404 continue;
1405 1405 final |= i;
1406 1406 j -= 1;
1407 1407 }
1408 1408 if (final == 0)
1409 1409 return PyList_New(0);
1410 1410
1411 1411 dict = PyDict_New();
1412 1412 if (dict == NULL)
1413 1413 goto bail;
1414 1414
1415 1415 for (i = 0; i < revcount; i++) {
1416 1416 PyObject *key;
1417 1417
1418 1418 if ((final & (1 << i)) == 0)
1419 1419 continue;
1420 1420
1421 1421 key = PyList_GET_ITEM(revs, i);
1422 1422 Py_INCREF(key);
1423 1423 Py_INCREF(Py_None);
1424 1424 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1425 1425 Py_DECREF(key);
1426 1426 Py_DECREF(Py_None);
1427 1427 goto bail;
1428 1428 }
1429 1429 }
1430 1430
1431 1431 keys = PyDict_Keys(dict);
1432 1432
1433 1433 free(depth);
1434 1434 free(seen);
1435 1435 free(interesting);
1436 1436 Py_DECREF(dict);
1437 1437
1438 1438 return keys;
1439 1439 bail:
1440 1440 free(depth);
1441 1441 free(seen);
1442 1442 free(interesting);
1443 1443 Py_XDECREF(dict);
1444 1444
1445 1445 return NULL;
1446 1446 }
1447 1447
1448 1448 /*
1449 1449 * Given a (possibly overlapping) set of revs, return the greatest
1450 1450 * common ancestors: those with the longest path to the root.
1451 1451 */
1452 1452 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1453 1453 {
1454 1454 PyObject *ret = NULL, *gca = NULL;
1455 1455 Py_ssize_t argcount, i, len;
1456 1456 bitmask repeat = 0;
1457 1457 int revcount = 0;
1458 1458 int *revs;
1459 1459
1460 1460 argcount = PySequence_Length(args);
1461 1461 revs = malloc(argcount * sizeof(*revs));
1462 1462 if (argcount > 0 && revs == NULL)
1463 1463 return PyErr_NoMemory();
1464 1464 len = index_length(self) - 1;
1465 1465
1466 1466 for (i = 0; i < argcount; i++) {
1467 1467 static const int capacity = 24;
1468 1468 PyObject *obj = PySequence_GetItem(args, i);
1469 1469 bitmask x;
1470 1470 long val;
1471 1471
1472 1472 if (!PyInt_Check(obj)) {
1473 1473 PyErr_SetString(PyExc_TypeError,
1474 1474 "arguments must all be ints");
1475 1475 goto bail;
1476 1476 }
1477 1477 val = PyInt_AsLong(obj);
1478 1478 if (val == -1) {
1479 1479 ret = PyList_New(0);
1480 1480 goto done;
1481 1481 }
1482 1482 if (val < 0 || val >= len) {
1483 1483 PyErr_SetString(PyExc_IndexError,
1484 1484 "index out of range");
1485 1485 goto bail;
1486 1486 }
1487 1487 /* this cheesy bloom filter lets us avoid some more
1488 1488 * expensive duplicate checks in the common set-is-disjoint
1489 1489 * case */
1490 1490 x = 1ull << (val & 0x3f);
1491 1491 if (repeat & x) {
1492 1492 int k;
1493 1493 for (k = 0; k < revcount; k++) {
1494 1494 if (val == revs[k])
1495 1495 goto duplicate;
1496 1496 }
1497 1497 }
1498 1498 else repeat |= x;
1499 1499 if (revcount >= capacity) {
1500 1500 PyErr_Format(PyExc_OverflowError,
1501 1501 "bitset size (%d) > capacity (%d)",
1502 1502 revcount, capacity);
1503 1503 goto bail;
1504 1504 }
1505 1505 revs[revcount++] = (int)val;
1506 1506 duplicate:;
1507 1507 }
1508 1508
1509 1509 if (revcount == 0) {
1510 1510 ret = PyList_New(0);
1511 1511 goto done;
1512 1512 }
1513 1513 if (revcount == 1) {
1514 1514 PyObject *obj;
1515 1515 ret = PyList_New(1);
1516 1516 if (ret == NULL)
1517 1517 goto bail;
1518 1518 obj = PyInt_FromLong(revs[0]);
1519 1519 if (obj == NULL)
1520 1520 goto bail;
1521 1521 PyList_SET_ITEM(ret, 0, obj);
1522 1522 goto done;
1523 1523 }
1524 1524
1525 1525 gca = find_gca_candidates(self, revs, revcount);
1526 1526 if (gca == NULL)
1527 1527 goto bail;
1528 1528
1529 1529 if (PyList_GET_SIZE(gca) <= 1) {
1530 1530 ret = gca;
1531 1531 Py_INCREF(gca);
1532 1532 }
1533 1533 else if (PyList_GET_SIZE(gca) == 1) {
1534 1534 ret = PyList_GET_ITEM(gca, 0);
1535 1535 Py_INCREF(ret);
1536 1536 }
1537 1537 else ret = find_deepest(self, gca);
1538 1538
1539 1539 done:
1540 1540 free(revs);
1541 1541 Py_XDECREF(gca);
1542 1542
1543 1543 return ret;
1544 1544
1545 1545 bail:
1546 1546 free(revs);
1547 1547 Py_XDECREF(gca);
1548 1548 Py_XDECREF(ret);
1549 1549 return NULL;
1550 1550 }
1551 1551
1552 1552 /*
1553 1553 * Invalidate any trie entries introduced by added revs.
1554 1554 */
1555 1555 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1556 1556 {
1557 1557 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1558 1558
1559 1559 for (i = start; i < len; i++) {
1560 1560 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1561 1561 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1562 1562
1563 1563 nt_insert(self, PyString_AS_STRING(node), -1);
1564 1564 }
1565 1565
1566 1566 if (start == 0)
1567 1567 Py_CLEAR(self->added);
1568 1568 }
1569 1569
1570 1570 /*
1571 1571 * Delete a numeric range of revs, which must be at the end of the
1572 1572 * range, but exclude the sentinel nullid entry.
1573 1573 */
/*
 * item is a slice object.  Returns 0 on success (including an empty
 * slice) and -1 with an exception set on failure: ValueError when the
 * step is not 1 after normalisation, IndexError when the slice does
 * not end at length - 1.  self->headrevs is cleared whenever a
 * deletion is attempted.
 */
1574 1574 static int index_slice_del(indexObject *self, PyObject *item)
1575 1575 {
1576 1576 Py_ssize_t start, stop, step, slicelength;
1577 1577 Py_ssize_t length = index_length(self);
1578 1578 int ret = 0;
1579 1579
1580 1580 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1581 1581 &start, &stop, &step, &slicelength) < 0)
1582 1582 return -1;
1583 1583
1584 1584 if (slicelength <= 0)
1585 1585 return 0;
1586 1586
1587 1587 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1588 1588 stop = start;
1589 1589
/* rewrite a backwards slice as the equivalent forward one */
1590 1590 if (step < 0) {
1591 1591 stop = start + 1;
1592 1592 start = stop + step*(slicelength - 1) - 1;
1593 1593 step = -step;
1594 1594 }
1595 1595
1596 1596 if (step != 1) {
1597 1597 PyErr_SetString(PyExc_ValueError,
1598 1598 "revlog index delete requires step size of 1");
1599 1599 return -1;
1600 1600 }
1601 1601
/* the slice must stop just before the final entry */
1602 1602 if (stop != length - 1) {
1603 1603 PyErr_SetString(PyExc_IndexError,
1604 1604 "revlog index deletion indices are invalid");
1605 1605 return -1;
1606 1606 }
1607 1607
/*
 * The cut starts below self->length - 1: trie entries for the affected
 * revs and for all of self->added are invalidated, then self->length,
 * the cache and self->raw_length are truncated to start.
 */
1608 1608 if (start < self->length - 1) {
1609 1609 if (self->nt) {
1610 1610 Py_ssize_t i;
1611 1611
1612 1612 for (i = start + 1; i < self->length - 1; i++) {
1613 1613 const char *node = index_node(self, i);
1614 1614
1615 1615 if (node)
1616 1616 nt_insert(self, node, -1);
1617 1617 }
1618 1618 if (self->added)
1619 1619 nt_invalidate_added(self, 0);
1620 1620 if (self->ntrev > start)
1621 1621 self->ntrev = (int)start;
1622 1622 }
1623 1623 self->length = start + 1;
1624 1624 if (start < self->raw_length) {
1625 1625 if (self->cache) {
1626 1626 Py_ssize_t i;
1627 1627 for (i = start; i < self->raw_length; i++)
1628 1628 Py_CLEAR(self->cache[i]);
1629 1629 }
1630 1630 self->raw_length = start;
1631 1631 }
1632 1632 goto done;
1633 1633 }
1634 1634
/* otherwise only the tail of self->added is affected */
1635 1635 if (self->nt) {
1636 1636 nt_invalidate_added(self, start - self->length + 1);
1637 1637 if (self->ntrev > start)
1638 1638 self->ntrev = (int)start;
1639 1639 }
1640 1640 if (self->added)
1641 1641 ret = PyList_SetSlice(self->added, start - self->length + 1,
1642 1642 PyList_GET_SIZE(self->added), NULL);
1643 1643 done:
1644 1644 Py_CLEAR(self->headrevs);
1645 1645 return ret;
1646 1646 }
1647 1647
1648 1648 /*
1649 1649 * Supported ops:
1650 1650 *
1651 1651 * slice deletion
1652 1652 * string assignment (extend node->rev mapping)
1653 1653 * string deletion (shrink node->rev mapping)
1654 1654 */
1655 1655 static int index_assign_subscript(indexObject *self, PyObject *item,
1656 1656 PyObject *value)
1657 1657 {
1658 1658 char *node;
1659 1659 Py_ssize_t nodelen;
1660 1660 long rev;
1661 1661
1662 1662 if (PySlice_Check(item) && value == NULL)
1663 1663 return index_slice_del(self, item);
1664 1664
1665 1665 if (node_check(item, &node, &nodelen) == -1)
1666 1666 return -1;
1667 1667
1668 1668 if (value == NULL)
1669 1669 return self->nt ? nt_insert(self, node, -1) : 0;
1670 1670 rev = PyInt_AsLong(value);
1671 1671 if (rev > INT_MAX || rev < 0) {
1672 1672 if (!PyErr_Occurred())
1673 1673 PyErr_SetString(PyExc_ValueError, "rev out of range");
1674 1674 return -1;
1675 1675 }
1676 1676 return nt_insert(self, node, (int)rev);
1677 1677 }
1678 1678
1679 1679 /*
1680 1680 * Find all RevlogNG entries in an index that has inline data. Update
1681 1681 * the optional "offsets" table with those entries.
1682 1682 */
/*
 * When offsets is non-NULL, offsets[i] receives a pointer to the start
 * of record i inside self->data.  Returns the number of records found,
 * or -1 with ValueError "corrupt index file" when the walk ends
 * neither exactly at the end of the data nor one header short of it.
 */
1683 1683 static long inline_scan(indexObject *self, const char **offsets)
1684 1684 {
1685 1685 const char *data = PyString_AS_STRING(self->data);
1686 1686 const char *end = data + PyString_GET_SIZE(self->data);
1687 1687 long incr = v1_hdrsize;
1688 1688 Py_ssize_t len = 0;
1689 1689
1690 1690 while (data + v1_hdrsize <= end) {
1691 1691 uint32_t comp_len;
1692 1692 const char *old_data;
1693 1693 /* 3rd element of header is length of compressed inline data */
1694 1694 comp_len = getbe32(data + 8);
1695 1695 incr = v1_hdrsize + comp_len;
/* stop if the record size wrapped around */
1696 1696 if (incr < v1_hdrsize)
1697 1697 break;
1698 1698 if (offsets)
1699 1699 offsets[len] = data;
1700 1700 len++;
1701 1701 old_data = data;
1702 1702 data += incr;
/* stop if the pointer did not move forward */
1703 1703 if (data <= old_data)
1704 1704 break;
1705 1705 }
1706 1706
1707 1707 if (data != end && data + v1_hdrsize != end) {
1708 1708 if (!PyErr_Occurred())
1709 1709 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1710 1710 return -1;
1711 1711 }
1712 1712
1713 1713 return len;
1714 1714 }
1715 1715
1716 1716 static int index_init(indexObject *self, PyObject *args)
1717 1717 {
1718 1718 PyObject *data_obj, *inlined_obj;
1719 1719 Py_ssize_t size;
1720 1720
1721 1721 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1722 1722 self->raw_length = 0;
1723 1723 self->added = NULL;
1724 1724 self->cache = NULL;
1725 1725 self->data = NULL;
1726 1726 self->headrevs = NULL;
1727 1727 self->nt = NULL;
1728 1728 self->offsets = NULL;
1729 1729
1730 1730 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1731 1731 return -1;
1732 1732 if (!PyString_Check(data_obj)) {
1733 1733 PyErr_SetString(PyExc_TypeError, "data is not a string");
1734 1734 return -1;
1735 1735 }
1736 1736 size = PyString_GET_SIZE(data_obj);
1737 1737
1738 1738 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1739 1739 self->data = data_obj;
1740 1740
1741 1741 self->ntlength = self->ntcapacity = 0;
1742 1742 self->ntdepth = self->ntsplits = 0;
1743 1743 self->ntlookups = self->ntmisses = 0;
1744 1744 self->ntrev = -1;
1745 1745 Py_INCREF(self->data);
1746 1746
1747 1747 if (self->inlined) {
1748 1748 long len = inline_scan(self, NULL);
1749 1749 if (len == -1)
1750 1750 goto bail;
1751 1751 self->raw_length = len;
1752 1752 self->length = len + 1;
1753 1753 } else {
1754 1754 if (size % v1_hdrsize) {
1755 1755 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1756 1756 goto bail;
1757 1757 }
1758 1758 self->raw_length = size / v1_hdrsize;
1759 1759 self->length = self->raw_length + 1;
1760 1760 }
1761 1761
1762 1762 return 0;
1763 1763 bail:
1764 1764 return -1;
1765 1765 }
1766 1766
1767 1767 static PyObject *index_nodemap(indexObject *self)
1768 1768 {
1769 1769 Py_INCREF(self);
1770 1770 return (PyObject *)self;
1771 1771 }
1772 1772
/*
 * tp_dealloc slot: clear the caches, release the references to
 * self->data and self->added (either may be NULL), then free the
 * object.
 */
1773 1773 static void index_dealloc(indexObject *self)
1774 1774 {
1775 1775 _index_clearcaches(self);
1776 1776 Py_XDECREF(self->data);
1777 1777 Py_XDECREF(self->added);
1778 1778 PyObject_Del(self);
1779 1779 }
1780 1780
/*
 * Sequence protocol for the index type: only length, item access and
 * containment are provided; every other slot is left empty.
 */
1781 1781 static PySequenceMethods index_sequence_methods = {
1782 1782 (lenfunc)index_length, /* sq_length */
1783 1783 0, /* sq_concat */
1784 1784 0, /* sq_repeat */
1785 1785 (ssizeargfunc)index_get, /* sq_item */
1786 1786 0, /* sq_slice */
1787 1787 0, /* sq_ass_item */
1788 1788 0, /* sq_ass_slice */
1789 1789 (objobjproc)index_contains, /* sq_contains */
1790 1790 };
1791 1791
/*
 * Mapping protocol for the index type: length, subscript lookup and
 * subscript assignment/deletion.
 */
1792 1792 static PyMappingMethods index_mapping_methods = {
1793 1793 (lenfunc)index_length, /* mp_length */
1794 1794 (binaryfunc)index_getitem, /* mp_subscript */
1795 1795 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1796 1796 };
1797 1797
/*
 * Methods exposed on index objects.
 *
 * NOTE(review): the help text for "get" says it returns an index
 * entry, but index_m_get() looks a node up and returns its rev number
 * (or None); consider rewording the help string.
 */
1798 1798 static PyMethodDef index_methods[] = {
1799 1799 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1800 1800 "return the gca set of the given revs"},
1801 1801 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1802 1802 "clear the index caches"},
1803 1803 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1804 1804 "get an index entry"},
1805 1805 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1806 1806 "get head revisions"},
1807 1807 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1808 1808 "insert an index entry"},
1809 1809 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1810 1810 "match a potentially ambiguous node ID"},
1811 1811 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1812 1812 "stats for the index"},
1813 1813 {NULL} /* Sentinel */
1814 1814 };
1815 1815
/*
 * Attributes of index objects: "nodemap" has a getter only (no
 * setter), implemented by index_nodemap().
 */
1816 1816 static PyGetSetDef index_getset[] = {
1817 1817 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1818 1818 {NULL} /* Sentinel */
1819 1819 };
1820 1820
/*
 * Type object for "parsers.index".  It wires up the sequence and
 * mapping protocols, the method and getset tables, index_init as
 * tp_init and index_dealloc as tp_dealloc; tp_new is not set in this
 * initializer.
 */
1821 1821 static PyTypeObject indexType = {
1822 1822 PyObject_HEAD_INIT(NULL)
1823 1823 0, /* ob_size */
1824 1824 "parsers.index", /* tp_name */
1825 1825 sizeof(indexObject), /* tp_basicsize */
1826 1826 0, /* tp_itemsize */
1827 1827 (destructor)index_dealloc, /* tp_dealloc */
1828 1828 0, /* tp_print */
1829 1829 0, /* tp_getattr */
1830 1830 0, /* tp_setattr */
1831 1831 0, /* tp_compare */
1832 1832 0, /* tp_repr */
1833 1833 0, /* tp_as_number */
1834 1834 &index_sequence_methods, /* tp_as_sequence */
1835 1835 &index_mapping_methods, /* tp_as_mapping */
1836 1836 0, /* tp_hash */
1837 1837 0, /* tp_call */
1838 1838 0, /* tp_str */
1839 1839 0, /* tp_getattro */
1840 1840 0, /* tp_setattro */
1841 1841 0, /* tp_as_buffer */
1842 1842 Py_TPFLAGS_DEFAULT, /* tp_flags */
1843 1843 "revlog index", /* tp_doc */
1844 1844 0, /* tp_traverse */
1845 1845 0, /* tp_clear */
1846 1846 0, /* tp_richcompare */
1847 1847 0, /* tp_weaklistoffset */
1848 1848 0, /* tp_iter */
1849 1849 0, /* tp_iternext */
1850 1850 index_methods, /* tp_methods */
1851 1851 0, /* tp_members */
1852 1852 index_getset, /* tp_getset */
1853 1853 0, /* tp_base */
1854 1854 0, /* tp_dict */
1855 1855 0, /* tp_descr_get */
1856 1856 0, /* tp_descr_set */
1857 1857 0, /* tp_dictoffset */
1858 1858 (initproc)index_init, /* tp_init */
1859 1859 0, /* tp_alloc */
1860 1860 };
1861 1861
1862 1862 /*
1863 1863 * returns a tuple of the form (index, index, cache) with elements as
1864 1864 * follows:
1865 1865 *
1866 1866 * index: an index object that lazily parses RevlogNG records
1867 1867 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1868 1868 *
1869 1869 * added complications are for backwards compatibility
1870 1870 */
1871 1871 static PyObject *parse_index2(PyObject *self, PyObject *args)
1872 1872 {
1873 1873 PyObject *tuple = NULL, *cache = NULL;
1874 1874 indexObject *idx;
1875 1875 int ret;
1876 1876
1877 1877 idx = PyObject_New(indexObject, &indexType);
1878 1878 if (idx == NULL)
1879 1879 goto bail;
1880 1880
1881 1881 ret = index_init(idx, args);
1882 1882 if (ret == -1)
1883 1883 goto bail;
1884 1884
1885 1885 if (idx->inlined) {
1886 1886 cache = Py_BuildValue("iO", 0, idx->data);
1887 1887 if (cache == NULL)
1888 1888 goto bail;
1889 1889 } else {
1890 1890 cache = Py_None;
1891 1891 Py_INCREF(cache);
1892 1892 }
1893 1893
1894 1894 tuple = Py_BuildValue("NN", idx, cache);
1895 1895 if (!tuple)
1896 1896 goto bail;
1897 1897 return tuple;
1898 1898
1899 1899 bail:
1900 1900 Py_XDECREF(idx);
1901 1901 Py_XDECREF(cache);
1902 1902 Py_XDECREF(tuple);
1903 1903 return NULL;
1904 1904 }
1905 1905
1906 1906 static char parsers_doc[] = "Efficient content parsing.";
1907 1907
1908 1908 PyObject *encodedir(PyObject *self, PyObject *args);
1909 1909 PyObject *pathencode(PyObject *self, PyObject *args);
1910 1910 PyObject *lowerencode(PyObject *self, PyObject *args);
1911 1911
/*
 * Module-level functions of the "parsers" module.  encodedir,
 * pathencode and lowerencode are only declared in this file (see the
 * prototypes above); the other entries are defined here.
 */
1912 1912 static PyMethodDef methods[] = {
1913 1913 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1914 1914 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1915 1915 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1916 1916 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1917 1917 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1918 1918 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1919 1919 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1920 1920 {NULL, NULL}
1921 1921 };
1922 1922
1923 1923 void dirs_module_init(PyObject *mod);
1924 1924
/*
 * Initialisation shared by the Python 2 and Python 3 entry points:
 * runs dirs_module_init(), readies the index type and adds it to the
 * module as "index", then builds the nullentry and dirstate_unset
 * values.  Failures are not reported to the caller; if
 * PyType_Ready() fails the function returns early.
 */
1925 1925 static void module_init(PyObject *mod)
1926 1926 {
1927 1927 dirs_module_init(mod);
1928 1928
1929 1929 indexType.tp_new = PyType_GenericNew;
1930 1930 if (PyType_Ready(&indexType) < 0)
1931 1931 return;
1932 1932 Py_INCREF(&indexType);
1933 1933
1934 1934 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1935 1935
1936 1936 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1937 1937 -1, -1, -1, -1, nullid, 20);
/* take nullentry out of the cyclic garbage collector's tracking */
1938 1938 if (nullentry)
1939 1939 PyObject_GC_UnTrack(nullentry);
1940 1940
1941 1941 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1942 1942 }
1943 1943
1944 static int check_python_version()
1945 {
1946 PyObject *sys = PyImport_ImportModule("sys");
1947 PyObject *hexversion = PyObject_GetAttrString(sys, "hexversion");
1948 long version = PyInt_AsLong(hexversion);
1949 /* sys.hexversion is a 32-bit number by default, so the -1 case
1950 * should only occur in unusual circumstances (e.g. if sys.hexversion
1951 * is manually set to an invalid value). */
1952 if ((version == -1) || (version >> 16 != PY_VERSION_HEX >> 16)) {
1953 PyErr_Format(PyExc_ImportError, "Python minor version mismatch: "
1954 "The Mercurial extension modules were compiled with Python "
1955 PY_VERSION ", but Mercurial is currently using Python with "
1956 "sys.hexversion=%ld: Python %s\n at: %s", version,
1957 Py_GetVersion(), Py_GetProgramFullPath());
1958 return -1;
1959 }
1960 return 0;
1961 }
1962
1963 1944 #ifdef IS_PY3K
/*
 * Python 3 module definition: name, docstring, a size of -1, and the
 * module-level method table.
 */
1964 1945 static struct PyModuleDef parsers_module = {
1965 1946 PyModuleDef_HEAD_INIT,
1966 1947 "parsers",
1967 1948 parsers_doc,
1968 1949 -1,
1969 1950 methods
1970 1951 };
1971 1952
1972 1953 PyMODINIT_FUNC PyInit_parsers(void)
1973 1954 {
1974 if (check_python_version() == -1)
1975 return;
1976 1955 PyObject *mod = PyModule_Create(&parsers_module);
1977 1956 module_init(mod);
1978 1957 return mod;
1979 1958 }
1980 1959 #else
1981 1960 PyMODINIT_FUNC initparsers(void)
1982 1961 {
1983 if (check_python_version() == -1)
1984 return;
1985 1962 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1986 1963 module_init(mod);
1987 1964 }
1988 1965 #endif
@@ -1,191 +1,134 b''
1 1 from mercurial import parsers
2 2 from mercurial.node import nullid, nullrev
3 3 import struct
4 import subprocess
5 import sys
6 4
7 5 # This unit test compares the return value of the original Python
8 6 # implementation of parseindex and the new C implementation for
9 7 # an index file with and without inlined data
10 8
11 9 # original python implementation
12 10 def gettype(q):
13 11 return int(q & 0xFFFF)
14 12
15 13 def offset_type(offset, type):
16 14 return long(long(offset) << 16 | type)
17 15
18 16 indexformatng = ">Qiiiiii20s12x"
19 17
20 18 def py_parseindex(data, inline) :
21 19 s = 64
22 20 cache = None
23 21 index = []
24 22 nodemap = {nullid: nullrev}
25 23 n = off = 0
26 24
27 25 l = len(data) - s
28 26 append = index.append
29 27 if inline:
30 28 cache = (0, data)
31 29 while off <= l:
32 30 e = struct.unpack(indexformatng, data[off:off + s])
33 31 nodemap[e[7]] = n
34 32 append(e)
35 33 n += 1
36 34 if e[1] < 0:
37 35 break
38 36 off += e[1] + s
39 37 else:
40 38 while off <= l:
41 39 e = struct.unpack(indexformatng, data[off:off + s])
42 40 nodemap[e[7]] = n
43 41 append(e)
44 42 n += 1
45 43 off += s
46 44
47 45 e = list(index[0])
48 46 type = gettype(e[0])
49 47 e[0] = offset_type(0, type)
50 48 index[0] = tuple(e)
51 49
52 50 # add the magic null revision at -1
53 51 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
54 52
55 53 return index, cache
56 54
57 55 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
58 56 '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
59 57 '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
60 58 'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
61 59 'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
62 60 '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
63 61 '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
64 62 '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
65 63 '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
66 64 '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
67 65 '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
68 66 '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
69 67 'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
70 68 '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
71 69 '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
72 70 '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
73 71 '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
74 72 '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
75 73 '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
76 74 'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
77 75 '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
78 76 '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
79 77 '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
80 78
81 79 data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
82 80 '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
83 81 '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
84 82 '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
85 83 '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
86 84 '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
87 85 '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
88 86 '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
89 87 '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
90 88 '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
91 89 '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
92 90 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
93 91 '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
94 92 '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
95 93 '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
96 94 '\x00\x00\x00\x00\x00\x00\x00\x00\x00'
97 95
98 96 def parse_index2(data, inline):
99 97 index, chunkcache = parsers.parse_index2(data, inline)
100 98 return list(index), chunkcache
101 99
102 def importparsers(hexversion):
103 """Import mercurial.parsers with the given sys.hexversion."""
104 # The file parsers.c inspects sys.hexversion to determine the version
105 # of the currently-running Python interpreter, so we monkey-patch
106 # sys.hexversion to simulate using different versions.
107 code = ("import sys; sys.hexversion=%s; "
108 "import mercurial.parsers" % hexversion)
109 cmd = "python -c \"%s\"" % code
110 # We need to do these tests inside a subprocess because parser.c's
111 # version-checking code happens inside the module init function, and
112 # when using reload() to reimport an extension module, "The init function
113 # of extension modules is not called a second time"
114 # (from http://docs.python.org/2/library/functions.html?#reload).
115 p = subprocess.Popen(cmd, shell=True,
116 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
117 return p.communicate() # returns stdout, stderr
118
119 def printhexfail(testnumber, hexversion, msg):
120 try:
121 hexstring = hex(hexversion)
122 except TypeError:
123 hexstring = None
124 print ("%s) using Python %s and patched sys.hexversion %r (%r): %s" %
125 (testnumber, sys.version_info, hexversion, hexstring, msg))
126
127 def testversionokay(testnumber, hexversion):
128 stdout, stderr = importparsers(hexversion)
129 if stdout:
130 printhexfail(testnumber, hexversion,
131 "Expected no stdout but got: %r" % stdout)
132
133 def testversionfail(testnumber, hexversion):
134 stdout, stderr = importparsers(hexversion)
135 if not "ImportError: Python minor version mismatch" in stdout:
136 printhexfail(testnumber, hexversion,
137 "Expected stdout to contain %r but got: %r" %
138 (errstring, stdout))
139
140 def makehex(major, minor, micro):
141 return int("%x%02x%02x00" % (major, minor, micro), 16)
142
143 def runversiontests():
144 """Test importing parsers using different Python versions."""
145 info = sys.version_info
146 major, minor, micro = info[0], info[1], info[2]
147 # Test same major-minor versions.
148 testversionokay(1, makehex(major, minor, micro))
149 testversionokay(2, makehex(major, minor, micro + 1))
150 # Test different major-minor versions.
151 testversionfail(3, makehex(major + 1, minor, micro))
152 testversionfail(4, makehex(major, minor + 1, micro))
153 testversionfail(5, "'foo'")
154
155 100 def runtest() :
156 runversiontests()
157
158 101 # Check that parse_index2() raises TypeError on bad arguments.
159 102 try:
160 103 parse_index2(0, True)
161 104 except TypeError:
162 105 pass
163 106 else:
164 107 print "Expected to get TypeError."
165 108
166 109 py_res_1 = py_parseindex(data_inlined, True)
167 110 c_res_1 = parse_index2(data_inlined, True)
168 111
169 112 py_res_2 = py_parseindex(data_non_inlined, False)
170 113 c_res_2 = parse_index2(data_non_inlined, False)
171 114
172 115 if py_res_1 != c_res_1:
173 116 print "Parse index result (with inlined data) differs!"
174 117
175 118 if py_res_2 != c_res_2:
176 119 print "Parse index result (no inlined data) differs!"
177 120
178 121 ix = parsers.parse_index2(data_inlined, True)[0]
179 122 for i, r in enumerate(ix):
180 123 if r[7] == nullid:
181 124 i = -1
182 125 try:
183 126 if ix[r[7]] != i:
184 127 print 'Reverse lookup inconsistent for %r' % r[7].encode('hex')
185 128 except TypeError:
186 129 # pure version doesn't support this
187 130 break
188 131
189 132 print "done"
190 133
191 134 runtest()
General Comments 0
You need to be logged in to leave comments. Login now