##// END OF EJS Templates
parsers: fail fast if Python has wrong minor version (issue4110)...
Chris Jerdonek -
r20742:3681de20 default
parent child Browse files
Show More
@@ -1,1952 +1,1986 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 static char *versionerrortext = "Python minor version mismatch";
18
17 19 static int8_t hextable[256] = {
18 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
22 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
23 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
25 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
34 36 };
35 37
36 38 static inline int hexdigit(const char *p, Py_ssize_t off)
37 39 {
38 40 int8_t val = hextable[(unsigned char)p[off]];
39 41
40 42 if (val >= 0) {
41 43 return val;
42 44 }
43 45
44 46 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
45 47 return 0;
46 48 }
47 49
48 50 /*
49 51 * Turn a hex-encoded string into binary.
50 52 */
51 53 static PyObject *unhexlify(const char *str, int len)
52 54 {
53 55 PyObject *ret;
54 56 char *d;
55 57 int i;
56 58
57 59 ret = PyBytes_FromStringAndSize(NULL, len / 2);
58 60
59 61 if (!ret)
60 62 return NULL;
61 63
62 64 d = PyBytes_AsString(ret);
63 65
64 66 for (i = 0; i < len;) {
65 67 int hi = hexdigit(str, i++);
66 68 int lo = hexdigit(str, i++);
67 69 *d++ = (hi << 4) | lo;
68 70 }
69 71
70 72 return ret;
71 73 }
72 74
73 75 /*
74 76 * This code assumes that a manifest is stitched together with newline
75 77 * ('\n') characters.
76 78 */
77 79 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 80 {
79 81 PyObject *mfdict, *fdict;
80 82 char *str, *start, *end;
81 83 int len;
82 84
83 85 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 86 &PyDict_Type, &mfdict,
85 87 &PyDict_Type, &fdict,
86 88 &str, &len))
87 89 goto quit;
88 90
89 91 start = str;
90 92 end = str + len;
91 93 while (start < end) {
92 94 PyObject *file = NULL, *node = NULL;
93 95 PyObject *flags = NULL;
94 96 char *zero = NULL, *newline = NULL;
95 97 ptrdiff_t nlen;
96 98
97 99 zero = memchr(start, '\0', end - start);
98 100 if (!zero) {
99 101 PyErr_SetString(PyExc_ValueError,
100 102 "manifest entry has no separator");
101 103 goto quit;
102 104 }
103 105
104 106 newline = memchr(zero + 1, '\n', end - (zero + 1));
105 107 if (!newline) {
106 108 PyErr_SetString(PyExc_ValueError,
107 109 "manifest contains trailing garbage");
108 110 goto quit;
109 111 }
110 112
111 113 file = PyBytes_FromStringAndSize(start, zero - start);
112 114
113 115 if (!file)
114 116 goto bail;
115 117
116 118 nlen = newline - zero - 1;
117 119
118 120 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
119 121 if (!node)
120 122 goto bail;
121 123
122 124 if (nlen > 40) {
123 125 flags = PyBytes_FromStringAndSize(zero + 41,
124 126 nlen - 40);
125 127 if (!flags)
126 128 goto bail;
127 129
128 130 if (PyDict_SetItem(fdict, file, flags) == -1)
129 131 goto bail;
130 132 }
131 133
132 134 if (PyDict_SetItem(mfdict, file, node) == -1)
133 135 goto bail;
134 136
135 137 start = newline + 1;
136 138
137 139 Py_XDECREF(flags);
138 140 Py_XDECREF(node);
139 141 Py_XDECREF(file);
140 142 continue;
141 143 bail:
142 144 Py_XDECREF(flags);
143 145 Py_XDECREF(node);
144 146 Py_XDECREF(file);
145 147 goto quit;
146 148 }
147 149
148 150 Py_INCREF(Py_None);
149 151 return Py_None;
150 152 quit:
151 153 return NULL;
152 154 }
153 155
154 156 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
155 157 {
156 158 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
157 159 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
158 160 char state, *cur, *str, *cpos;
159 161 int mode, size, mtime;
160 162 unsigned int flen;
161 163 int len, pos = 40;
162 164
163 165 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
164 166 &PyDict_Type, &dmap,
165 167 &PyDict_Type, &cmap,
166 168 &str, &len))
167 169 goto quit;
168 170
169 171 /* read parents */
170 172 if (len < 40)
171 173 goto quit;
172 174
173 175 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
174 176 if (!parents)
175 177 goto quit;
176 178
177 179 /* read filenames */
178 180 while (pos >= 40 && pos < len) {
179 181 cur = str + pos;
180 182 /* unpack header */
181 183 state = *cur;
182 184 mode = getbe32(cur + 1);
183 185 size = getbe32(cur + 5);
184 186 mtime = getbe32(cur + 9);
185 187 flen = getbe32(cur + 13);
186 188 pos += 17;
187 189 cur += 17;
188 190 if (flen > len - pos) {
189 191 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
190 192 goto quit;
191 193 }
192 194
193 195 entry = Py_BuildValue("ciii", state, mode, size, mtime);
194 196 if (!entry)
195 197 goto quit;
196 198 PyObject_GC_UnTrack(entry); /* don't waste time with this */
197 199
198 200 cpos = memchr(cur, 0, flen);
199 201 if (cpos) {
200 202 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
201 203 cname = PyBytes_FromStringAndSize(cpos + 1,
202 204 flen - (cpos - cur) - 1);
203 205 if (!fname || !cname ||
204 206 PyDict_SetItem(cmap, fname, cname) == -1 ||
205 207 PyDict_SetItem(dmap, fname, entry) == -1)
206 208 goto quit;
207 209 Py_DECREF(cname);
208 210 } else {
209 211 fname = PyBytes_FromStringAndSize(cur, flen);
210 212 if (!fname ||
211 213 PyDict_SetItem(dmap, fname, entry) == -1)
212 214 goto quit;
213 215 }
214 216 Py_DECREF(fname);
215 217 Py_DECREF(entry);
216 218 fname = cname = entry = NULL;
217 219 pos += flen;
218 220 }
219 221
220 222 ret = parents;
221 223 Py_INCREF(ret);
222 224 quit:
223 225 Py_XDECREF(fname);
224 226 Py_XDECREF(cname);
225 227 Py_XDECREF(entry);
226 228 Py_XDECREF(parents);
227 229 return ret;
228 230 }
229 231
230 232 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
231 233 {
232 234 PyObject *o = PyTuple_GET_ITEM(tuple, off);
233 235 long val;
234 236
235 237 if (PyInt_Check(o))
236 238 val = PyInt_AS_LONG(o);
237 239 else if (PyLong_Check(o)) {
238 240 val = PyLong_AsLong(o);
239 241 if (val == -1 && PyErr_Occurred())
240 242 return -1;
241 243 } else {
242 244 PyErr_SetString(PyExc_TypeError, "expected an int or long");
243 245 return -1;
244 246 }
245 247 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
246 248 PyErr_SetString(PyExc_OverflowError,
247 249 "Python value to large to convert to uint32_t");
248 250 return -1;
249 251 }
250 252 *v = (uint32_t)val;
251 253 return 0;
252 254 }
253 255
254 256 static PyObject *dirstate_unset;
255 257
256 258 /*
257 259 * Efficiently pack a dirstate object into its on-disk format.
258 260 */
259 261 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
260 262 {
261 263 PyObject *packobj = NULL;
262 264 PyObject *map, *copymap, *pl;
263 265 Py_ssize_t nbytes, pos, l;
264 266 PyObject *k, *v, *pn;
265 267 char *p, *s;
266 268 double now;
267 269
268 270 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
269 271 &PyDict_Type, &map, &PyDict_Type, &copymap,
270 272 &pl, &now))
271 273 return NULL;
272 274
273 275 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
274 276 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
275 277 return NULL;
276 278 }
277 279
278 280 /* Figure out how much we need to allocate. */
279 281 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
280 282 PyObject *c;
281 283 if (!PyString_Check(k)) {
282 284 PyErr_SetString(PyExc_TypeError, "expected string key");
283 285 goto bail;
284 286 }
285 287 nbytes += PyString_GET_SIZE(k) + 17;
286 288 c = PyDict_GetItem(copymap, k);
287 289 if (c) {
288 290 if (!PyString_Check(c)) {
289 291 PyErr_SetString(PyExc_TypeError,
290 292 "expected string key");
291 293 goto bail;
292 294 }
293 295 nbytes += PyString_GET_SIZE(c) + 1;
294 296 }
295 297 }
296 298
297 299 packobj = PyString_FromStringAndSize(NULL, nbytes);
298 300 if (packobj == NULL)
299 301 goto bail;
300 302
301 303 p = PyString_AS_STRING(packobj);
302 304
303 305 pn = PySequence_ITEM(pl, 0);
304 306 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
305 307 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
306 308 goto bail;
307 309 }
308 310 memcpy(p, s, l);
309 311 p += 20;
310 312 pn = PySequence_ITEM(pl, 1);
311 313 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
312 314 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
313 315 goto bail;
314 316 }
315 317 memcpy(p, s, l);
316 318 p += 20;
317 319
318 320 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
319 321 uint32_t mode, size, mtime;
320 322 Py_ssize_t len, l;
321 323 PyObject *o;
322 324 char *s, *t;
323 325
324 326 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
325 327 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
326 328 goto bail;
327 329 }
328 330 o = PyTuple_GET_ITEM(v, 0);
329 331 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
330 332 PyErr_SetString(PyExc_TypeError, "expected one byte");
331 333 goto bail;
332 334 }
333 335 *p++ = *s;
334 336 if (getintat(v, 1, &mode) == -1)
335 337 goto bail;
336 338 if (getintat(v, 2, &size) == -1)
337 339 goto bail;
338 340 if (getintat(v, 3, &mtime) == -1)
339 341 goto bail;
340 342 if (*s == 'n' && mtime == (uint32_t)now) {
341 343 /* See pure/parsers.py:pack_dirstate for why we do
342 344 * this. */
343 345 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
344 346 goto bail;
345 347 mtime = -1;
346 348 }
347 349 putbe32(mode, p);
348 350 putbe32(size, p + 4);
349 351 putbe32(mtime, p + 8);
350 352 t = p + 12;
351 353 p += 16;
352 354 len = PyString_GET_SIZE(k);
353 355 memcpy(p, PyString_AS_STRING(k), len);
354 356 p += len;
355 357 o = PyDict_GetItem(copymap, k);
356 358 if (o) {
357 359 *p++ = '\0';
358 360 l = PyString_GET_SIZE(o);
359 361 memcpy(p, PyString_AS_STRING(o), l);
360 362 p += l;
361 363 len += l + 1;
362 364 }
363 365 putbe32((uint32_t)len, t);
364 366 }
365 367
366 368 pos = p - PyString_AS_STRING(packobj);
367 369 if (pos != nbytes) {
368 370 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
369 371 (long)pos, (long)nbytes);
370 372 goto bail;
371 373 }
372 374
373 375 return packobj;
374 376 bail:
375 377 Py_XDECREF(packobj);
376 378 return NULL;
377 379 }
378 380
379 381 /*
380 382 * A base-16 trie for fast node->rev mapping.
381 383 *
382 384 * Positive value is index of the next node in the trie
383 385 * Negative value is a leaf: -(rev + 1)
384 386 * Zero is empty
385 387 */
386 388 typedef struct {
387 389 int children[16];
388 390 } nodetree;
389 391
390 392 /*
391 393 * This class has two behaviours.
392 394 *
393 395 * When used in a list-like way (with integer keys), we decode an
394 396 * entry in a RevlogNG index file on demand. Our last entry is a
395 397 * sentinel, always a nullid. We have limited support for
396 398 * integer-keyed insert and delete, only at elements right before the
397 399 * sentinel.
398 400 *
399 401 * With string keys, we lazily perform a reverse mapping from node to
400 402 * rev, using a base-16 trie.
401 403 */
402 404 typedef struct {
403 405 PyObject_HEAD
404 406 /* Type-specific fields go here. */
405 407 PyObject *data; /* raw bytes of index */
406 408 PyObject **cache; /* cached tuples */
407 409 const char **offsets; /* populated on demand */
408 410 Py_ssize_t raw_length; /* original number of elements */
409 411 Py_ssize_t length; /* current number of elements */
410 412 PyObject *added; /* populated on demand */
411 413 PyObject *headrevs; /* cache, invalidated on changes */
412 414 nodetree *nt; /* base-16 trie */
413 415 int ntlength; /* # nodes in use */
414 416 int ntcapacity; /* # nodes allocated */
415 417 int ntdepth; /* maximum depth of tree */
416 418 int ntsplits; /* # splits performed */
417 419 int ntrev; /* last rev scanned */
418 420 int ntlookups; /* # lookups */
419 421 int ntmisses; /* # lookups that miss the cache */
420 422 int inlined;
421 423 } indexObject;
422 424
423 425 static Py_ssize_t index_length(const indexObject *self)
424 426 {
425 427 if (self->added == NULL)
426 428 return self->length;
427 429 return self->length + PyList_GET_SIZE(self->added);
428 430 }
429 431
430 432 static PyObject *nullentry;
431 433 static const char nullid[20];
432 434
433 435 static long inline_scan(indexObject *self, const char **offsets);
434 436
435 437 #if LONG_MAX == 0x7fffffffL
436 438 static char *tuple_format = "Kiiiiiis#";
437 439 #else
438 440 static char *tuple_format = "kiiiiiis#";
439 441 #endif
440 442
441 443 /* A RevlogNG v1 index entry is 64 bytes long. */
442 444 static const long v1_hdrsize = 64;
443 445
444 446 /*
445 447 * Return a pointer to the beginning of a RevlogNG record.
446 448 */
447 449 static const char *index_deref(indexObject *self, Py_ssize_t pos)
448 450 {
449 451 if (self->inlined && pos > 0) {
450 452 if (self->offsets == NULL) {
451 453 self->offsets = malloc(self->raw_length *
452 454 sizeof(*self->offsets));
453 455 if (self->offsets == NULL)
454 456 return (const char *)PyErr_NoMemory();
455 457 inline_scan(self, self->offsets);
456 458 }
457 459 return self->offsets[pos];
458 460 }
459 461
460 462 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
461 463 }
462 464
463 465 /*
464 466 * RevlogNG format (all in big endian, data may be inlined):
465 467 * 6 bytes: offset
466 468 * 2 bytes: flags
467 469 * 4 bytes: compressed length
468 470 * 4 bytes: uncompressed length
469 471 * 4 bytes: base revision
470 472 * 4 bytes: link revision
471 473 * 4 bytes: parent 1 revision
472 474 * 4 bytes: parent 2 revision
473 475 * 32 bytes: nodeid (only 20 bytes used)
474 476 */
475 477 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
476 478 {
477 479 uint64_t offset_flags;
478 480 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
479 481 const char *c_node_id;
480 482 const char *data;
481 483 Py_ssize_t length = index_length(self);
482 484 PyObject *entry;
483 485
484 486 if (pos < 0)
485 487 pos += length;
486 488
487 489 if (pos < 0 || pos >= length) {
488 490 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
489 491 return NULL;
490 492 }
491 493
492 494 if (pos == length - 1) {
493 495 Py_INCREF(nullentry);
494 496 return nullentry;
495 497 }
496 498
497 499 if (pos >= self->length - 1) {
498 500 PyObject *obj;
499 501 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
500 502 Py_INCREF(obj);
501 503 return obj;
502 504 }
503 505
504 506 if (self->cache) {
505 507 if (self->cache[pos]) {
506 508 Py_INCREF(self->cache[pos]);
507 509 return self->cache[pos];
508 510 }
509 511 } else {
510 512 self->cache = calloc(self->raw_length, sizeof(PyObject *));
511 513 if (self->cache == NULL)
512 514 return PyErr_NoMemory();
513 515 }
514 516
515 517 data = index_deref(self, pos);
516 518 if (data == NULL)
517 519 return NULL;
518 520
519 521 offset_flags = getbe32(data + 4);
520 522 if (pos == 0) /* mask out version number for the first entry */
521 523 offset_flags &= 0xFFFF;
522 524 else {
523 525 uint32_t offset_high = getbe32(data);
524 526 offset_flags |= ((uint64_t)offset_high) << 32;
525 527 }
526 528
527 529 comp_len = getbe32(data + 8);
528 530 uncomp_len = getbe32(data + 12);
529 531 base_rev = getbe32(data + 16);
530 532 link_rev = getbe32(data + 20);
531 533 parent_1 = getbe32(data + 24);
532 534 parent_2 = getbe32(data + 28);
533 535 c_node_id = data + 32;
534 536
535 537 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
536 538 uncomp_len, base_rev, link_rev,
537 539 parent_1, parent_2, c_node_id, 20);
538 540
539 541 if (entry) {
540 542 PyObject_GC_UnTrack(entry);
541 543 Py_INCREF(entry);
542 544 }
543 545
544 546 self->cache[pos] = entry;
545 547
546 548 return entry;
547 549 }
548 550
549 551 /*
550 552 * Return the 20-byte SHA of the node corresponding to the given rev.
551 553 */
552 554 static const char *index_node(indexObject *self, Py_ssize_t pos)
553 555 {
554 556 Py_ssize_t length = index_length(self);
555 557 const char *data;
556 558
557 559 if (pos == length - 1 || pos == INT_MAX)
558 560 return nullid;
559 561
560 562 if (pos >= length)
561 563 return NULL;
562 564
563 565 if (pos >= self->length - 1) {
564 566 PyObject *tuple, *str;
565 567 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
566 568 str = PyTuple_GetItem(tuple, 7);
567 569 return str ? PyString_AS_STRING(str) : NULL;
568 570 }
569 571
570 572 data = index_deref(self, pos);
571 573 return data ? data + 32 : NULL;
572 574 }
573 575
574 576 static int nt_insert(indexObject *self, const char *node, int rev);
575 577
576 578 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
577 579 {
578 580 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
579 581 return -1;
580 582 if (*nodelen == 20)
581 583 return 0;
582 584 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
583 585 return -1;
584 586 }
585 587
586 588 static PyObject *index_insert(indexObject *self, PyObject *args)
587 589 {
588 590 PyObject *obj;
589 591 char *node;
590 592 long offset;
591 593 Py_ssize_t len, nodelen;
592 594
593 595 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
594 596 return NULL;
595 597
596 598 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
597 599 PyErr_SetString(PyExc_TypeError, "8-tuple required");
598 600 return NULL;
599 601 }
600 602
601 603 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
602 604 return NULL;
603 605
604 606 len = index_length(self);
605 607
606 608 if (offset < 0)
607 609 offset += len;
608 610
609 611 if (offset != len - 1) {
610 612 PyErr_SetString(PyExc_IndexError,
611 613 "insert only supported at index -1");
612 614 return NULL;
613 615 }
614 616
615 617 if (offset > INT_MAX) {
616 618 PyErr_SetString(PyExc_ValueError,
617 619 "currently only 2**31 revs supported");
618 620 return NULL;
619 621 }
620 622
621 623 if (self->added == NULL) {
622 624 self->added = PyList_New(0);
623 625 if (self->added == NULL)
624 626 return NULL;
625 627 }
626 628
627 629 if (PyList_Append(self->added, obj) == -1)
628 630 return NULL;
629 631
630 632 if (self->nt)
631 633 nt_insert(self, node, (int)offset);
632 634
633 635 Py_CLEAR(self->headrevs);
634 636 Py_RETURN_NONE;
635 637 }
636 638
637 639 static void _index_clearcaches(indexObject *self)
638 640 {
639 641 if (self->cache) {
640 642 Py_ssize_t i;
641 643
642 644 for (i = 0; i < self->raw_length; i++)
643 645 Py_CLEAR(self->cache[i]);
644 646 free(self->cache);
645 647 self->cache = NULL;
646 648 }
647 649 if (self->offsets) {
648 650 free(self->offsets);
649 651 self->offsets = NULL;
650 652 }
651 653 if (self->nt) {
652 654 free(self->nt);
653 655 self->nt = NULL;
654 656 }
655 657 Py_CLEAR(self->headrevs);
656 658 }
657 659
658 660 static PyObject *index_clearcaches(indexObject *self)
659 661 {
660 662 _index_clearcaches(self);
661 663 self->ntlength = self->ntcapacity = 0;
662 664 self->ntdepth = self->ntsplits = 0;
663 665 self->ntrev = -1;
664 666 self->ntlookups = self->ntmisses = 0;
665 667 Py_RETURN_NONE;
666 668 }
667 669
668 670 static PyObject *index_stats(indexObject *self)
669 671 {
670 672 PyObject *obj = PyDict_New();
671 673
672 674 if (obj == NULL)
673 675 return NULL;
674 676
675 677 #define istat(__n, __d) \
676 678 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
677 679 goto bail;
678 680
679 681 if (self->added) {
680 682 Py_ssize_t len = PyList_GET_SIZE(self->added);
681 683 if (PyDict_SetItemString(obj, "index entries added",
682 684 PyInt_FromSsize_t(len)) == -1)
683 685 goto bail;
684 686 }
685 687
686 688 if (self->raw_length != self->length - 1)
687 689 istat(raw_length, "revs on disk");
688 690 istat(length, "revs in memory");
689 691 istat(ntcapacity, "node trie capacity");
690 692 istat(ntdepth, "node trie depth");
691 693 istat(ntlength, "node trie count");
692 694 istat(ntlookups, "node trie lookups");
693 695 istat(ntmisses, "node trie misses");
694 696 istat(ntrev, "node trie last rev scanned");
695 697 istat(ntsplits, "node trie splits");
696 698
697 699 #undef istat
698 700
699 701 return obj;
700 702
701 703 bail:
702 704 Py_XDECREF(obj);
703 705 return NULL;
704 706 }
705 707
706 708 /*
707 709 * When we cache a list, we want to be sure the caller can't mutate
708 710 * the cached copy.
709 711 */
710 712 static PyObject *list_copy(PyObject *list)
711 713 {
712 714 Py_ssize_t len = PyList_GET_SIZE(list);
713 715 PyObject *newlist = PyList_New(len);
714 716 Py_ssize_t i;
715 717
716 718 if (newlist == NULL)
717 719 return NULL;
718 720
719 721 for (i = 0; i < len; i++) {
720 722 PyObject *obj = PyList_GET_ITEM(list, i);
721 723 Py_INCREF(obj);
722 724 PyList_SET_ITEM(newlist, i, obj);
723 725 }
724 726
725 727 return newlist;
726 728 }
727 729
728 730 static PyObject *index_headrevs(indexObject *self)
729 731 {
730 732 Py_ssize_t i, len, addlen;
731 733 char *nothead = NULL;
732 734 PyObject *heads;
733 735
734 736 if (self->headrevs)
735 737 return list_copy(self->headrevs);
736 738
737 739 len = index_length(self) - 1;
738 740 heads = PyList_New(0);
739 741 if (heads == NULL)
740 742 goto bail;
741 743 if (len == 0) {
742 744 PyObject *nullid = PyInt_FromLong(-1);
743 745 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
744 746 Py_XDECREF(nullid);
745 747 goto bail;
746 748 }
747 749 goto done;
748 750 }
749 751
750 752 nothead = calloc(len, 1);
751 753 if (nothead == NULL)
752 754 goto bail;
753 755
754 756 for (i = 0; i < self->raw_length; i++) {
755 757 const char *data = index_deref(self, i);
756 758 int parent_1 = getbe32(data + 24);
757 759 int parent_2 = getbe32(data + 28);
758 760 if (parent_1 >= 0)
759 761 nothead[parent_1] = 1;
760 762 if (parent_2 >= 0)
761 763 nothead[parent_2] = 1;
762 764 }
763 765
764 766 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
765 767
766 768 for (i = 0; i < addlen; i++) {
767 769 PyObject *rev = PyList_GET_ITEM(self->added, i);
768 770 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
769 771 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
770 772 long parent_1, parent_2;
771 773
772 774 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
773 775 PyErr_SetString(PyExc_TypeError,
774 776 "revlog parents are invalid");
775 777 goto bail;
776 778 }
777 779 parent_1 = PyInt_AS_LONG(p1);
778 780 parent_2 = PyInt_AS_LONG(p2);
779 781 if (parent_1 >= 0)
780 782 nothead[parent_1] = 1;
781 783 if (parent_2 >= 0)
782 784 nothead[parent_2] = 1;
783 785 }
784 786
785 787 for (i = 0; i < len; i++) {
786 788 PyObject *head;
787 789
788 790 if (nothead[i])
789 791 continue;
790 792 head = PyInt_FromLong(i);
791 793 if (head == NULL || PyList_Append(heads, head) == -1) {
792 794 Py_XDECREF(head);
793 795 goto bail;
794 796 }
795 797 }
796 798
797 799 done:
798 800 self->headrevs = heads;
799 801 free(nothead);
800 802 return list_copy(self->headrevs);
801 803 bail:
802 804 Py_XDECREF(heads);
803 805 free(nothead);
804 806 return NULL;
805 807 }
806 808
807 809 static inline int nt_level(const char *node, Py_ssize_t level)
808 810 {
809 811 int v = node[level>>1];
810 812 if (!(level & 1))
811 813 v >>= 4;
812 814 return v & 0xf;
813 815 }
814 816
815 817 /*
816 818 * Return values:
817 819 *
818 820 * -4: match is ambiguous (multiple candidates)
819 821 * -2: not found
820 822 * rest: valid rev
821 823 */
822 824 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
823 825 int hex)
824 826 {
825 827 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
826 828 int level, maxlevel, off;
827 829
828 830 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
829 831 return -1;
830 832
831 833 if (self->nt == NULL)
832 834 return -2;
833 835
834 836 if (hex)
835 837 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
836 838 else
837 839 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
838 840
839 841 for (level = off = 0; level < maxlevel; level++) {
840 842 int k = getnybble(node, level);
841 843 nodetree *n = &self->nt[off];
842 844 int v = n->children[k];
843 845
844 846 if (v < 0) {
845 847 const char *n;
846 848 Py_ssize_t i;
847 849
848 850 v = -v - 1;
849 851 n = index_node(self, v);
850 852 if (n == NULL)
851 853 return -2;
852 854 for (i = level; i < maxlevel; i++)
853 855 if (getnybble(node, i) != nt_level(n, i))
854 856 return -2;
855 857 return v;
856 858 }
857 859 if (v == 0)
858 860 return -2;
859 861 off = v;
860 862 }
861 863 /* multiple matches against an ambiguous prefix */
862 864 return -4;
863 865 }
864 866
865 867 static int nt_new(indexObject *self)
866 868 {
867 869 if (self->ntlength == self->ntcapacity) {
868 870 self->ntcapacity *= 2;
869 871 self->nt = realloc(self->nt,
870 872 self->ntcapacity * sizeof(nodetree));
871 873 if (self->nt == NULL) {
872 874 PyErr_SetString(PyExc_MemoryError, "out of memory");
873 875 return -1;
874 876 }
875 877 memset(&self->nt[self->ntlength], 0,
876 878 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
877 879 }
878 880 return self->ntlength++;
879 881 }
880 882
881 883 static int nt_insert(indexObject *self, const char *node, int rev)
882 884 {
883 885 int level = 0;
884 886 int off = 0;
885 887
886 888 while (level < 40) {
887 889 int k = nt_level(node, level);
888 890 nodetree *n;
889 891 int v;
890 892
891 893 n = &self->nt[off];
892 894 v = n->children[k];
893 895
894 896 if (v == 0) {
895 897 n->children[k] = -rev - 1;
896 898 return 0;
897 899 }
898 900 if (v < 0) {
899 901 const char *oldnode = index_node(self, -v - 1);
900 902 int noff;
901 903
902 904 if (!oldnode || !memcmp(oldnode, node, 20)) {
903 905 n->children[k] = -rev - 1;
904 906 return 0;
905 907 }
906 908 noff = nt_new(self);
907 909 if (noff == -1)
908 910 return -1;
909 911 /* self->nt may have been changed by realloc */
910 912 self->nt[off].children[k] = noff;
911 913 off = noff;
912 914 n = &self->nt[off];
913 915 n->children[nt_level(oldnode, ++level)] = v;
914 916 if (level > self->ntdepth)
915 917 self->ntdepth = level;
916 918 self->ntsplits += 1;
917 919 } else {
918 920 level += 1;
919 921 off = v;
920 922 }
921 923 }
922 924
923 925 return -1;
924 926 }
925 927
926 928 static int nt_init(indexObject *self)
927 929 {
928 930 if (self->nt == NULL) {
929 931 if (self->raw_length > INT_MAX) {
930 932 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
931 933 return -1;
932 934 }
933 935 self->ntcapacity = self->raw_length < 4
934 936 ? 4 : (int)self->raw_length / 2;
935 937
936 938 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
937 939 if (self->nt == NULL) {
938 940 PyErr_NoMemory();
939 941 return -1;
940 942 }
941 943 self->ntlength = 1;
942 944 self->ntrev = (int)index_length(self) - 1;
943 945 self->ntlookups = 1;
944 946 self->ntmisses = 0;
945 947 if (nt_insert(self, nullid, INT_MAX) == -1)
946 948 return -1;
947 949 }
948 950 return 0;
949 951 }
950 952
951 953 /*
952 954 * Return values:
953 955 *
954 956 * -3: error (exception set)
955 957 * -2: not found (no exception set)
956 958 * rest: valid rev
957 959 */
958 960 static int index_find_node(indexObject *self,
959 961 const char *node, Py_ssize_t nodelen)
960 962 {
961 963 int rev;
962 964
963 965 self->ntlookups++;
964 966 rev = nt_find(self, node, nodelen, 0);
965 967 if (rev >= -1)
966 968 return rev;
967 969
968 970 if (nt_init(self) == -1)
969 971 return -3;
970 972
971 973 /*
972 974 * For the first handful of lookups, we scan the entire index,
973 975 * and cache only the matching nodes. This optimizes for cases
974 976 * like "hg tip", where only a few nodes are accessed.
975 977 *
976 978 * After that, we cache every node we visit, using a single
977 979 * scan amortized over multiple lookups. This gives the best
978 980 * bulk performance, e.g. for "hg log".
979 981 */
980 982 if (self->ntmisses++ < 4) {
981 983 for (rev = self->ntrev - 1; rev >= 0; rev--) {
982 984 const char *n = index_node(self, rev);
983 985 if (n == NULL)
984 986 return -2;
985 987 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
986 988 if (nt_insert(self, n, rev) == -1)
987 989 return -3;
988 990 break;
989 991 }
990 992 }
991 993 } else {
992 994 for (rev = self->ntrev - 1; rev >= 0; rev--) {
993 995 const char *n = index_node(self, rev);
994 996 if (n == NULL) {
995 997 self->ntrev = rev + 1;
996 998 return -2;
997 999 }
998 1000 if (nt_insert(self, n, rev) == -1) {
999 1001 self->ntrev = rev + 1;
1000 1002 return -3;
1001 1003 }
1002 1004 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1003 1005 break;
1004 1006 }
1005 1007 }
1006 1008 self->ntrev = rev;
1007 1009 }
1008 1010
1009 1011 if (rev >= 0)
1010 1012 return rev;
1011 1013 return -2;
1012 1014 }
1013 1015
1014 1016 static PyObject *raise_revlog_error(void)
1015 1017 {
1016 1018 static PyObject *errclass;
1017 1019 PyObject *mod = NULL, *errobj;
1018 1020
1019 1021 if (errclass == NULL) {
1020 1022 PyObject *dict;
1021 1023
1022 1024 mod = PyImport_ImportModule("mercurial.error");
1023 1025 if (mod == NULL)
1024 1026 goto classfail;
1025 1027
1026 1028 dict = PyModule_GetDict(mod);
1027 1029 if (dict == NULL)
1028 1030 goto classfail;
1029 1031
1030 1032 errclass = PyDict_GetItemString(dict, "RevlogError");
1031 1033 if (errclass == NULL) {
1032 1034 PyErr_SetString(PyExc_SystemError,
1033 1035 "could not find RevlogError");
1034 1036 goto classfail;
1035 1037 }
1036 1038 Py_INCREF(errclass);
1037 1039 }
1038 1040
1039 1041 errobj = PyObject_CallFunction(errclass, NULL);
1040 1042 if (errobj == NULL)
1041 1043 return NULL;
1042 1044 PyErr_SetObject(errclass, errobj);
1043 1045 return errobj;
1044 1046
1045 1047 classfail:
1046 1048 Py_XDECREF(mod);
1047 1049 return NULL;
1048 1050 }
1049 1051
1050 1052 static PyObject *index_getitem(indexObject *self, PyObject *value)
1051 1053 {
1052 1054 char *node;
1053 1055 Py_ssize_t nodelen;
1054 1056 int rev;
1055 1057
1056 1058 if (PyInt_Check(value))
1057 1059 return index_get(self, PyInt_AS_LONG(value));
1058 1060
1059 1061 if (node_check(value, &node, &nodelen) == -1)
1060 1062 return NULL;
1061 1063 rev = index_find_node(self, node, nodelen);
1062 1064 if (rev >= -1)
1063 1065 return PyInt_FromLong(rev);
1064 1066 if (rev == -2)
1065 1067 raise_revlog_error();
1066 1068 return NULL;
1067 1069 }
1068 1070
1069 1071 static int nt_partialmatch(indexObject *self, const char *node,
1070 1072 Py_ssize_t nodelen)
1071 1073 {
1072 1074 int rev;
1073 1075
1074 1076 if (nt_init(self) == -1)
1075 1077 return -3;
1076 1078
1077 1079 if (self->ntrev > 0) {
1078 1080 /* ensure that the radix tree is fully populated */
1079 1081 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1080 1082 const char *n = index_node(self, rev);
1081 1083 if (n == NULL)
1082 1084 return -2;
1083 1085 if (nt_insert(self, n, rev) == -1)
1084 1086 return -3;
1085 1087 }
1086 1088 self->ntrev = rev;
1087 1089 }
1088 1090
1089 1091 return nt_find(self, node, nodelen, 1);
1090 1092 }
1091 1093
1092 1094 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1093 1095 {
1094 1096 const char *fullnode;
1095 1097 int nodelen;
1096 1098 char *node;
1097 1099 int rev, i;
1098 1100
1099 1101 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1100 1102 return NULL;
1101 1103
1102 1104 if (nodelen < 4) {
1103 1105 PyErr_SetString(PyExc_ValueError, "key too short");
1104 1106 return NULL;
1105 1107 }
1106 1108
1107 1109 if (nodelen > 40) {
1108 1110 PyErr_SetString(PyExc_ValueError, "key too long");
1109 1111 return NULL;
1110 1112 }
1111 1113
1112 1114 for (i = 0; i < nodelen; i++)
1113 1115 hexdigit(node, i);
1114 1116 if (PyErr_Occurred()) {
1115 1117 /* input contains non-hex characters */
1116 1118 PyErr_Clear();
1117 1119 Py_RETURN_NONE;
1118 1120 }
1119 1121
1120 1122 rev = nt_partialmatch(self, node, nodelen);
1121 1123
1122 1124 switch (rev) {
1123 1125 case -4:
1124 1126 raise_revlog_error();
1125 1127 case -3:
1126 1128 return NULL;
1127 1129 case -2:
1128 1130 Py_RETURN_NONE;
1129 1131 case -1:
1130 1132 return PyString_FromStringAndSize(nullid, 20);
1131 1133 }
1132 1134
1133 1135 fullnode = index_node(self, rev);
1134 1136 if (fullnode == NULL) {
1135 1137 PyErr_Format(PyExc_IndexError,
1136 1138 "could not access rev %d", rev);
1137 1139 return NULL;
1138 1140 }
1139 1141 return PyString_FromStringAndSize(fullnode, 20);
1140 1142 }
1141 1143
1142 1144 static PyObject *index_m_get(indexObject *self, PyObject *args)
1143 1145 {
1144 1146 Py_ssize_t nodelen;
1145 1147 PyObject *val;
1146 1148 char *node;
1147 1149 int rev;
1148 1150
1149 1151 if (!PyArg_ParseTuple(args, "O", &val))
1150 1152 return NULL;
1151 1153 if (node_check(val, &node, &nodelen) == -1)
1152 1154 return NULL;
1153 1155 rev = index_find_node(self, node, nodelen);
1154 1156 if (rev == -3)
1155 1157 return NULL;
1156 1158 if (rev == -2)
1157 1159 Py_RETURN_NONE;
1158 1160 return PyInt_FromLong(rev);
1159 1161 }
1160 1162
1161 1163 static int index_contains(indexObject *self, PyObject *value)
1162 1164 {
1163 1165 char *node;
1164 1166 Py_ssize_t nodelen;
1165 1167
1166 1168 if (PyInt_Check(value)) {
1167 1169 long rev = PyInt_AS_LONG(value);
1168 1170 return rev >= -1 && rev < index_length(self);
1169 1171 }
1170 1172
1171 1173 if (node_check(value, &node, &nodelen) == -1)
1172 1174 return -1;
1173 1175
1174 1176 switch (index_find_node(self, node, nodelen)) {
1175 1177 case -3:
1176 1178 return -1;
1177 1179 case -2:
1178 1180 return 0;
1179 1181 default:
1180 1182 return 1;
1181 1183 }
1182 1184 }
1183 1185
1184 1186 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1185 1187 {
1186 1188 if (rev >= self->length - 1) {
1187 1189 PyObject *tuple = PyList_GET_ITEM(self->added,
1188 1190 rev - self->length + 1);
1189 1191 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1190 1192 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1191 1193 } else {
1192 1194 const char *data = index_deref(self, rev);
1193 1195 ps[0] = getbe32(data + 24);
1194 1196 ps[1] = getbe32(data + 28);
1195 1197 }
1196 1198 }
1197 1199
1198 1200 typedef uint64_t bitmask;
1199 1201
1200 1202 /*
1201 1203 * Given a disjoint set of revs, return all candidates for the
1202 1204 * greatest common ancestor. In revset notation, this is the set
1203 1205 * "heads(::a and ::b and ...)"
1204 1206 */
1205 1207 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1206 1208 int revcount)
1207 1209 {
1208 1210 const bitmask allseen = (1ull << revcount) - 1;
1209 1211 const bitmask poison = 1ull << revcount;
1210 1212 PyObject *gca = PyList_New(0);
1211 1213 int i, v, interesting;
1212 1214 int maxrev = -1;
1213 1215 long sp;
1214 1216 bitmask *seen;
1215 1217
1216 1218 if (gca == NULL)
1217 1219 return PyErr_NoMemory();
1218 1220
1219 1221 for (i = 0; i < revcount; i++) {
1220 1222 if (revs[i] > maxrev)
1221 1223 maxrev = revs[i];
1222 1224 }
1223 1225
1224 1226 seen = calloc(sizeof(*seen), maxrev + 1);
1225 1227 if (seen == NULL) {
1226 1228 Py_DECREF(gca);
1227 1229 return PyErr_NoMemory();
1228 1230 }
1229 1231
1230 1232 for (i = 0; i < revcount; i++)
1231 1233 seen[revs[i]] = 1ull << i;
1232 1234
1233 1235 interesting = revcount;
1234 1236
1235 1237 for (v = maxrev; v >= 0 && interesting; v--) {
1236 1238 long sv = seen[v];
1237 1239 int parents[2];
1238 1240
1239 1241 if (!sv)
1240 1242 continue;
1241 1243
1242 1244 if (sv < poison) {
1243 1245 interesting -= 1;
1244 1246 if (sv == allseen) {
1245 1247 PyObject *obj = PyInt_FromLong(v);
1246 1248 if (obj == NULL)
1247 1249 goto bail;
1248 1250 if (PyList_Append(gca, obj) == -1) {
1249 1251 Py_DECREF(obj);
1250 1252 goto bail;
1251 1253 }
1252 1254 sv |= poison;
1253 1255 for (i = 0; i < revcount; i++) {
1254 1256 if (revs[i] == v)
1255 1257 goto done;
1256 1258 }
1257 1259 }
1258 1260 }
1259 1261 index_get_parents(self, v, parents);
1260 1262
1261 1263 for (i = 0; i < 2; i++) {
1262 1264 int p = parents[i];
1263 1265 if (p == -1)
1264 1266 continue;
1265 1267 sp = seen[p];
1266 1268 if (sv < poison) {
1267 1269 if (sp == 0) {
1268 1270 seen[p] = sv;
1269 1271 interesting++;
1270 1272 }
1271 1273 else if (sp != sv)
1272 1274 seen[p] |= sv;
1273 1275 } else {
1274 1276 if (sp && sp < poison)
1275 1277 interesting--;
1276 1278 seen[p] = sv;
1277 1279 }
1278 1280 }
1279 1281 }
1280 1282
1281 1283 done:
1282 1284 free(seen);
1283 1285 return gca;
1284 1286 bail:
1285 1287 free(seen);
1286 1288 Py_XDECREF(gca);
1287 1289 return NULL;
1288 1290 }
1289 1291
1290 1292 /*
1291 1293 * Given a disjoint set of revs, return the subset with the longest
1292 1294 * path to the root.
1293 1295 */
1294 1296 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1295 1297 {
1296 1298 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1297 1299 static const Py_ssize_t capacity = 24;
1298 1300 int *depth, *interesting = NULL;
1299 1301 int i, j, v, ninteresting;
1300 1302 PyObject *dict = NULL, *keys;
1301 1303 long *seen = NULL;
1302 1304 int maxrev = -1;
1303 1305 long final;
1304 1306
1305 1307 if (revcount > capacity) {
1306 1308 PyErr_Format(PyExc_OverflowError,
1307 1309 "bitset size (%ld) > capacity (%ld)",
1308 1310 (long)revcount, (long)capacity);
1309 1311 return NULL;
1310 1312 }
1311 1313
1312 1314 for (i = 0; i < revcount; i++) {
1313 1315 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1314 1316 if (n > maxrev)
1315 1317 maxrev = n;
1316 1318 }
1317 1319
1318 1320 depth = calloc(sizeof(*depth), maxrev + 1);
1319 1321 if (depth == NULL)
1320 1322 return PyErr_NoMemory();
1321 1323
1322 1324 seen = calloc(sizeof(*seen), maxrev + 1);
1323 1325 if (seen == NULL) {
1324 1326 PyErr_NoMemory();
1325 1327 goto bail;
1326 1328 }
1327 1329
1328 1330 interesting = calloc(sizeof(*interesting), 2 << revcount);
1329 1331 if (interesting == NULL) {
1330 1332 PyErr_NoMemory();
1331 1333 goto bail;
1332 1334 }
1333 1335
1334 1336 if (PyList_Sort(revs) == -1)
1335 1337 goto bail;
1336 1338
1337 1339 for (i = 0; i < revcount; i++) {
1338 1340 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1339 1341 long b = 1l << i;
1340 1342 depth[n] = 1;
1341 1343 seen[n] = b;
1342 1344 interesting[b] = 1;
1343 1345 }
1344 1346
1345 1347 ninteresting = (int)revcount;
1346 1348
1347 1349 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1348 1350 int dv = depth[v];
1349 1351 int parents[2];
1350 1352 long sv;
1351 1353
1352 1354 if (dv == 0)
1353 1355 continue;
1354 1356
1355 1357 sv = seen[v];
1356 1358 index_get_parents(self, v, parents);
1357 1359
1358 1360 for (i = 0; i < 2; i++) {
1359 1361 int p = parents[i];
1360 1362 long nsp, sp;
1361 1363 int dp;
1362 1364
1363 1365 if (p == -1)
1364 1366 continue;
1365 1367
1366 1368 dp = depth[p];
1367 1369 nsp = sp = seen[p];
1368 1370 if (dp <= dv) {
1369 1371 depth[p] = dv + 1;
1370 1372 if (sp != sv) {
1371 1373 interesting[sv] += 1;
1372 1374 nsp = seen[p] = sv;
1373 1375 if (sp) {
1374 1376 interesting[sp] -= 1;
1375 1377 if (interesting[sp] == 0)
1376 1378 ninteresting -= 1;
1377 1379 }
1378 1380 }
1379 1381 }
1380 1382 else if (dv == dp - 1) {
1381 1383 nsp = sp | sv;
1382 1384 if (nsp == sp)
1383 1385 continue;
1384 1386 seen[p] = nsp;
1385 1387 interesting[sp] -= 1;
1386 1388 if (interesting[sp] == 0 && interesting[nsp] > 0)
1387 1389 ninteresting -= 1;
1388 1390 interesting[nsp] += 1;
1389 1391 }
1390 1392 }
1391 1393 interesting[sv] -= 1;
1392 1394 if (interesting[sv] == 0)
1393 1395 ninteresting -= 1;
1394 1396 }
1395 1397
1396 1398 final = 0;
1397 1399 j = ninteresting;
1398 1400 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1399 1401 if (interesting[i] == 0)
1400 1402 continue;
1401 1403 final |= i;
1402 1404 j -= 1;
1403 1405 }
1404 1406 if (final == 0)
1405 1407 return PyList_New(0);
1406 1408
1407 1409 dict = PyDict_New();
1408 1410 if (dict == NULL)
1409 1411 goto bail;
1410 1412
1411 1413 for (i = 0; i < revcount; i++) {
1412 1414 PyObject *key;
1413 1415
1414 1416 if ((final & (1 << i)) == 0)
1415 1417 continue;
1416 1418
1417 1419 key = PyList_GET_ITEM(revs, i);
1418 1420 Py_INCREF(key);
1419 1421 Py_INCREF(Py_None);
1420 1422 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1421 1423 Py_DECREF(key);
1422 1424 Py_DECREF(Py_None);
1423 1425 goto bail;
1424 1426 }
1425 1427 }
1426 1428
1427 1429 keys = PyDict_Keys(dict);
1428 1430
1429 1431 free(depth);
1430 1432 free(seen);
1431 1433 free(interesting);
1432 1434 Py_DECREF(dict);
1433 1435
1434 1436 return keys;
1435 1437 bail:
1436 1438 free(depth);
1437 1439 free(seen);
1438 1440 free(interesting);
1439 1441 Py_XDECREF(dict);
1440 1442
1441 1443 return NULL;
1442 1444 }
1443 1445
1444 1446 /*
1445 1447 * Given a (possibly overlapping) set of revs, return the greatest
1446 1448 * common ancestors: those with the longest path to the root.
1447 1449 */
1448 1450 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1449 1451 {
1450 1452 PyObject *ret = NULL, *gca = NULL;
1451 1453 Py_ssize_t argcount, i, len;
1452 1454 bitmask repeat = 0;
1453 1455 int revcount = 0;
1454 1456 int *revs;
1455 1457
1456 1458 argcount = PySequence_Length(args);
1457 1459 revs = malloc(argcount * sizeof(*revs));
1458 1460 if (argcount > 0 && revs == NULL)
1459 1461 return PyErr_NoMemory();
1460 1462 len = index_length(self) - 1;
1461 1463
1462 1464 for (i = 0; i < argcount; i++) {
1463 1465 static const int capacity = 24;
1464 1466 PyObject *obj = PySequence_GetItem(args, i);
1465 1467 bitmask x;
1466 1468 long val;
1467 1469
1468 1470 if (!PyInt_Check(obj)) {
1469 1471 PyErr_SetString(PyExc_TypeError,
1470 1472 "arguments must all be ints");
1471 1473 goto bail;
1472 1474 }
1473 1475 val = PyInt_AsLong(obj);
1474 1476 if (val == -1) {
1475 1477 ret = PyList_New(0);
1476 1478 goto done;
1477 1479 }
1478 1480 if (val < 0 || val >= len) {
1479 1481 PyErr_SetString(PyExc_IndexError,
1480 1482 "index out of range");
1481 1483 goto bail;
1482 1484 }
1483 1485 /* this cheesy bloom filter lets us avoid some more
1484 1486 * expensive duplicate checks in the common set-is-disjoint
1485 1487 * case */
1486 1488 x = 1ull << (val & 0x3f);
1487 1489 if (repeat & x) {
1488 1490 int k;
1489 1491 for (k = 0; k < revcount; k++) {
1490 1492 if (val == revs[k])
1491 1493 goto duplicate;
1492 1494 }
1493 1495 }
1494 1496 else repeat |= x;
1495 1497 if (revcount >= capacity) {
1496 1498 PyErr_Format(PyExc_OverflowError,
1497 1499 "bitset size (%d) > capacity (%d)",
1498 1500 revcount, capacity);
1499 1501 goto bail;
1500 1502 }
1501 1503 revs[revcount++] = (int)val;
1502 1504 duplicate:;
1503 1505 }
1504 1506
1505 1507 if (revcount == 0) {
1506 1508 ret = PyList_New(0);
1507 1509 goto done;
1508 1510 }
1509 1511 if (revcount == 1) {
1510 1512 PyObject *obj;
1511 1513 ret = PyList_New(1);
1512 1514 if (ret == NULL)
1513 1515 goto bail;
1514 1516 obj = PyInt_FromLong(revs[0]);
1515 1517 if (obj == NULL)
1516 1518 goto bail;
1517 1519 PyList_SET_ITEM(ret, 0, obj);
1518 1520 goto done;
1519 1521 }
1520 1522
1521 1523 gca = find_gca_candidates(self, revs, revcount);
1522 1524 if (gca == NULL)
1523 1525 goto bail;
1524 1526
1525 1527 if (PyList_GET_SIZE(gca) <= 1) {
1526 1528 ret = gca;
1527 1529 Py_INCREF(gca);
1528 1530 }
1529 1531 else ret = find_deepest(self, gca);
1530 1532
1531 1533 done:
1532 1534 free(revs);
1533 1535 Py_XDECREF(gca);
1534 1536
1535 1537 return ret;
1536 1538
1537 1539 bail:
1538 1540 free(revs);
1539 1541 Py_XDECREF(gca);
1540 1542 Py_XDECREF(ret);
1541 1543 return NULL;
1542 1544 }
1543 1545
1544 1546 /*
1545 1547 * Invalidate any trie entries introduced by added revs.
1546 1548 */
1547 1549 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1548 1550 {
1549 1551 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1550 1552
1551 1553 for (i = start; i < len; i++) {
1552 1554 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1553 1555 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1554 1556
1555 1557 nt_insert(self, PyString_AS_STRING(node), -1);
1556 1558 }
1557 1559
1558 1560 if (start == 0)
1559 1561 Py_CLEAR(self->added);
1560 1562 }
1561 1563
1562 1564 /*
1563 1565 * Delete a numeric range of revs, which must be at the end of the
1564 1566 * range, but exclude the sentinel nullid entry.
1565 1567 */
1566 1568 static int index_slice_del(indexObject *self, PyObject *item)
1567 1569 {
1568 1570 Py_ssize_t start, stop, step, slicelength;
1569 1571 Py_ssize_t length = index_length(self);
1570 1572 int ret = 0;
1571 1573
1572 1574 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1573 1575 &start, &stop, &step, &slicelength) < 0)
1574 1576 return -1;
1575 1577
1576 1578 if (slicelength <= 0)
1577 1579 return 0;
1578 1580
1579 1581 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1580 1582 stop = start;
1581 1583
1582 1584 if (step < 0) {
1583 1585 stop = start + 1;
1584 1586 start = stop + step*(slicelength - 1) - 1;
1585 1587 step = -step;
1586 1588 }
1587 1589
1588 1590 if (step != 1) {
1589 1591 PyErr_SetString(PyExc_ValueError,
1590 1592 "revlog index delete requires step size of 1");
1591 1593 return -1;
1592 1594 }
1593 1595
1594 1596 if (stop != length - 1) {
1595 1597 PyErr_SetString(PyExc_IndexError,
1596 1598 "revlog index deletion indices are invalid");
1597 1599 return -1;
1598 1600 }
1599 1601
1600 1602 if (start < self->length - 1) {
1601 1603 if (self->nt) {
1602 1604 Py_ssize_t i;
1603 1605
1604 1606 for (i = start + 1; i < self->length - 1; i++) {
1605 1607 const char *node = index_node(self, i);
1606 1608
1607 1609 if (node)
1608 1610 nt_insert(self, node, -1);
1609 1611 }
1610 1612 if (self->added)
1611 1613 nt_invalidate_added(self, 0);
1612 1614 if (self->ntrev > start)
1613 1615 self->ntrev = (int)start;
1614 1616 }
1615 1617 self->length = start + 1;
1616 1618 if (start < self->raw_length) {
1617 1619 if (self->cache) {
1618 1620 Py_ssize_t i;
1619 1621 for (i = start; i < self->raw_length; i++)
1620 1622 Py_CLEAR(self->cache[i]);
1621 1623 }
1622 1624 self->raw_length = start;
1623 1625 }
1624 1626 goto done;
1625 1627 }
1626 1628
1627 1629 if (self->nt) {
1628 1630 nt_invalidate_added(self, start - self->length + 1);
1629 1631 if (self->ntrev > start)
1630 1632 self->ntrev = (int)start;
1631 1633 }
1632 1634 if (self->added)
1633 1635 ret = PyList_SetSlice(self->added, start - self->length + 1,
1634 1636 PyList_GET_SIZE(self->added), NULL);
1635 1637 done:
1636 1638 Py_CLEAR(self->headrevs);
1637 1639 return ret;
1638 1640 }
1639 1641
1640 1642 /*
1641 1643 * Supported ops:
1642 1644 *
1643 1645 * slice deletion
1644 1646 * string assignment (extend node->rev mapping)
1645 1647 * string deletion (shrink node->rev mapping)
1646 1648 */
1647 1649 static int index_assign_subscript(indexObject *self, PyObject *item,
1648 1650 PyObject *value)
1649 1651 {
1650 1652 char *node;
1651 1653 Py_ssize_t nodelen;
1652 1654 long rev;
1653 1655
1654 1656 if (PySlice_Check(item) && value == NULL)
1655 1657 return index_slice_del(self, item);
1656 1658
1657 1659 if (node_check(item, &node, &nodelen) == -1)
1658 1660 return -1;
1659 1661
1660 1662 if (value == NULL)
1661 1663 return self->nt ? nt_insert(self, node, -1) : 0;
1662 1664 rev = PyInt_AsLong(value);
1663 1665 if (rev > INT_MAX || rev < 0) {
1664 1666 if (!PyErr_Occurred())
1665 1667 PyErr_SetString(PyExc_ValueError, "rev out of range");
1666 1668 return -1;
1667 1669 }
1668 1670 return nt_insert(self, node, (int)rev);
1669 1671 }
1670 1672
1671 1673 /*
1672 1674 * Find all RevlogNG entries in an index that has inline data. Update
1673 1675 * the optional "offsets" table with those entries.
1674 1676 */
1675 1677 static long inline_scan(indexObject *self, const char **offsets)
1676 1678 {
1677 1679 const char *data = PyString_AS_STRING(self->data);
1678 1680 Py_ssize_t pos = 0;
1679 1681 Py_ssize_t end = PyString_GET_SIZE(self->data);
1680 1682 long incr = v1_hdrsize;
1681 1683 Py_ssize_t len = 0;
1682 1684
1683 1685 while (pos + v1_hdrsize <= end && pos >= 0) {
1684 1686 uint32_t comp_len;
1685 1687 /* 3rd element of header is length of compressed inline data */
1686 1688 comp_len = getbe32(data + pos + 8);
1687 1689 incr = v1_hdrsize + comp_len;
1688 1690 if (offsets)
1689 1691 offsets[len] = data + pos;
1690 1692 len++;
1691 1693 pos += incr;
1692 1694 }
1693 1695
1694 1696 if (pos != end) {
1695 1697 if (!PyErr_Occurred())
1696 1698 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1697 1699 return -1;
1698 1700 }
1699 1701
1700 1702 return len;
1701 1703 }
1702 1704
1703 1705 static int index_init(indexObject *self, PyObject *args)
1704 1706 {
1705 1707 PyObject *data_obj, *inlined_obj;
1706 1708 Py_ssize_t size;
1707 1709
1708 1710 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1709 1711 self->raw_length = 0;
1710 1712 self->added = NULL;
1711 1713 self->cache = NULL;
1712 1714 self->data = NULL;
1713 1715 self->headrevs = NULL;
1714 1716 self->nt = NULL;
1715 1717 self->offsets = NULL;
1716 1718
1717 1719 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1718 1720 return -1;
1719 1721 if (!PyString_Check(data_obj)) {
1720 1722 PyErr_SetString(PyExc_TypeError, "data is not a string");
1721 1723 return -1;
1722 1724 }
1723 1725 size = PyString_GET_SIZE(data_obj);
1724 1726
1725 1727 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1726 1728 self->data = data_obj;
1727 1729
1728 1730 self->ntlength = self->ntcapacity = 0;
1729 1731 self->ntdepth = self->ntsplits = 0;
1730 1732 self->ntlookups = self->ntmisses = 0;
1731 1733 self->ntrev = -1;
1732 1734 Py_INCREF(self->data);
1733 1735
1734 1736 if (self->inlined) {
1735 1737 long len = inline_scan(self, NULL);
1736 1738 if (len == -1)
1737 1739 goto bail;
1738 1740 self->raw_length = len;
1739 1741 self->length = len + 1;
1740 1742 } else {
1741 1743 if (size % v1_hdrsize) {
1742 1744 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1743 1745 goto bail;
1744 1746 }
1745 1747 self->raw_length = size / v1_hdrsize;
1746 1748 self->length = self->raw_length + 1;
1747 1749 }
1748 1750
1749 1751 return 0;
1750 1752 bail:
1751 1753 return -1;
1752 1754 }
1753 1755
1754 1756 static PyObject *index_nodemap(indexObject *self)
1755 1757 {
1756 1758 Py_INCREF(self);
1757 1759 return (PyObject *)self;
1758 1760 }
1759 1761
1760 1762 static void index_dealloc(indexObject *self)
1761 1763 {
1762 1764 _index_clearcaches(self);
1763 1765 Py_XDECREF(self->data);
1764 1766 Py_XDECREF(self->added);
1765 1767 PyObject_Del(self);
1766 1768 }
1767 1769
1768 1770 static PySequenceMethods index_sequence_methods = {
1769 1771 (lenfunc)index_length, /* sq_length */
1770 1772 0, /* sq_concat */
1771 1773 0, /* sq_repeat */
1772 1774 (ssizeargfunc)index_get, /* sq_item */
1773 1775 0, /* sq_slice */
1774 1776 0, /* sq_ass_item */
1775 1777 0, /* sq_ass_slice */
1776 1778 (objobjproc)index_contains, /* sq_contains */
1777 1779 };
1778 1780
1779 1781 static PyMappingMethods index_mapping_methods = {
1780 1782 (lenfunc)index_length, /* mp_length */
1781 1783 (binaryfunc)index_getitem, /* mp_subscript */
1782 1784 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1783 1785 };
1784 1786
1785 1787 static PyMethodDef index_methods[] = {
1786 1788 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1787 1789 "return the gca set of the given revs"},
1788 1790 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1789 1791 "clear the index caches"},
1790 1792 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1791 1793 "get an index entry"},
1792 1794 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1793 1795 "get head revisions"},
1794 1796 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1795 1797 "insert an index entry"},
1796 1798 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1797 1799 "match a potentially ambiguous node ID"},
1798 1800 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1799 1801 "stats for the index"},
1800 1802 {NULL} /* Sentinel */
1801 1803 };
1802 1804
1803 1805 static PyGetSetDef index_getset[] = {
1804 1806 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1805 1807 {NULL} /* Sentinel */
1806 1808 };
1807 1809
1808 1810 static PyTypeObject indexType = {
1809 1811 PyObject_HEAD_INIT(NULL)
1810 1812 0, /* ob_size */
1811 1813 "parsers.index", /* tp_name */
1812 1814 sizeof(indexObject), /* tp_basicsize */
1813 1815 0, /* tp_itemsize */
1814 1816 (destructor)index_dealloc, /* tp_dealloc */
1815 1817 0, /* tp_print */
1816 1818 0, /* tp_getattr */
1817 1819 0, /* tp_setattr */
1818 1820 0, /* tp_compare */
1819 1821 0, /* tp_repr */
1820 1822 0, /* tp_as_number */
1821 1823 &index_sequence_methods, /* tp_as_sequence */
1822 1824 &index_mapping_methods, /* tp_as_mapping */
1823 1825 0, /* tp_hash */
1824 1826 0, /* tp_call */
1825 1827 0, /* tp_str */
1826 1828 0, /* tp_getattro */
1827 1829 0, /* tp_setattro */
1828 1830 0, /* tp_as_buffer */
1829 1831 Py_TPFLAGS_DEFAULT, /* tp_flags */
1830 1832 "revlog index", /* tp_doc */
1831 1833 0, /* tp_traverse */
1832 1834 0, /* tp_clear */
1833 1835 0, /* tp_richcompare */
1834 1836 0, /* tp_weaklistoffset */
1835 1837 0, /* tp_iter */
1836 1838 0, /* tp_iternext */
1837 1839 index_methods, /* tp_methods */
1838 1840 0, /* tp_members */
1839 1841 index_getset, /* tp_getset */
1840 1842 0, /* tp_base */
1841 1843 0, /* tp_dict */
1842 1844 0, /* tp_descr_get */
1843 1845 0, /* tp_descr_set */
1844 1846 0, /* tp_dictoffset */
1845 1847 (initproc)index_init, /* tp_init */
1846 1848 0, /* tp_alloc */
1847 1849 };
1848 1850
1849 1851 /*
1850 1852 * returns a tuple of the form (index, index, cache) with elements as
1851 1853 * follows:
1852 1854 *
1853 1855 * index: an index object that lazily parses RevlogNG records
1854 1856 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1855 1857 *
1856 1858 * added complications are for backwards compatibility
1857 1859 */
1858 1860 static PyObject *parse_index2(PyObject *self, PyObject *args)
1859 1861 {
1860 1862 PyObject *tuple = NULL, *cache = NULL;
1861 1863 indexObject *idx;
1862 1864 int ret;
1863 1865
1864 1866 idx = PyObject_New(indexObject, &indexType);
1865 1867 if (idx == NULL)
1866 1868 goto bail;
1867 1869
1868 1870 ret = index_init(idx, args);
1869 1871 if (ret == -1)
1870 1872 goto bail;
1871 1873
1872 1874 if (idx->inlined) {
1873 1875 cache = Py_BuildValue("iO", 0, idx->data);
1874 1876 if (cache == NULL)
1875 1877 goto bail;
1876 1878 } else {
1877 1879 cache = Py_None;
1878 1880 Py_INCREF(cache);
1879 1881 }
1880 1882
1881 1883 tuple = Py_BuildValue("NN", idx, cache);
1882 1884 if (!tuple)
1883 1885 goto bail;
1884 1886 return tuple;
1885 1887
1886 1888 bail:
1887 1889 Py_XDECREF(idx);
1888 1890 Py_XDECREF(cache);
1889 1891 Py_XDECREF(tuple);
1890 1892 return NULL;
1891 1893 }
1892 1894
1893 1895 static char parsers_doc[] = "Efficient content parsing.";
1894 1896
1895 1897 PyObject *encodedir(PyObject *self, PyObject *args);
1896 1898 PyObject *pathencode(PyObject *self, PyObject *args);
1897 1899 PyObject *lowerencode(PyObject *self, PyObject *args);
1898 1900
1899 1901 static PyMethodDef methods[] = {
1900 1902 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1901 1903 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1902 1904 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1903 1905 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1904 1906 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1905 1907 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1906 1908 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1907 1909 {NULL, NULL}
1908 1910 };
1909 1911
1910 1912 void dirs_module_init(PyObject *mod);
1911 1913
1912 1914 static void module_init(PyObject *mod)
1913 1915 {
1916 /* This module constant has two purposes. First, it lets us unit test
1917 * the ImportError raised without hard-coding any error text. This
1918 * means we can change the text in the future without breaking tests,
1919 * even across changesets without a recompile. Second, its presence
1920 * can be used to determine whether the version-checking logic is
1921 * present, which also helps in testing across changesets without a
1922 * recompile. Note that this means the pure-Python version of parsers
1923 * should not have this module constant. */
1924 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1925
1914 1926 dirs_module_init(mod);
1915 1927
1916 1928 indexType.tp_new = PyType_GenericNew;
1917 1929 if (PyType_Ready(&indexType) < 0)
1918 1930 return;
1919 1931 Py_INCREF(&indexType);
1920 1932
1921 1933 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1922 1934
1923 1935 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1924 1936 -1, -1, -1, -1, nullid, 20);
1925 1937 if (nullentry)
1926 1938 PyObject_GC_UnTrack(nullentry);
1927 1939
1928 1940 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1929 1941 }
1930 1942
1943 static int check_python_version(void)
1944 {
1945 PyObject *sys = PyImport_ImportModule("sys");
1946 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
1947 /* sys.hexversion is a 32-bit number by default, so the -1 case
1948 * should only occur in unusual circumstances (e.g. if sys.hexversion
1949 * is manually set to an invalid value). */
1950 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1951 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
1952 "modules were compiled with Python " PY_VERSION ", but "
1953 "Mercurial is currently using Python with sys.hexversion=%ld: "
1954 "Python %s\n at: %s", versionerrortext, hexversion,
1955 Py_GetVersion(), Py_GetProgramFullPath());
1956 return -1;
1957 }
1958 return 0;
1959 }
1960
1931 1961 #ifdef IS_PY3K
1932 1962 static struct PyModuleDef parsers_module = {
1933 1963 PyModuleDef_HEAD_INIT,
1934 1964 "parsers",
1935 1965 parsers_doc,
1936 1966 -1,
1937 1967 methods
1938 1968 };
1939 1969
1940 1970 PyMODINIT_FUNC PyInit_parsers(void)
1941 1971 {
1972 if (check_python_version() == -1)
1973 return;
1942 1974 PyObject *mod = PyModule_Create(&parsers_module);
1943 1975 module_init(mod);
1944 1976 return mod;
1945 1977 }
1946 1978 #else
1947 1979 PyMODINIT_FUNC initparsers(void)
1948 1980 {
1981 if (check_python_version() == -1)
1982 return;
1949 1983 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1950 1984 module_init(mod);
1951 1985 }
1952 1986 #endif
@@ -1,135 +1,203 b''
1 """This unit test tests parsers.parse_index2()."""
1 """This unit test primarily tests parsers.parse_index2().
2
3 It also checks certain aspects of the parsers module as a whole.
4 """
2 5
3 6 from mercurial import parsers
4 7 from mercurial.node import nullid, nullrev
5 8 import struct
9 import subprocess
10 import sys
6 11
7 12 # original python implementation
8 13 def gettype(q):
9 14 return int(q & 0xFFFF)
10 15
11 16 def offset_type(offset, type):
12 17 return long(long(offset) << 16 | type)
13 18
14 19 indexformatng = ">Qiiiiii20s12x"
15 20
16 21 def py_parseindex(data, inline) :
17 22 s = 64
18 23 cache = None
19 24 index = []
20 25 nodemap = {nullid: nullrev}
21 26 n = off = 0
22 27
23 28 l = len(data) - s
24 29 append = index.append
25 30 if inline:
26 31 cache = (0, data)
27 32 while off <= l:
28 33 e = struct.unpack(indexformatng, data[off:off + s])
29 34 nodemap[e[7]] = n
30 35 append(e)
31 36 n += 1
32 37 if e[1] < 0:
33 38 break
34 39 off += e[1] + s
35 40 else:
36 41 while off <= l:
37 42 e = struct.unpack(indexformatng, data[off:off + s])
38 43 nodemap[e[7]] = n
39 44 append(e)
40 45 n += 1
41 46 off += s
42 47
43 48 e = list(index[0])
44 49 type = gettype(e[0])
45 50 e[0] = offset_type(0, type)
46 51 index[0] = tuple(e)
47 52
48 53 # add the magic null revision at -1
49 54 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
50 55
51 56 return index, cache
52 57
53 58 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
54 59 '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
55 60 '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
56 61 'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
57 62 'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
58 63 '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
59 64 '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
60 65 '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
61 66 '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
62 67 '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
63 68 '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
64 69 '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
65 70 'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
66 71 '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
67 72 '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
68 73 '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
69 74 '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
70 75 '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
71 76 '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
72 77 'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
73 78 '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
74 79 '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
75 80 '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
76 81
77 82 data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
78 83 '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
79 84 '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
80 85 '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
81 86 '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
82 87 '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
83 88 '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
84 89 '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
85 90 '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
86 91 '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
87 92 '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
88 93 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
89 94 '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
90 95 '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
91 96 '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
92 97 '\x00\x00\x00\x00\x00\x00\x00\x00\x00'
93 98
94 99 def parse_index2(data, inline):
95 100 index, chunkcache = parsers.parse_index2(data, inline)
96 101 return list(index), chunkcache
97 102
103 def importparsers(hexversion):
104 """Import mercurial.parsers with the given sys.hexversion."""
105 # The file parsers.c inspects sys.hexversion to determine the version
106 # of the currently-running Python interpreter, so we monkey-patch
107 # sys.hexversion to simulate using different versions.
108 code = ("import sys; sys.hexversion=%s; "
109 "import mercurial.parsers" % hexversion)
110 cmd = "python -c \"%s\"" % code
111 # We need to do these tests inside a subprocess because parser.c's
112 # version-checking code happens inside the module init function, and
113 # when using reload() to reimport an extension module, "The init function
114 # of extension modules is not called a second time"
115 # (from http://docs.python.org/2/library/functions.html?#reload).
116 p = subprocess.Popen(cmd, shell=True,
117 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
118 return p.communicate() # returns stdout, stderr
119
120 def printhexfail(testnumber, hexversion, stdout, expected):
121 try:
122 hexstring = hex(hexversion)
123 except TypeError:
124 hexstring = None
125 print ("FAILED: version test #%s with Python %s and patched "
126 "sys.hexversion %r (%r):\n Expected %s but got:\n-->'%s'\n" %
127 (testnumber, sys.version_info, hexversion, hexstring, expected,
128 stdout))
129
130 def testversionokay(testnumber, hexversion):
131 stdout, stderr = importparsers(hexversion)
132 if stdout:
133 printhexfail(testnumber, hexversion, stdout, expected="no stdout")
134
135 def testversionfail(testnumber, hexversion):
136 stdout, stderr = importparsers(hexversion)
137 # We include versionerrortext to distinguish from other ImportErrors.
138 errtext = "ImportError: %s" % parsers.versionerrortext
139 if errtext not in stdout:
140 printhexfail(testnumber, hexversion, stdout,
141 expected="stdout to contain %r" % errtext)
142
143 def makehex(major, minor, micro):
144 return int("%x%02x%02x00" % (major, minor, micro), 16)
145
146 def runversiontests():
147 """Check the version-detection logic when importing parsers."""
148 info = sys.version_info
149 major, minor, micro = info[0], info[1], info[2]
150 # Test same major-minor versions.
151 testversionokay(1, makehex(major, minor, micro))
152 testversionokay(2, makehex(major, minor, micro + 1))
153 # Test different major-minor versions.
154 testversionfail(3, makehex(major + 1, minor, micro))
155 testversionfail(4, makehex(major, minor + 1, micro))
156 testversionfail(5, "'foo'")
157
98 158 def runtest() :
159 # Only test the version-detection logic if it is present.
160 try:
161 parsers.versionerrortext
162 except AttributeError:
163 pass
164 else:
165 runversiontests()
166
99 167 # Check that parse_index2() raises TypeError on bad arguments.
100 168 try:
101 169 parse_index2(0, True)
102 170 except TypeError:
103 171 pass
104 172 else:
105 173 print "Expected to get TypeError."
106 174
107 175 # Check parsers.parse_index2() on an index file against the original
108 176 # Python implementation of parseindex, both with and without inlined data.
109 177
110 178 py_res_1 = py_parseindex(data_inlined, True)
111 179 c_res_1 = parse_index2(data_inlined, True)
112 180
113 181 py_res_2 = py_parseindex(data_non_inlined, False)
114 182 c_res_2 = parse_index2(data_non_inlined, False)
115 183
116 184 if py_res_1 != c_res_1:
117 185 print "Parse index result (with inlined data) differs!"
118 186
119 187 if py_res_2 != c_res_2:
120 188 print "Parse index result (no inlined data) differs!"
121 189
122 190 ix = parsers.parse_index2(data_inlined, True)[0]
123 191 for i, r in enumerate(ix):
124 192 if r[7] == nullid:
125 193 i = -1
126 194 try:
127 195 if ix[r[7]] != i:
128 196 print 'Reverse lookup inconsistent for %r' % r[7].encode('hex')
129 197 except TypeError:
130 198 # pure version doesn't support this
131 199 break
132 200
133 201 print "done"
134 202
135 203 runtest()
General Comments 0
You need to be logged in to leave comments. Login now