##// END OF EJS Templates
parsers.c: fix a couple of memory leaks
Danek Duvall -
r21730:8da10038 stable
parent child Browse files
Show More
@@ -1,2084 +1,2080 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
/* Text for a Python minor-version mismatch error; not used in this part of the file. */
static char *versionerrortext = "Python minor version mismatch";
18 18
/*
 * Lookup table from a byte value to the hex digit it encodes.
 * '0'-'9', 'A'-'F' and 'a'-'f' map to 0..15; every other byte maps to -1.
 * The table is only ever read, so it is declared const.
 */
static const int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
37 37
38 38 static inline int hexdigit(const char *p, Py_ssize_t off)
39 39 {
40 40 int8_t val = hextable[(unsigned char)p[off]];
41 41
42 42 if (val >= 0) {
43 43 return val;
44 44 }
45 45
46 46 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
47 47 return 0;
48 48 }
49 49
50 50 /*
51 51 * Turn a hex-encoded string into binary.
52 52 */
53 53 static PyObject *unhexlify(const char *str, int len)
54 54 {
55 55 PyObject *ret;
56 56 char *d;
57 57 int i;
58 58
59 59 ret = PyBytes_FromStringAndSize(NULL, len / 2);
60 60
61 61 if (!ret)
62 62 return NULL;
63 63
64 64 d = PyBytes_AsString(ret);
65 65
66 66 for (i = 0; i < len;) {
67 67 int hi = hexdigit(str, i++);
68 68 int lo = hexdigit(str, i++);
69 69 *d++ = (hi << 4) | lo;
70 70 }
71 71
72 72 return ret;
73 73 }
74 74
75 75 /*
76 76 * This code assumes that a manifest is stitched together with newline
77 77 * ('\n') characters.
78 78 */
79 79 static PyObject *parse_manifest(PyObject *self, PyObject *args)
80 80 {
81 81 PyObject *mfdict, *fdict;
82 82 char *str, *start, *end;
83 83 int len;
84 84
85 85 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
86 86 &PyDict_Type, &mfdict,
87 87 &PyDict_Type, &fdict,
88 88 &str, &len))
89 89 goto quit;
90 90
91 91 start = str;
92 92 end = str + len;
93 93 while (start < end) {
94 94 PyObject *file = NULL, *node = NULL;
95 95 PyObject *flags = NULL;
96 96 char *zero = NULL, *newline = NULL;
97 97 ptrdiff_t nlen;
98 98
99 99 zero = memchr(start, '\0', end - start);
100 100 if (!zero) {
101 101 PyErr_SetString(PyExc_ValueError,
102 102 "manifest entry has no separator");
103 103 goto quit;
104 104 }
105 105
106 106 newline = memchr(zero + 1, '\n', end - (zero + 1));
107 107 if (!newline) {
108 108 PyErr_SetString(PyExc_ValueError,
109 109 "manifest contains trailing garbage");
110 110 goto quit;
111 111 }
112 112
113 113 file = PyBytes_FromStringAndSize(start, zero - start);
114 114
115 115 if (!file)
116 116 goto bail;
117 117
118 118 nlen = newline - zero - 1;
119 119
120 120 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
121 121 if (!node)
122 122 goto bail;
123 123
124 124 if (nlen > 40) {
125 125 flags = PyBytes_FromStringAndSize(zero + 41,
126 126 nlen - 40);
127 127 if (!flags)
128 128 goto bail;
129 129
130 130 if (PyDict_SetItem(fdict, file, flags) == -1)
131 131 goto bail;
132 132 }
133 133
134 134 if (PyDict_SetItem(mfdict, file, node) == -1)
135 135 goto bail;
136 136
137 137 start = newline + 1;
138 138
139 139 Py_XDECREF(flags);
140 140 Py_XDECREF(node);
141 141 Py_XDECREF(file);
142 142 continue;
143 143 bail:
144 144 Py_XDECREF(flags);
145 145 Py_XDECREF(node);
146 146 Py_XDECREF(file);
147 147 goto quit;
148 148 }
149 149
150 150 Py_INCREF(Py_None);
151 151 return Py_None;
152 152 quit:
153 153 return NULL;
154 154 }
155 155
156 156 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
157 157 {
158 158 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
159 159 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
160 160 char state, *cur, *str, *cpos;
161 161 int mode, size, mtime;
162 162 unsigned int flen;
163 163 int len, pos = 40;
164 164
165 165 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
166 166 &PyDict_Type, &dmap,
167 167 &PyDict_Type, &cmap,
168 168 &str, &len))
169 169 goto quit;
170 170
171 171 /* read parents */
172 172 if (len < 40)
173 173 goto quit;
174 174
175 175 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
176 176 if (!parents)
177 177 goto quit;
178 178
179 179 /* read filenames */
180 180 while (pos >= 40 && pos < len) {
181 181 cur = str + pos;
182 182 /* unpack header */
183 183 state = *cur;
184 184 mode = getbe32(cur + 1);
185 185 size = getbe32(cur + 5);
186 186 mtime = getbe32(cur + 9);
187 187 flen = getbe32(cur + 13);
188 188 pos += 17;
189 189 cur += 17;
190 190 if (flen > len - pos) {
191 191 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
192 192 goto quit;
193 193 }
194 194
195 195 entry = Py_BuildValue("ciii", state, mode, size, mtime);
196 196 if (!entry)
197 197 goto quit;
198 198 PyObject_GC_UnTrack(entry); /* don't waste time with this */
199 199
200 200 cpos = memchr(cur, 0, flen);
201 201 if (cpos) {
202 202 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
203 203 cname = PyBytes_FromStringAndSize(cpos + 1,
204 204 flen - (cpos - cur) - 1);
205 205 if (!fname || !cname ||
206 206 PyDict_SetItem(cmap, fname, cname) == -1 ||
207 207 PyDict_SetItem(dmap, fname, entry) == -1)
208 208 goto quit;
209 209 Py_DECREF(cname);
210 210 } else {
211 211 fname = PyBytes_FromStringAndSize(cur, flen);
212 212 if (!fname ||
213 213 PyDict_SetItem(dmap, fname, entry) == -1)
214 214 goto quit;
215 215 }
216 216 Py_DECREF(fname);
217 217 Py_DECREF(entry);
218 218 fname = cname = entry = NULL;
219 219 pos += flen;
220 220 }
221 221
222 222 ret = parents;
223 223 Py_INCREF(ret);
224 224 quit:
225 225 Py_XDECREF(fname);
226 226 Py_XDECREF(cname);
227 227 Py_XDECREF(entry);
228 228 Py_XDECREF(parents);
229 229 return ret;
230 230 }
231 231
232 232 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
233 233 {
234 234 PyObject *o = PyTuple_GET_ITEM(tuple, off);
235 235 long val;
236 236
237 237 if (PyInt_Check(o))
238 238 val = PyInt_AS_LONG(o);
239 239 else if (PyLong_Check(o)) {
240 240 val = PyLong_AsLong(o);
241 241 if (val == -1 && PyErr_Occurred())
242 242 return -1;
243 243 } else {
244 244 PyErr_SetString(PyExc_TypeError, "expected an int or long");
245 245 return -1;
246 246 }
247 247 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
248 248 PyErr_SetString(PyExc_OverflowError,
249 249 "Python value to large to convert to uint32_t");
250 250 return -1;
251 251 }
252 252 *v = (uint32_t)val;
253 253 return 0;
254 254 }
255 255
256 256 static PyObject *dirstate_unset;
257 257
258 258 /*
259 259 * Efficiently pack a dirstate object into its on-disk format.
260 260 */
261 261 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
262 262 {
263 263 PyObject *packobj = NULL;
264 264 PyObject *map, *copymap, *pl;
265 265 Py_ssize_t nbytes, pos, l;
266 266 PyObject *k, *v, *pn;
267 267 char *p, *s;
268 268 double now;
269 269
270 270 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
271 271 &PyDict_Type, &map, &PyDict_Type, &copymap,
272 272 &pl, &now))
273 273 return NULL;
274 274
275 275 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
276 276 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
277 277 return NULL;
278 278 }
279 279
280 280 /* Figure out how much we need to allocate. */
281 281 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
282 282 PyObject *c;
283 283 if (!PyString_Check(k)) {
284 284 PyErr_SetString(PyExc_TypeError, "expected string key");
285 285 goto bail;
286 286 }
287 287 nbytes += PyString_GET_SIZE(k) + 17;
288 288 c = PyDict_GetItem(copymap, k);
289 289 if (c) {
290 290 if (!PyString_Check(c)) {
291 291 PyErr_SetString(PyExc_TypeError,
292 292 "expected string key");
293 293 goto bail;
294 294 }
295 295 nbytes += PyString_GET_SIZE(c) + 1;
296 296 }
297 297 }
298 298
299 299 packobj = PyString_FromStringAndSize(NULL, nbytes);
300 300 if (packobj == NULL)
301 301 goto bail;
302 302
303 303 p = PyString_AS_STRING(packobj);
304 304
305 305 pn = PySequence_ITEM(pl, 0);
306 306 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
307 307 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
308 308 goto bail;
309 309 }
310 310 memcpy(p, s, l);
311 311 p += 20;
312 312 pn = PySequence_ITEM(pl, 1);
313 313 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
314 314 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
315 315 goto bail;
316 316 }
317 317 memcpy(p, s, l);
318 318 p += 20;
319 319
320 320 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
321 321 uint32_t mode, size, mtime;
322 322 Py_ssize_t len, l;
323 323 PyObject *o;
324 324 char *s, *t;
325 325
326 326 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
327 327 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
328 328 goto bail;
329 329 }
330 330 o = PyTuple_GET_ITEM(v, 0);
331 331 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
332 332 PyErr_SetString(PyExc_TypeError, "expected one byte");
333 333 goto bail;
334 334 }
335 335 *p++ = *s;
336 336 if (getintat(v, 1, &mode) == -1)
337 337 goto bail;
338 338 if (getintat(v, 2, &size) == -1)
339 339 goto bail;
340 340 if (getintat(v, 3, &mtime) == -1)
341 341 goto bail;
342 342 if (*s == 'n' && mtime == (uint32_t)now) {
343 343 /* See pure/parsers.py:pack_dirstate for why we do
344 344 * this. */
345 345 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
346 346 goto bail;
347 347 mtime = -1;
348 348 }
349 349 putbe32(mode, p);
350 350 putbe32(size, p + 4);
351 351 putbe32(mtime, p + 8);
352 352 t = p + 12;
353 353 p += 16;
354 354 len = PyString_GET_SIZE(k);
355 355 memcpy(p, PyString_AS_STRING(k), len);
356 356 p += len;
357 357 o = PyDict_GetItem(copymap, k);
358 358 if (o) {
359 359 *p++ = '\0';
360 360 l = PyString_GET_SIZE(o);
361 361 memcpy(p, PyString_AS_STRING(o), l);
362 362 p += l;
363 363 len += l + 1;
364 364 }
365 365 putbe32((uint32_t)len, t);
366 366 }
367 367
368 368 pos = p - PyString_AS_STRING(packobj);
369 369 if (pos != nbytes) {
370 370 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
371 371 (long)pos, (long)nbytes);
372 372 goto bail;
373 373 }
374 374
375 375 return packobj;
376 376 bail:
377 377 Py_XDECREF(packobj);
378 378 return NULL;
379 379 }
380 380
/*
 * One node of the base-16 trie used for fast node->rev mapping.
 *
 * Each of the 16 slots is interpreted as follows:
 *   > 0  index of the next trie node
 *   < 0  leaf, encoded as -(rev + 1)
 *   == 0 empty
 */
typedef struct {
	int children[16];
} nodetree;
391 391
392 392 /*
393 393 * This class has two behaviours.
394 394 *
395 395 * When used in a list-like way (with integer keys), we decode an
396 396 * entry in a RevlogNG index file on demand. Our last entry is a
397 397 * sentinel, always a nullid. We have limited support for
398 398 * integer-keyed insert and delete, only at elements right before the
399 399 * sentinel.
400 400 *
401 401 * With string keys, we lazily perform a reverse mapping from node to
402 402 * rev, using a base-16 trie.
403 403 */
404 404 typedef struct {
405 405 PyObject_HEAD
406 406 /* Type-specific fields go here. */
407 407 PyObject *data; /* raw bytes of index */
408 408 PyObject **cache; /* cached tuples */
409 409 const char **offsets; /* populated on demand */
410 410 Py_ssize_t raw_length; /* original number of elements */
411 411 Py_ssize_t length; /* current number of elements */
412 412 PyObject *added; /* populated on demand */
413 413 PyObject *headrevs; /* cache, invalidated on changes */
414 414 nodetree *nt; /* base-16 trie */
415 415 int ntlength; /* # nodes in use */
416 416 int ntcapacity; /* # nodes allocated */
417 417 int ntdepth; /* maximum depth of tree */
418 418 int ntsplits; /* # splits performed */
419 419 int ntrev; /* last rev scanned */
420 420 int ntlookups; /* # lookups */
421 421 int ntmisses; /* # lookups that miss the cache */
422 422 int inlined;
423 423 } indexObject;
424 424
425 425 static Py_ssize_t index_length(const indexObject *self)
426 426 {
427 427 if (self->added == NULL)
428 428 return self->length;
429 429 return self->length + PyList_GET_SIZE(self->added);
430 430 }
431 431
432 432 static PyObject *nullentry;
433 433 static const char nullid[20];
434 434
435 435 static long inline_scan(indexObject *self, const char **offsets);
436 436
437 437 #if LONG_MAX == 0x7fffffffL
438 438 static char *tuple_format = "Kiiiiiis#";
439 439 #else
440 440 static char *tuple_format = "kiiiiiis#";
441 441 #endif
442 442
443 443 /* A RevlogNG v1 index entry is 64 bytes long. */
444 444 static const long v1_hdrsize = 64;
445 445
446 446 /*
447 447 * Return a pointer to the beginning of a RevlogNG record.
448 448 */
449 449 static const char *index_deref(indexObject *self, Py_ssize_t pos)
450 450 {
451 451 if (self->inlined && pos > 0) {
452 452 if (self->offsets == NULL) {
453 453 self->offsets = malloc(self->raw_length *
454 454 sizeof(*self->offsets));
455 455 if (self->offsets == NULL)
456 456 return (const char *)PyErr_NoMemory();
457 457 inline_scan(self, self->offsets);
458 458 }
459 459 return self->offsets[pos];
460 460 }
461 461
462 462 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
463 463 }
464 464
465 465 /*
466 466 * RevlogNG format (all in big endian, data may be inlined):
467 467 * 6 bytes: offset
468 468 * 2 bytes: flags
469 469 * 4 bytes: compressed length
470 470 * 4 bytes: uncompressed length
471 471 * 4 bytes: base revision
472 472 * 4 bytes: link revision
473 473 * 4 bytes: parent 1 revision
474 474 * 4 bytes: parent 2 revision
475 475 * 32 bytes: nodeid (only 20 bytes used)
476 476 */
477 477 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
478 478 {
479 479 uint64_t offset_flags;
480 480 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
481 481 const char *c_node_id;
482 482 const char *data;
483 483 Py_ssize_t length = index_length(self);
484 484 PyObject *entry;
485 485
486 486 if (pos < 0)
487 487 pos += length;
488 488
489 489 if (pos < 0 || pos >= length) {
490 490 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
491 491 return NULL;
492 492 }
493 493
494 494 if (pos == length - 1) {
495 495 Py_INCREF(nullentry);
496 496 return nullentry;
497 497 }
498 498
499 499 if (pos >= self->length - 1) {
500 500 PyObject *obj;
501 501 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
502 502 Py_INCREF(obj);
503 503 return obj;
504 504 }
505 505
506 506 if (self->cache) {
507 507 if (self->cache[pos]) {
508 508 Py_INCREF(self->cache[pos]);
509 509 return self->cache[pos];
510 510 }
511 511 } else {
512 512 self->cache = calloc(self->raw_length, sizeof(PyObject *));
513 513 if (self->cache == NULL)
514 514 return PyErr_NoMemory();
515 515 }
516 516
517 517 data = index_deref(self, pos);
518 518 if (data == NULL)
519 519 return NULL;
520 520
521 521 offset_flags = getbe32(data + 4);
522 522 if (pos == 0) /* mask out version number for the first entry */
523 523 offset_flags &= 0xFFFF;
524 524 else {
525 525 uint32_t offset_high = getbe32(data);
526 526 offset_flags |= ((uint64_t)offset_high) << 32;
527 527 }
528 528
529 529 comp_len = getbe32(data + 8);
530 530 uncomp_len = getbe32(data + 12);
531 531 base_rev = getbe32(data + 16);
532 532 link_rev = getbe32(data + 20);
533 533 parent_1 = getbe32(data + 24);
534 534 parent_2 = getbe32(data + 28);
535 535 c_node_id = data + 32;
536 536
537 537 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
538 538 uncomp_len, base_rev, link_rev,
539 539 parent_1, parent_2, c_node_id, 20);
540 540
541 541 if (entry) {
542 542 PyObject_GC_UnTrack(entry);
543 543 Py_INCREF(entry);
544 544 }
545 545
546 546 self->cache[pos] = entry;
547 547
548 548 return entry;
549 549 }
550 550
551 551 /*
552 552 * Return the 20-byte SHA of the node corresponding to the given rev.
553 553 */
554 554 static const char *index_node(indexObject *self, Py_ssize_t pos)
555 555 {
556 556 Py_ssize_t length = index_length(self);
557 557 const char *data;
558 558
559 559 if (pos == length - 1 || pos == INT_MAX)
560 560 return nullid;
561 561
562 562 if (pos >= length)
563 563 return NULL;
564 564
565 565 if (pos >= self->length - 1) {
566 566 PyObject *tuple, *str;
567 567 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
568 568 str = PyTuple_GetItem(tuple, 7);
569 569 return str ? PyString_AS_STRING(str) : NULL;
570 570 }
571 571
572 572 data = index_deref(self, pos);
573 573 return data ? data + 32 : NULL;
574 574 }
575 575
576 576 static int nt_insert(indexObject *self, const char *node, int rev);
577 577
578 578 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
579 579 {
580 580 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
581 581 return -1;
582 582 if (*nodelen == 20)
583 583 return 0;
584 584 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
585 585 return -1;
586 586 }
587 587
588 588 static PyObject *index_insert(indexObject *self, PyObject *args)
589 589 {
590 590 PyObject *obj;
591 591 char *node;
592 592 long offset;
593 593 Py_ssize_t len, nodelen;
594 594
595 595 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
596 596 return NULL;
597 597
598 598 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
599 599 PyErr_SetString(PyExc_TypeError, "8-tuple required");
600 600 return NULL;
601 601 }
602 602
603 603 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
604 604 return NULL;
605 605
606 606 len = index_length(self);
607 607
608 608 if (offset < 0)
609 609 offset += len;
610 610
611 611 if (offset != len - 1) {
612 612 PyErr_SetString(PyExc_IndexError,
613 613 "insert only supported at index -1");
614 614 return NULL;
615 615 }
616 616
617 617 if (offset > INT_MAX) {
618 618 PyErr_SetString(PyExc_ValueError,
619 619 "currently only 2**31 revs supported");
620 620 return NULL;
621 621 }
622 622
623 623 if (self->added == NULL) {
624 624 self->added = PyList_New(0);
625 625 if (self->added == NULL)
626 626 return NULL;
627 627 }
628 628
629 629 if (PyList_Append(self->added, obj) == -1)
630 630 return NULL;
631 631
632 632 if (self->nt)
633 633 nt_insert(self, node, (int)offset);
634 634
635 635 Py_CLEAR(self->headrevs);
636 636 Py_RETURN_NONE;
637 637 }
638 638
639 639 static void _index_clearcaches(indexObject *self)
640 640 {
641 641 if (self->cache) {
642 642 Py_ssize_t i;
643 643
644 644 for (i = 0; i < self->raw_length; i++)
645 645 Py_CLEAR(self->cache[i]);
646 646 free(self->cache);
647 647 self->cache = NULL;
648 648 }
649 649 if (self->offsets) {
650 650 free(self->offsets);
651 651 self->offsets = NULL;
652 652 }
653 653 if (self->nt) {
654 654 free(self->nt);
655 655 self->nt = NULL;
656 656 }
657 657 Py_CLEAR(self->headrevs);
658 658 }
659 659
660 660 static PyObject *index_clearcaches(indexObject *self)
661 661 {
662 662 _index_clearcaches(self);
663 663 self->ntlength = self->ntcapacity = 0;
664 664 self->ntdepth = self->ntsplits = 0;
665 665 self->ntrev = -1;
666 666 self->ntlookups = self->ntmisses = 0;
667 667 Py_RETURN_NONE;
668 668 }
669 669
670 670 static PyObject *index_stats(indexObject *self)
671 671 {
672 672 PyObject *obj = PyDict_New();
673 673
674 674 if (obj == NULL)
675 675 return NULL;
676 676
677 677 #define istat(__n, __d) \
678 678 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
679 679 goto bail;
680 680
681 681 if (self->added) {
682 682 Py_ssize_t len = PyList_GET_SIZE(self->added);
683 683 if (PyDict_SetItemString(obj, "index entries added",
684 684 PyInt_FromSsize_t(len)) == -1)
685 685 goto bail;
686 686 }
687 687
688 688 if (self->raw_length != self->length - 1)
689 689 istat(raw_length, "revs on disk");
690 690 istat(length, "revs in memory");
691 691 istat(ntcapacity, "node trie capacity");
692 692 istat(ntdepth, "node trie depth");
693 693 istat(ntlength, "node trie count");
694 694 istat(ntlookups, "node trie lookups");
695 695 istat(ntmisses, "node trie misses");
696 696 istat(ntrev, "node trie last rev scanned");
697 697 istat(ntsplits, "node trie splits");
698 698
699 699 #undef istat
700 700
701 701 return obj;
702 702
703 703 bail:
704 704 Py_XDECREF(obj);
705 705 return NULL;
706 706 }
707 707
708 708 /*
709 709 * When we cache a list, we want to be sure the caller can't mutate
710 710 * the cached copy.
711 711 */
712 712 static PyObject *list_copy(PyObject *list)
713 713 {
714 714 Py_ssize_t len = PyList_GET_SIZE(list);
715 715 PyObject *newlist = PyList_New(len);
716 716 Py_ssize_t i;
717 717
718 718 if (newlist == NULL)
719 719 return NULL;
720 720
721 721 for (i = 0; i < len; i++) {
722 722 PyObject *obj = PyList_GET_ITEM(list, i);
723 723 Py_INCREF(obj);
724 724 PyList_SET_ITEM(newlist, i, obj);
725 725 }
726 726
727 727 return newlist;
728 728 }
729 729
730 730 static PyObject *index_headrevs(indexObject *self)
731 731 {
732 732 Py_ssize_t i, len, addlen;
733 733 char *nothead = NULL;
734 734 PyObject *heads;
735 735
736 736 if (self->headrevs)
737 737 return list_copy(self->headrevs);
738 738
739 739 len = index_length(self) - 1;
740 740 heads = PyList_New(0);
741 741 if (heads == NULL)
742 742 goto bail;
743 743 if (len == 0) {
744 744 PyObject *nullid = PyInt_FromLong(-1);
745 745 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
746 746 Py_XDECREF(nullid);
747 747 goto bail;
748 748 }
749 749 goto done;
750 750 }
751 751
752 752 nothead = calloc(len, 1);
753 753 if (nothead == NULL)
754 754 goto bail;
755 755
756 756 for (i = 0; i < self->raw_length; i++) {
757 757 const char *data = index_deref(self, i);
758 758 int parent_1 = getbe32(data + 24);
759 759 int parent_2 = getbe32(data + 28);
760 760 if (parent_1 >= 0)
761 761 nothead[parent_1] = 1;
762 762 if (parent_2 >= 0)
763 763 nothead[parent_2] = 1;
764 764 }
765 765
766 766 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
767 767
768 768 for (i = 0; i < addlen; i++) {
769 769 PyObject *rev = PyList_GET_ITEM(self->added, i);
770 770 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
771 771 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
772 772 long parent_1, parent_2;
773 773
774 774 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
775 775 PyErr_SetString(PyExc_TypeError,
776 776 "revlog parents are invalid");
777 777 goto bail;
778 778 }
779 779 parent_1 = PyInt_AS_LONG(p1);
780 780 parent_2 = PyInt_AS_LONG(p2);
781 781 if (parent_1 >= 0)
782 782 nothead[parent_1] = 1;
783 783 if (parent_2 >= 0)
784 784 nothead[parent_2] = 1;
785 785 }
786 786
787 787 for (i = 0; i < len; i++) {
788 788 PyObject *head;
789 789
790 790 if (nothead[i])
791 791 continue;
792 792 head = PyInt_FromLong(i);
793 793 if (head == NULL || PyList_Append(heads, head) == -1) {
794 794 Py_XDECREF(head);
795 795 goto bail;
796 796 }
797 797 }
798 798
799 799 done:
800 800 self->headrevs = heads;
801 801 free(nothead);
802 802 return list_copy(self->headrevs);
803 803 bail:
804 804 Py_XDECREF(heads);
805 805 free(nothead);
806 806 return NULL;
807 807 }
808 808
809 809 static inline int nt_level(const char *node, Py_ssize_t level)
810 810 {
811 811 int v = node[level>>1];
812 812 if (!(level & 1))
813 813 v >>= 4;
814 814 return v & 0xf;
815 815 }
816 816
817 817 /*
818 818 * Return values:
819 819 *
820 820 * -4: match is ambiguous (multiple candidates)
821 821 * -2: not found
822 822 * rest: valid rev
823 823 */
824 824 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
825 825 int hex)
826 826 {
827 827 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
828 828 int level, maxlevel, off;
829 829
830 830 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
831 831 return -1;
832 832
833 833 if (self->nt == NULL)
834 834 return -2;
835 835
836 836 if (hex)
837 837 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
838 838 else
839 839 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
840 840
841 841 for (level = off = 0; level < maxlevel; level++) {
842 842 int k = getnybble(node, level);
843 843 nodetree *n = &self->nt[off];
844 844 int v = n->children[k];
845 845
846 846 if (v < 0) {
847 847 const char *n;
848 848 Py_ssize_t i;
849 849
850 850 v = -v - 1;
851 851 n = index_node(self, v);
852 852 if (n == NULL)
853 853 return -2;
854 854 for (i = level; i < maxlevel; i++)
855 855 if (getnybble(node, i) != nt_level(n, i))
856 856 return -2;
857 857 return v;
858 858 }
859 859 if (v == 0)
860 860 return -2;
861 861 off = v;
862 862 }
863 863 /* multiple matches against an ambiguous prefix */
864 864 return -4;
865 865 }
866 866
867 867 static int nt_new(indexObject *self)
868 868 {
869 869 if (self->ntlength == self->ntcapacity) {
870 870 self->ntcapacity *= 2;
871 871 self->nt = realloc(self->nt,
872 872 self->ntcapacity * sizeof(nodetree));
873 873 if (self->nt == NULL) {
874 874 PyErr_SetString(PyExc_MemoryError, "out of memory");
875 875 return -1;
876 876 }
877 877 memset(&self->nt[self->ntlength], 0,
878 878 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
879 879 }
880 880 return self->ntlength++;
881 881 }
882 882
883 883 static int nt_insert(indexObject *self, const char *node, int rev)
884 884 {
885 885 int level = 0;
886 886 int off = 0;
887 887
888 888 while (level < 40) {
889 889 int k = nt_level(node, level);
890 890 nodetree *n;
891 891 int v;
892 892
893 893 n = &self->nt[off];
894 894 v = n->children[k];
895 895
896 896 if (v == 0) {
897 897 n->children[k] = -rev - 1;
898 898 return 0;
899 899 }
900 900 if (v < 0) {
901 901 const char *oldnode = index_node(self, -v - 1);
902 902 int noff;
903 903
904 904 if (!oldnode || !memcmp(oldnode, node, 20)) {
905 905 n->children[k] = -rev - 1;
906 906 return 0;
907 907 }
908 908 noff = nt_new(self);
909 909 if (noff == -1)
910 910 return -1;
911 911 /* self->nt may have been changed by realloc */
912 912 self->nt[off].children[k] = noff;
913 913 off = noff;
914 914 n = &self->nt[off];
915 915 n->children[nt_level(oldnode, ++level)] = v;
916 916 if (level > self->ntdepth)
917 917 self->ntdepth = level;
918 918 self->ntsplits += 1;
919 919 } else {
920 920 level += 1;
921 921 off = v;
922 922 }
923 923 }
924 924
925 925 return -1;
926 926 }
927 927
928 928 static int nt_init(indexObject *self)
929 929 {
930 930 if (self->nt == NULL) {
931 931 if (self->raw_length > INT_MAX) {
932 932 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
933 933 return -1;
934 934 }
935 935 self->ntcapacity = self->raw_length < 4
936 936 ? 4 : (int)self->raw_length / 2;
937 937
938 938 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
939 939 if (self->nt == NULL) {
940 940 PyErr_NoMemory();
941 941 return -1;
942 942 }
943 943 self->ntlength = 1;
944 944 self->ntrev = (int)index_length(self) - 1;
945 945 self->ntlookups = 1;
946 946 self->ntmisses = 0;
947 947 if (nt_insert(self, nullid, INT_MAX) == -1)
948 948 return -1;
949 949 }
950 950 return 0;
951 951 }
952 952
953 953 /*
954 954 * Return values:
955 955 *
956 956 * -3: error (exception set)
957 957 * -2: not found (no exception set)
958 958 * rest: valid rev
959 959 */
960 960 static int index_find_node(indexObject *self,
961 961 const char *node, Py_ssize_t nodelen)
962 962 {
963 963 int rev;
964 964
965 965 self->ntlookups++;
966 966 rev = nt_find(self, node, nodelen, 0);
967 967 if (rev >= -1)
968 968 return rev;
969 969
970 970 if (nt_init(self) == -1)
971 971 return -3;
972 972
973 973 /*
974 974 * For the first handful of lookups, we scan the entire index,
975 975 * and cache only the matching nodes. This optimizes for cases
976 976 * like "hg tip", where only a few nodes are accessed.
977 977 *
978 978 * After that, we cache every node we visit, using a single
979 979 * scan amortized over multiple lookups. This gives the best
980 980 * bulk performance, e.g. for "hg log".
981 981 */
982 982 if (self->ntmisses++ < 4) {
983 983 for (rev = self->ntrev - 1; rev >= 0; rev--) {
984 984 const char *n = index_node(self, rev);
985 985 if (n == NULL)
986 986 return -2;
987 987 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
988 988 if (nt_insert(self, n, rev) == -1)
989 989 return -3;
990 990 break;
991 991 }
992 992 }
993 993 } else {
994 994 for (rev = self->ntrev - 1; rev >= 0; rev--) {
995 995 const char *n = index_node(self, rev);
996 996 if (n == NULL) {
997 997 self->ntrev = rev + 1;
998 998 return -2;
999 999 }
1000 1000 if (nt_insert(self, n, rev) == -1) {
1001 1001 self->ntrev = rev + 1;
1002 1002 return -3;
1003 1003 }
1004 1004 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1005 1005 break;
1006 1006 }
1007 1007 }
1008 1008 self->ntrev = rev;
1009 1009 }
1010 1010
1011 1011 if (rev >= 0)
1012 1012 return rev;
1013 1013 return -2;
1014 1014 }
1015 1015
1016 1016 static PyObject *raise_revlog_error(void)
1017 1017 {
1018 1018 static PyObject *errclass;
1019 1019 PyObject *mod = NULL, *errobj;
1020 1020
1021 1021 if (errclass == NULL) {
1022 1022 PyObject *dict;
1023 1023
1024 1024 mod = PyImport_ImportModule("mercurial.error");
1025 1025 if (mod == NULL)
1026 1026 goto classfail;
1027 1027
1028 1028 dict = PyModule_GetDict(mod);
1029 1029 if (dict == NULL)
1030 1030 goto classfail;
1031 1031
1032 1032 errclass = PyDict_GetItemString(dict, "RevlogError");
1033 1033 if (errclass == NULL) {
1034 1034 PyErr_SetString(PyExc_SystemError,
1035 1035 "could not find RevlogError");
1036 1036 goto classfail;
1037 1037 }
1038 1038 Py_INCREF(errclass);
1039 1039 }
1040 1040
1041 1041 errobj = PyObject_CallFunction(errclass, NULL);
1042 1042 if (errobj == NULL)
1043 1043 return NULL;
1044 1044 PyErr_SetObject(errclass, errobj);
1045 1045 return errobj;
1046 1046
1047 1047 classfail:
1048 1048 Py_XDECREF(mod);
1049 1049 return NULL;
1050 1050 }
1051 1051
1052 1052 static PyObject *index_getitem(indexObject *self, PyObject *value)
1053 1053 {
1054 1054 char *node;
1055 1055 Py_ssize_t nodelen;
1056 1056 int rev;
1057 1057
1058 1058 if (PyInt_Check(value))
1059 1059 return index_get(self, PyInt_AS_LONG(value));
1060 1060
1061 1061 if (node_check(value, &node, &nodelen) == -1)
1062 1062 return NULL;
1063 1063 rev = index_find_node(self, node, nodelen);
1064 1064 if (rev >= -1)
1065 1065 return PyInt_FromLong(rev);
1066 1066 if (rev == -2)
1067 1067 raise_revlog_error();
1068 1068 return NULL;
1069 1069 }
1070 1070
1071 1071 static int nt_partialmatch(indexObject *self, const char *node,
1072 1072 Py_ssize_t nodelen)
1073 1073 {
1074 1074 int rev;
1075 1075
1076 1076 if (nt_init(self) == -1)
1077 1077 return -3;
1078 1078
1079 1079 if (self->ntrev > 0) {
1080 1080 /* ensure that the radix tree is fully populated */
1081 1081 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1082 1082 const char *n = index_node(self, rev);
1083 1083 if (n == NULL)
1084 1084 return -2;
1085 1085 if (nt_insert(self, n, rev) == -1)
1086 1086 return -3;
1087 1087 }
1088 1088 self->ntrev = rev;
1089 1089 }
1090 1090
1091 1091 return nt_find(self, node, nodelen, 1);
1092 1092 }
1093 1093
1094 1094 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1095 1095 {
1096 1096 const char *fullnode;
1097 1097 int nodelen;
1098 1098 char *node;
1099 1099 int rev, i;
1100 1100
1101 1101 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1102 1102 return NULL;
1103 1103
1104 1104 if (nodelen < 4) {
1105 1105 PyErr_SetString(PyExc_ValueError, "key too short");
1106 1106 return NULL;
1107 1107 }
1108 1108
1109 1109 if (nodelen > 40) {
1110 1110 PyErr_SetString(PyExc_ValueError, "key too long");
1111 1111 return NULL;
1112 1112 }
1113 1113
1114 1114 for (i = 0; i < nodelen; i++)
1115 1115 hexdigit(node, i);
1116 1116 if (PyErr_Occurred()) {
1117 1117 /* input contains non-hex characters */
1118 1118 PyErr_Clear();
1119 1119 Py_RETURN_NONE;
1120 1120 }
1121 1121
1122 1122 rev = nt_partialmatch(self, node, nodelen);
1123 1123
1124 1124 switch (rev) {
1125 1125 case -4:
1126 1126 raise_revlog_error();
1127 1127 case -3:
1128 1128 return NULL;
1129 1129 case -2:
1130 1130 Py_RETURN_NONE;
1131 1131 case -1:
1132 1132 return PyString_FromStringAndSize(nullid, 20);
1133 1133 }
1134 1134
1135 1135 fullnode = index_node(self, rev);
1136 1136 if (fullnode == NULL) {
1137 1137 PyErr_Format(PyExc_IndexError,
1138 1138 "could not access rev %d", rev);
1139 1139 return NULL;
1140 1140 }
1141 1141 return PyString_FromStringAndSize(fullnode, 20);
1142 1142 }
1143 1143
1144 1144 static PyObject *index_m_get(indexObject *self, PyObject *args)
1145 1145 {
1146 1146 Py_ssize_t nodelen;
1147 1147 PyObject *val;
1148 1148 char *node;
1149 1149 int rev;
1150 1150
1151 1151 if (!PyArg_ParseTuple(args, "O", &val))
1152 1152 return NULL;
1153 1153 if (node_check(val, &node, &nodelen) == -1)
1154 1154 return NULL;
1155 1155 rev = index_find_node(self, node, nodelen);
1156 1156 if (rev == -3)
1157 1157 return NULL;
1158 1158 if (rev == -2)
1159 1159 Py_RETURN_NONE;
1160 1160 return PyInt_FromLong(rev);
1161 1161 }
1162 1162
1163 1163 static int index_contains(indexObject *self, PyObject *value)
1164 1164 {
1165 1165 char *node;
1166 1166 Py_ssize_t nodelen;
1167 1167
1168 1168 if (PyInt_Check(value)) {
1169 1169 long rev = PyInt_AS_LONG(value);
1170 1170 return rev >= -1 && rev < index_length(self);
1171 1171 }
1172 1172
1173 1173 if (node_check(value, &node, &nodelen) == -1)
1174 1174 return -1;
1175 1175
1176 1176 switch (index_find_node(self, node, nodelen)) {
1177 1177 case -3:
1178 1178 return -1;
1179 1179 case -2:
1180 1180 return 0;
1181 1181 default:
1182 1182 return 1;
1183 1183 }
1184 1184 }
1185 1185
1186 1186 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1187 1187 {
1188 1188 if (rev >= self->length - 1) {
1189 1189 PyObject *tuple = PyList_GET_ITEM(self->added,
1190 1190 rev - self->length + 1);
1191 1191 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1192 1192 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1193 1193 } else {
1194 1194 const char *data = index_deref(self, rev);
1195 1195 ps[0] = getbe32(data + 24);
1196 1196 ps[1] = getbe32(data + 28);
1197 1197 }
1198 1198 }
1199 1199
1200 1200 typedef uint64_t bitmask;
1201 1201
1202 1202 /*
1203 1203 * Given a disjoint set of revs, return all candidates for the
1204 1204 * greatest common ancestor. In revset notation, this is the set
1205 1205 * "heads(::a and ::b and ...)"
1206 1206 */
1207 1207 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1208 1208 int revcount)
1209 1209 {
1210 1210 const bitmask allseen = (1ull << revcount) - 1;
1211 1211 const bitmask poison = 1ull << revcount;
1212 1212 PyObject *gca = PyList_New(0);
1213 1213 int i, v, interesting;
1214 1214 int maxrev = -1;
1215 1215 long sp;
1216 1216 bitmask *seen;
1217 1217
1218 1218 if (gca == NULL)
1219 1219 return PyErr_NoMemory();
1220 1220
1221 1221 for (i = 0; i < revcount; i++) {
1222 1222 if (revs[i] > maxrev)
1223 1223 maxrev = revs[i];
1224 1224 }
1225 1225
1226 1226 seen = calloc(sizeof(*seen), maxrev + 1);
1227 1227 if (seen == NULL) {
1228 1228 Py_DECREF(gca);
1229 1229 return PyErr_NoMemory();
1230 1230 }
1231 1231
1232 1232 for (i = 0; i < revcount; i++)
1233 1233 seen[revs[i]] = 1ull << i;
1234 1234
1235 1235 interesting = revcount;
1236 1236
1237 1237 for (v = maxrev; v >= 0 && interesting; v--) {
1238 1238 long sv = seen[v];
1239 1239 int parents[2];
1240 1240
1241 1241 if (!sv)
1242 1242 continue;
1243 1243
1244 1244 if (sv < poison) {
1245 1245 interesting -= 1;
1246 1246 if (sv == allseen) {
1247 1247 PyObject *obj = PyInt_FromLong(v);
1248 1248 if (obj == NULL)
1249 1249 goto bail;
1250 1250 if (PyList_Append(gca, obj) == -1) {
1251 1251 Py_DECREF(obj);
1252 1252 goto bail;
1253 1253 }
1254 1254 sv |= poison;
1255 1255 for (i = 0; i < revcount; i++) {
1256 1256 if (revs[i] == v)
1257 1257 goto done;
1258 1258 }
1259 1259 }
1260 1260 }
1261 1261 index_get_parents(self, v, parents);
1262 1262
1263 1263 for (i = 0; i < 2; i++) {
1264 1264 int p = parents[i];
1265 1265 if (p == -1)
1266 1266 continue;
1267 1267 sp = seen[p];
1268 1268 if (sv < poison) {
1269 1269 if (sp == 0) {
1270 1270 seen[p] = sv;
1271 1271 interesting++;
1272 1272 }
1273 1273 else if (sp != sv)
1274 1274 seen[p] |= sv;
1275 1275 } else {
1276 1276 if (sp && sp < poison)
1277 1277 interesting--;
1278 1278 seen[p] = sv;
1279 1279 }
1280 1280 }
1281 1281 }
1282 1282
1283 1283 done:
1284 1284 free(seen);
1285 1285 return gca;
1286 1286 bail:
1287 1287 free(seen);
1288 1288 Py_XDECREF(gca);
1289 1289 return NULL;
1290 1290 }
1291 1291
1292 1292 /*
1293 1293 * Given a disjoint set of revs, return the subset with the longest
1294 1294 * path to the root.
1295 1295 */
1296 1296 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1297 1297 {
1298 1298 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1299 1299 static const Py_ssize_t capacity = 24;
1300 1300 int *depth, *interesting = NULL;
1301 1301 int i, j, v, ninteresting;
1302 PyObject *dict = NULL, *keys;
1302 PyObject *dict = NULL, *keys = NULL;
1303 1303 long *seen = NULL;
1304 1304 int maxrev = -1;
1305 1305 long final;
1306 1306
1307 1307 if (revcount > capacity) {
1308 1308 PyErr_Format(PyExc_OverflowError,
1309 1309 "bitset size (%ld) > capacity (%ld)",
1310 1310 (long)revcount, (long)capacity);
1311 1311 return NULL;
1312 1312 }
1313 1313
1314 1314 for (i = 0; i < revcount; i++) {
1315 1315 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1316 1316 if (n > maxrev)
1317 1317 maxrev = n;
1318 1318 }
1319 1319
1320 1320 depth = calloc(sizeof(*depth), maxrev + 1);
1321 1321 if (depth == NULL)
1322 1322 return PyErr_NoMemory();
1323 1323
1324 1324 seen = calloc(sizeof(*seen), maxrev + 1);
1325 1325 if (seen == NULL) {
1326 1326 PyErr_NoMemory();
1327 1327 goto bail;
1328 1328 }
1329 1329
1330 1330 interesting = calloc(sizeof(*interesting), 2 << revcount);
1331 1331 if (interesting == NULL) {
1332 1332 PyErr_NoMemory();
1333 1333 goto bail;
1334 1334 }
1335 1335
1336 1336 if (PyList_Sort(revs) == -1)
1337 1337 goto bail;
1338 1338
1339 1339 for (i = 0; i < revcount; i++) {
1340 1340 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1341 1341 long b = 1l << i;
1342 1342 depth[n] = 1;
1343 1343 seen[n] = b;
1344 1344 interesting[b] = 1;
1345 1345 }
1346 1346
1347 1347 ninteresting = (int)revcount;
1348 1348
1349 1349 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1350 1350 int dv = depth[v];
1351 1351 int parents[2];
1352 1352 long sv;
1353 1353
1354 1354 if (dv == 0)
1355 1355 continue;
1356 1356
1357 1357 sv = seen[v];
1358 1358 index_get_parents(self, v, parents);
1359 1359
1360 1360 for (i = 0; i < 2; i++) {
1361 1361 int p = parents[i];
1362 1362 long nsp, sp;
1363 1363 int dp;
1364 1364
1365 1365 if (p == -1)
1366 1366 continue;
1367 1367
1368 1368 dp = depth[p];
1369 1369 nsp = sp = seen[p];
1370 1370 if (dp <= dv) {
1371 1371 depth[p] = dv + 1;
1372 1372 if (sp != sv) {
1373 1373 interesting[sv] += 1;
1374 1374 nsp = seen[p] = sv;
1375 1375 if (sp) {
1376 1376 interesting[sp] -= 1;
1377 1377 if (interesting[sp] == 0)
1378 1378 ninteresting -= 1;
1379 1379 }
1380 1380 }
1381 1381 }
1382 1382 else if (dv == dp - 1) {
1383 1383 nsp = sp | sv;
1384 1384 if (nsp == sp)
1385 1385 continue;
1386 1386 seen[p] = nsp;
1387 1387 interesting[sp] -= 1;
1388 1388 if (interesting[sp] == 0 && interesting[nsp] > 0)
1389 1389 ninteresting -= 1;
1390 1390 interesting[nsp] += 1;
1391 1391 }
1392 1392 }
1393 1393 interesting[sv] -= 1;
1394 1394 if (interesting[sv] == 0)
1395 1395 ninteresting -= 1;
1396 1396 }
1397 1397
1398 1398 final = 0;
1399 1399 j = ninteresting;
1400 1400 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1401 1401 if (interesting[i] == 0)
1402 1402 continue;
1403 1403 final |= i;
1404 1404 j -= 1;
1405 1405 }
1406 if (final == 0)
1407 return PyList_New(0);
1406 if (final == 0) {
1407 keys = PyList_New(0);
1408 goto bail;
1409 }
1408 1410
1409 1411 dict = PyDict_New();
1410 1412 if (dict == NULL)
1411 1413 goto bail;
1412 1414
1413 1415 for (i = 0; i < revcount; i++) {
1414 1416 PyObject *key;
1415 1417
1416 1418 if ((final & (1 << i)) == 0)
1417 1419 continue;
1418 1420
1419 1421 key = PyList_GET_ITEM(revs, i);
1420 1422 Py_INCREF(key);
1421 1423 Py_INCREF(Py_None);
1422 1424 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1423 1425 Py_DECREF(key);
1424 1426 Py_DECREF(Py_None);
1425 1427 goto bail;
1426 1428 }
1427 1429 }
1428 1430
1429 1431 keys = PyDict_Keys(dict);
1430 1432
1431 free(depth);
1432 free(seen);
1433 free(interesting);
1434 Py_DECREF(dict);
1435
1436 return keys;
1437 1433 bail:
1438 1434 free(depth);
1439 1435 free(seen);
1440 1436 free(interesting);
1441 1437 Py_XDECREF(dict);
1442 1438
1443 return NULL;
1439 return keys;
1444 1440 }
1445 1441
1446 1442 /*
1447 1443 * Given a (possibly overlapping) set of revs, return the greatest
1448 1444 * common ancestors: those with the longest path to the root.
1449 1445 */
1450 1446 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1451 1447 {
1452 1448 PyObject *ret = NULL, *gca = NULL;
1453 1449 Py_ssize_t argcount, i, len;
1454 1450 bitmask repeat = 0;
1455 1451 int revcount = 0;
1456 1452 int *revs;
1457 1453
1458 1454 argcount = PySequence_Length(args);
1459 1455 revs = malloc(argcount * sizeof(*revs));
1460 1456 if (argcount > 0 && revs == NULL)
1461 1457 return PyErr_NoMemory();
1462 1458 len = index_length(self) - 1;
1463 1459
1464 1460 for (i = 0; i < argcount; i++) {
1465 1461 static const int capacity = 24;
1466 1462 PyObject *obj = PySequence_GetItem(args, i);
1467 1463 bitmask x;
1468 1464 long val;
1469 1465
1470 1466 if (!PyInt_Check(obj)) {
1471 1467 PyErr_SetString(PyExc_TypeError,
1472 1468 "arguments must all be ints");
1473 1469 goto bail;
1474 1470 }
1475 1471 val = PyInt_AsLong(obj);
1476 1472 if (val == -1) {
1477 1473 ret = PyList_New(0);
1478 1474 goto done;
1479 1475 }
1480 1476 if (val < 0 || val >= len) {
1481 1477 PyErr_SetString(PyExc_IndexError,
1482 1478 "index out of range");
1483 1479 goto bail;
1484 1480 }
1485 1481 /* this cheesy bloom filter lets us avoid some more
1486 1482 * expensive duplicate checks in the common set-is-disjoint
1487 1483 * case */
1488 1484 x = 1ull << (val & 0x3f);
1489 1485 if (repeat & x) {
1490 1486 int k;
1491 1487 for (k = 0; k < revcount; k++) {
1492 1488 if (val == revs[k])
1493 1489 goto duplicate;
1494 1490 }
1495 1491 }
1496 1492 else repeat |= x;
1497 1493 if (revcount >= capacity) {
1498 1494 PyErr_Format(PyExc_OverflowError,
1499 1495 "bitset size (%d) > capacity (%d)",
1500 1496 revcount, capacity);
1501 1497 goto bail;
1502 1498 }
1503 1499 revs[revcount++] = (int)val;
1504 1500 duplicate:;
1505 1501 }
1506 1502
1507 1503 if (revcount == 0) {
1508 1504 ret = PyList_New(0);
1509 1505 goto done;
1510 1506 }
1511 1507 if (revcount == 1) {
1512 1508 PyObject *obj;
1513 1509 ret = PyList_New(1);
1514 1510 if (ret == NULL)
1515 1511 goto bail;
1516 1512 obj = PyInt_FromLong(revs[0]);
1517 1513 if (obj == NULL)
1518 1514 goto bail;
1519 1515 PyList_SET_ITEM(ret, 0, obj);
1520 1516 goto done;
1521 1517 }
1522 1518
1523 1519 gca = find_gca_candidates(self, revs, revcount);
1524 1520 if (gca == NULL)
1525 1521 goto bail;
1526 1522
1527 1523 if (PyList_GET_SIZE(gca) <= 1) {
1528 1524 ret = gca;
1529 1525 Py_INCREF(gca);
1530 1526 }
1531 1527 else ret = find_deepest(self, gca);
1532 1528
1533 1529 done:
1534 1530 free(revs);
1535 1531 Py_XDECREF(gca);
1536 1532
1537 1533 return ret;
1538 1534
1539 1535 bail:
1540 1536 free(revs);
1541 1537 Py_XDECREF(gca);
1542 1538 Py_XDECREF(ret);
1543 1539 return NULL;
1544 1540 }
1545 1541
1546 1542 /*
1547 1543 * Given a (possibly overlapping) set of revs, return all the
1548 1544 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1549 1545 */
1550 1546 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1551 1547 {
1552 1548 PyObject *ret = NULL;
1553 1549 Py_ssize_t argcount, i, len;
1554 1550 bitmask repeat = 0;
1555 1551 int revcount = 0;
1556 1552 int *revs;
1557 1553
1558 1554 argcount = PySequence_Length(args);
1559 1555 revs = malloc(argcount * sizeof(*revs));
1560 1556 if (argcount > 0 && revs == NULL)
1561 1557 return PyErr_NoMemory();
1562 1558 len = index_length(self) - 1;
1563 1559
1564 1560 for (i = 0; i < argcount; i++) {
1565 1561 static const int capacity = 24;
1566 1562 PyObject *obj = PySequence_GetItem(args, i);
1567 1563 bitmask x;
1568 1564 long val;
1569 1565
1570 1566 if (!PyInt_Check(obj)) {
1571 1567 PyErr_SetString(PyExc_TypeError,
1572 1568 "arguments must all be ints");
1573 1569 goto bail;
1574 1570 }
1575 1571 val = PyInt_AsLong(obj);
1576 1572 if (val == -1) {
1577 1573 ret = PyList_New(0);
1578 1574 goto done;
1579 1575 }
1580 1576 if (val < 0 || val >= len) {
1581 1577 PyErr_SetString(PyExc_IndexError,
1582 1578 "index out of range");
1583 1579 goto bail;
1584 1580 }
1585 1581 /* this cheesy bloom filter lets us avoid some more
1586 1582 * expensive duplicate checks in the common set-is-disjoint
1587 1583 * case */
1588 1584 x = 1ull << (val & 0x3f);
1589 1585 if (repeat & x) {
1590 1586 int k;
1591 1587 for (k = 0; k < revcount; k++) {
1592 1588 if (val == revs[k])
1593 1589 goto duplicate;
1594 1590 }
1595 1591 }
1596 1592 else repeat |= x;
1597 1593 if (revcount >= capacity) {
1598 1594 PyErr_Format(PyExc_OverflowError,
1599 1595 "bitset size (%d) > capacity (%d)",
1600 1596 revcount, capacity);
1601 1597 goto bail;
1602 1598 }
1603 1599 revs[revcount++] = (int)val;
1604 1600 duplicate:;
1605 1601 }
1606 1602
1607 1603 if (revcount == 0) {
1608 1604 ret = PyList_New(0);
1609 1605 goto done;
1610 1606 }
1611 1607 if (revcount == 1) {
1612 1608 PyObject *obj;
1613 1609 ret = PyList_New(1);
1614 1610 if (ret == NULL)
1615 1611 goto bail;
1616 1612 obj = PyInt_FromLong(revs[0]);
1617 1613 if (obj == NULL)
1618 1614 goto bail;
1619 1615 PyList_SET_ITEM(ret, 0, obj);
1620 1616 goto done;
1621 1617 }
1622 1618
1623 1619 ret = find_gca_candidates(self, revs, revcount);
1624 1620 if (ret == NULL)
1625 1621 goto bail;
1626 1622
1627 1623 done:
1628 1624 free(revs);
1629 1625 return ret;
1630 1626
1631 1627 bail:
1632 1628 free(revs);
1633 1629 Py_XDECREF(ret);
1634 1630 return NULL;
1635 1631 }
1636 1632
1637 1633 /*
1638 1634 * Invalidate any trie entries introduced by added revs.
1639 1635 */
1640 1636 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1641 1637 {
1642 1638 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1643 1639
1644 1640 for (i = start; i < len; i++) {
1645 1641 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1646 1642 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1647 1643
1648 1644 nt_insert(self, PyString_AS_STRING(node), -1);
1649 1645 }
1650 1646
1651 1647 if (start == 0)
1652 1648 Py_CLEAR(self->added);
1653 1649 }
1654 1650
1655 1651 /*
1656 1652 * Delete a numeric range of revs, which must be at the end of the
1657 1653 * range, but exclude the sentinel nullid entry.
1658 1654 */
1659 1655 static int index_slice_del(indexObject *self, PyObject *item)
1660 1656 {
1661 1657 Py_ssize_t start, stop, step, slicelength;
1662 1658 Py_ssize_t length = index_length(self);
1663 1659 int ret = 0;
1664 1660
1665 1661 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1666 1662 &start, &stop, &step, &slicelength) < 0)
1667 1663 return -1;
1668 1664
1669 1665 if (slicelength <= 0)
1670 1666 return 0;
1671 1667
1672 1668 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1673 1669 stop = start;
1674 1670
1675 1671 if (step < 0) {
1676 1672 stop = start + 1;
1677 1673 start = stop + step*(slicelength - 1) - 1;
1678 1674 step = -step;
1679 1675 }
1680 1676
1681 1677 if (step != 1) {
1682 1678 PyErr_SetString(PyExc_ValueError,
1683 1679 "revlog index delete requires step size of 1");
1684 1680 return -1;
1685 1681 }
1686 1682
1687 1683 if (stop != length - 1) {
1688 1684 PyErr_SetString(PyExc_IndexError,
1689 1685 "revlog index deletion indices are invalid");
1690 1686 return -1;
1691 1687 }
1692 1688
1693 1689 if (start < self->length - 1) {
1694 1690 if (self->nt) {
1695 1691 Py_ssize_t i;
1696 1692
1697 1693 for (i = start + 1; i < self->length - 1; i++) {
1698 1694 const char *node = index_node(self, i);
1699 1695
1700 1696 if (node)
1701 1697 nt_insert(self, node, -1);
1702 1698 }
1703 1699 if (self->added)
1704 1700 nt_invalidate_added(self, 0);
1705 1701 if (self->ntrev > start)
1706 1702 self->ntrev = (int)start;
1707 1703 }
1708 1704 self->length = start + 1;
1709 1705 if (start < self->raw_length) {
1710 1706 if (self->cache) {
1711 1707 Py_ssize_t i;
1712 1708 for (i = start; i < self->raw_length; i++)
1713 1709 Py_CLEAR(self->cache[i]);
1714 1710 }
1715 1711 self->raw_length = start;
1716 1712 }
1717 1713 goto done;
1718 1714 }
1719 1715
1720 1716 if (self->nt) {
1721 1717 nt_invalidate_added(self, start - self->length + 1);
1722 1718 if (self->ntrev > start)
1723 1719 self->ntrev = (int)start;
1724 1720 }
1725 1721 if (self->added)
1726 1722 ret = PyList_SetSlice(self->added, start - self->length + 1,
1727 1723 PyList_GET_SIZE(self->added), NULL);
1728 1724 done:
1729 1725 Py_CLEAR(self->headrevs);
1730 1726 return ret;
1731 1727 }
1732 1728
1733 1729 /*
1734 1730 * Supported ops:
1735 1731 *
1736 1732 * slice deletion
1737 1733 * string assignment (extend node->rev mapping)
1738 1734 * string deletion (shrink node->rev mapping)
1739 1735 */
1740 1736 static int index_assign_subscript(indexObject *self, PyObject *item,
1741 1737 PyObject *value)
1742 1738 {
1743 1739 char *node;
1744 1740 Py_ssize_t nodelen;
1745 1741 long rev;
1746 1742
1747 1743 if (PySlice_Check(item) && value == NULL)
1748 1744 return index_slice_del(self, item);
1749 1745
1750 1746 if (node_check(item, &node, &nodelen) == -1)
1751 1747 return -1;
1752 1748
1753 1749 if (value == NULL)
1754 1750 return self->nt ? nt_insert(self, node, -1) : 0;
1755 1751 rev = PyInt_AsLong(value);
1756 1752 if (rev > INT_MAX || rev < 0) {
1757 1753 if (!PyErr_Occurred())
1758 1754 PyErr_SetString(PyExc_ValueError, "rev out of range");
1759 1755 return -1;
1760 1756 }
1761 1757 return nt_insert(self, node, (int)rev);
1762 1758 }
1763 1759
1764 1760 /*
1765 1761 * Find all RevlogNG entries in an index that has inline data. Update
1766 1762 * the optional "offsets" table with those entries.
1767 1763 */
1768 1764 static long inline_scan(indexObject *self, const char **offsets)
1769 1765 {
1770 1766 const char *data = PyString_AS_STRING(self->data);
1771 1767 Py_ssize_t pos = 0;
1772 1768 Py_ssize_t end = PyString_GET_SIZE(self->data);
1773 1769 long incr = v1_hdrsize;
1774 1770 Py_ssize_t len = 0;
1775 1771
1776 1772 while (pos + v1_hdrsize <= end && pos >= 0) {
1777 1773 uint32_t comp_len;
1778 1774 /* 3rd element of header is length of compressed inline data */
1779 1775 comp_len = getbe32(data + pos + 8);
1780 1776 incr = v1_hdrsize + comp_len;
1781 1777 if (offsets)
1782 1778 offsets[len] = data + pos;
1783 1779 len++;
1784 1780 pos += incr;
1785 1781 }
1786 1782
1787 1783 if (pos != end) {
1788 1784 if (!PyErr_Occurred())
1789 1785 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1790 1786 return -1;
1791 1787 }
1792 1788
1793 1789 return len;
1794 1790 }
1795 1791
1796 1792 static int index_init(indexObject *self, PyObject *args)
1797 1793 {
1798 1794 PyObject *data_obj, *inlined_obj;
1799 1795 Py_ssize_t size;
1800 1796
1801 1797 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1802 1798 self->raw_length = 0;
1803 1799 self->added = NULL;
1804 1800 self->cache = NULL;
1805 1801 self->data = NULL;
1806 1802 self->headrevs = NULL;
1807 1803 self->nt = NULL;
1808 1804 self->offsets = NULL;
1809 1805
1810 1806 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1811 1807 return -1;
1812 1808 if (!PyString_Check(data_obj)) {
1813 1809 PyErr_SetString(PyExc_TypeError, "data is not a string");
1814 1810 return -1;
1815 1811 }
1816 1812 size = PyString_GET_SIZE(data_obj);
1817 1813
1818 1814 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1819 1815 self->data = data_obj;
1820 1816
1821 1817 self->ntlength = self->ntcapacity = 0;
1822 1818 self->ntdepth = self->ntsplits = 0;
1823 1819 self->ntlookups = self->ntmisses = 0;
1824 1820 self->ntrev = -1;
1825 1821 Py_INCREF(self->data);
1826 1822
1827 1823 if (self->inlined) {
1828 1824 long len = inline_scan(self, NULL);
1829 1825 if (len == -1)
1830 1826 goto bail;
1831 1827 self->raw_length = len;
1832 1828 self->length = len + 1;
1833 1829 } else {
1834 1830 if (size % v1_hdrsize) {
1835 1831 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1836 1832 goto bail;
1837 1833 }
1838 1834 self->raw_length = size / v1_hdrsize;
1839 1835 self->length = self->raw_length + 1;
1840 1836 }
1841 1837
1842 1838 return 0;
1843 1839 bail:
1844 1840 return -1;
1845 1841 }
1846 1842
1847 1843 static PyObject *index_nodemap(indexObject *self)
1848 1844 {
1849 1845 Py_INCREF(self);
1850 1846 return (PyObject *)self;
1851 1847 }
1852 1848
1853 1849 static void index_dealloc(indexObject *self)
1854 1850 {
1855 1851 _index_clearcaches(self);
1856 1852 Py_XDECREF(self->data);
1857 1853 Py_XDECREF(self->added);
1858 1854 PyObject_Del(self);
1859 1855 }
1860 1856
1861 1857 static PySequenceMethods index_sequence_methods = {
1862 1858 (lenfunc)index_length, /* sq_length */
1863 1859 0, /* sq_concat */
1864 1860 0, /* sq_repeat */
1865 1861 (ssizeargfunc)index_get, /* sq_item */
1866 1862 0, /* sq_slice */
1867 1863 0, /* sq_ass_item */
1868 1864 0, /* sq_ass_slice */
1869 1865 (objobjproc)index_contains, /* sq_contains */
1870 1866 };
1871 1867
1872 1868 static PyMappingMethods index_mapping_methods = {
1873 1869 (lenfunc)index_length, /* mp_length */
1874 1870 (binaryfunc)index_getitem, /* mp_subscript */
1875 1871 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1876 1872 };
1877 1873
1878 1874 static PyMethodDef index_methods[] = {
1879 1875 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1880 1876 "return the gca set of the given revs"},
1881 1877 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1882 1878 METH_VARARGS,
1883 1879 "return the heads of the common ancestors of the given revs"},
1884 1880 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1885 1881 "clear the index caches"},
1886 1882 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1887 1883 "get an index entry"},
1888 1884 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1889 1885 "get head revisions"},
1890 1886 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1891 1887 "insert an index entry"},
1892 1888 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1893 1889 "match a potentially ambiguous node ID"},
1894 1890 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1895 1891 "stats for the index"},
1896 1892 {NULL} /* Sentinel */
1897 1893 };
1898 1894
1899 1895 static PyGetSetDef index_getset[] = {
1900 1896 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1901 1897 {NULL} /* Sentinel */
1902 1898 };
1903 1899
1904 1900 static PyTypeObject indexType = {
1905 1901 PyObject_HEAD_INIT(NULL)
1906 1902 0, /* ob_size */
1907 1903 "parsers.index", /* tp_name */
1908 1904 sizeof(indexObject), /* tp_basicsize */
1909 1905 0, /* tp_itemsize */
1910 1906 (destructor)index_dealloc, /* tp_dealloc */
1911 1907 0, /* tp_print */
1912 1908 0, /* tp_getattr */
1913 1909 0, /* tp_setattr */
1914 1910 0, /* tp_compare */
1915 1911 0, /* tp_repr */
1916 1912 0, /* tp_as_number */
1917 1913 &index_sequence_methods, /* tp_as_sequence */
1918 1914 &index_mapping_methods, /* tp_as_mapping */
1919 1915 0, /* tp_hash */
1920 1916 0, /* tp_call */
1921 1917 0, /* tp_str */
1922 1918 0, /* tp_getattro */
1923 1919 0, /* tp_setattro */
1924 1920 0, /* tp_as_buffer */
1925 1921 Py_TPFLAGS_DEFAULT, /* tp_flags */
1926 1922 "revlog index", /* tp_doc */
1927 1923 0, /* tp_traverse */
1928 1924 0, /* tp_clear */
1929 1925 0, /* tp_richcompare */
1930 1926 0, /* tp_weaklistoffset */
1931 1927 0, /* tp_iter */
1932 1928 0, /* tp_iternext */
1933 1929 index_methods, /* tp_methods */
1934 1930 0, /* tp_members */
1935 1931 index_getset, /* tp_getset */
1936 1932 0, /* tp_base */
1937 1933 0, /* tp_dict */
1938 1934 0, /* tp_descr_get */
1939 1935 0, /* tp_descr_set */
1940 1936 0, /* tp_dictoffset */
1941 1937 (initproc)index_init, /* tp_init */
1942 1938 0, /* tp_alloc */
1943 1939 };
1944 1940
1945 1941 /*
1946 1942 * returns a tuple of the form (index, index, cache) with elements as
1947 1943 * follows:
1948 1944 *
1949 1945 * index: an index object that lazily parses RevlogNG records
1950 1946 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1951 1947 *
1952 1948 * added complications are for backwards compatibility
1953 1949 */
1954 1950 static PyObject *parse_index2(PyObject *self, PyObject *args)
1955 1951 {
1956 1952 PyObject *tuple = NULL, *cache = NULL;
1957 1953 indexObject *idx;
1958 1954 int ret;
1959 1955
1960 1956 idx = PyObject_New(indexObject, &indexType);
1961 1957 if (idx == NULL)
1962 1958 goto bail;
1963 1959
1964 1960 ret = index_init(idx, args);
1965 1961 if (ret == -1)
1966 1962 goto bail;
1967 1963
1968 1964 if (idx->inlined) {
1969 1965 cache = Py_BuildValue("iO", 0, idx->data);
1970 1966 if (cache == NULL)
1971 1967 goto bail;
1972 1968 } else {
1973 1969 cache = Py_None;
1974 1970 Py_INCREF(cache);
1975 1971 }
1976 1972
1977 1973 tuple = Py_BuildValue("NN", idx, cache);
1978 1974 if (!tuple)
1979 1975 goto bail;
1980 1976 return tuple;
1981 1977
1982 1978 bail:
1983 1979 Py_XDECREF(idx);
1984 1980 Py_XDECREF(cache);
1985 1981 Py_XDECREF(tuple);
1986 1982 return NULL;
1987 1983 }
1988 1984
1989 1985 static char parsers_doc[] = "Efficient content parsing.";
1990 1986
1991 1987 PyObject *encodedir(PyObject *self, PyObject *args);
1992 1988 PyObject *pathencode(PyObject *self, PyObject *args);
1993 1989 PyObject *lowerencode(PyObject *self, PyObject *args);
1994 1990
1995 1991 static PyMethodDef methods[] = {
1996 1992 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1997 1993 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1998 1994 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1999 1995 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2000 1996 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2001 1997 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2002 1998 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2003 1999 {NULL, NULL}
2004 2000 };
2005 2001
2006 2002 void dirs_module_init(PyObject *mod);
2007 2003
2008 2004 static void module_init(PyObject *mod)
2009 2005 {
2010 2006 /* This module constant has two purposes. First, it lets us unit test
2011 2007 * the ImportError raised without hard-coding any error text. This
2012 2008 * means we can change the text in the future without breaking tests,
2013 2009 * even across changesets without a recompile. Second, its presence
2014 2010 * can be used to determine whether the version-checking logic is
2015 2011 * present, which also helps in testing across changesets without a
2016 2012 * recompile. Note that this means the pure-Python version of parsers
2017 2013 * should not have this module constant. */
2018 2014 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2019 2015
2020 2016 dirs_module_init(mod);
2021 2017
2022 2018 indexType.tp_new = PyType_GenericNew;
2023 2019 if (PyType_Ready(&indexType) < 0)
2024 2020 return;
2025 2021 Py_INCREF(&indexType);
2026 2022
2027 2023 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2028 2024
2029 2025 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2030 2026 -1, -1, -1, -1, nullid, 20);
2031 2027 if (nullentry)
2032 2028 PyObject_GC_UnTrack(nullentry);
2033 2029
2034 2030 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
2035 2031 }
2036 2032
2037 2033 static int check_python_version(void)
2038 2034 {
2039 2035 PyObject *sys = PyImport_ImportModule("sys");
2040 2036 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2041 2037 /* sys.hexversion is a 32-bit number by default, so the -1 case
2042 2038 * should only occur in unusual circumstances (e.g. if sys.hexversion
2043 2039 * is manually set to an invalid value). */
2044 2040 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2045 2041 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2046 2042 "modules were compiled with Python " PY_VERSION ", but "
2047 2043 "Mercurial is currently using Python with sys.hexversion=%ld: "
2048 2044 "Python %s\n at: %s", versionerrortext, hexversion,
2049 2045 Py_GetVersion(), Py_GetProgramFullPath());
2050 2046 return -1;
2051 2047 }
2052 2048 return 0;
2053 2049 }
2054 2050
2055 2051 #ifdef IS_PY3K
2056 2052 static struct PyModuleDef parsers_module = {
2057 2053 PyModuleDef_HEAD_INIT,
2058 2054 "parsers",
2059 2055 parsers_doc,
2060 2056 -1,
2061 2057 methods
2062 2058 };
2063 2059
2064 2060 PyMODINIT_FUNC PyInit_parsers(void)
2065 2061 {
2066 2062 PyObject *mod;
2067 2063
2068 2064 if (check_python_version() == -1)
2069 2065 return;
2070 2066 mod = PyModule_Create(&parsers_module);
2071 2067 module_init(mod);
2072 2068 return mod;
2073 2069 }
2074 2070 #else
2075 2071 PyMODINIT_FUNC initparsers(void)
2076 2072 {
2077 2073 PyObject *mod;
2078 2074
2079 2075 if (check_python_version() == -1)
2080 2076 return;
2081 2077 mod = Py_InitModule3("parsers", methods, parsers_doc);
2082 2078 module_init(mod);
2083 2079 }
2084 2080 #endif
General Comments 0
You need to be logged in to leave comments. Login now