##// END OF EJS Templates
parsers: avoid signed/unsigned comparison mismatch...
Henrik Stuart -
r22403:41e9d58e default
parent child Browse files
Show More
@@ -1,2172 +1,2177
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
/* Error text for a Python minor-version mismatch (not used in this part
 * of the file). */
static char *versionerrortext = "Python minor version mismatch";
18 18
/*
 * Byte value -> hex digit value. Entries for '0'-'9', 'A'-'F' and 'a'-'f'
 * hold 0..15; every other byte maps to -1. One row per 16 byte values.
 */
static int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x00 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x10 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x20 */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x50 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x70 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x90 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xa0 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xb0 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xc0 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xd0 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xe0 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 0xf0 */
};
37 37
38 38 static inline int hexdigit(const char *p, Py_ssize_t off)
39 39 {
40 40 int8_t val = hextable[(unsigned char)p[off]];
41 41
42 42 if (val >= 0) {
43 43 return val;
44 44 }
45 45
46 46 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
47 47 return 0;
48 48 }
49 49
50 50 /*
51 51 * Turn a hex-encoded string into binary.
52 52 */
53 53 static PyObject *unhexlify(const char *str, int len)
54 54 {
55 55 PyObject *ret;
56 56 char *d;
57 57 int i;
58 58
59 59 ret = PyBytes_FromStringAndSize(NULL, len / 2);
60 60
61 61 if (!ret)
62 62 return NULL;
63 63
64 64 d = PyBytes_AsString(ret);
65 65
66 66 for (i = 0; i < len;) {
67 67 int hi = hexdigit(str, i++);
68 68 int lo = hexdigit(str, i++);
69 69 *d++ = (hi << 4) | lo;
70 70 }
71 71
72 72 return ret;
73 73 }
74 74
75 75 /*
76 76 * This code assumes that a manifest is stitched together with newline
77 77 * ('\n') characters.
78 78 */
79 79 static PyObject *parse_manifest(PyObject *self, PyObject *args)
80 80 {
81 81 PyObject *mfdict, *fdict;
82 82 char *str, *start, *end;
83 83 int len;
84 84
85 85 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
86 86 &PyDict_Type, &mfdict,
87 87 &PyDict_Type, &fdict,
88 88 &str, &len))
89 89 goto quit;
90 90
91 91 start = str;
92 92 end = str + len;
93 93 while (start < end) {
94 94 PyObject *file = NULL, *node = NULL;
95 95 PyObject *flags = NULL;
96 96 char *zero = NULL, *newline = NULL;
97 97 ptrdiff_t nlen;
98 98
99 99 zero = memchr(start, '\0', end - start);
100 100 if (!zero) {
101 101 PyErr_SetString(PyExc_ValueError,
102 102 "manifest entry has no separator");
103 103 goto quit;
104 104 }
105 105
106 106 newline = memchr(zero + 1, '\n', end - (zero + 1));
107 107 if (!newline) {
108 108 PyErr_SetString(PyExc_ValueError,
109 109 "manifest contains trailing garbage");
110 110 goto quit;
111 111 }
112 112
113 113 file = PyBytes_FromStringAndSize(start, zero - start);
114 114
115 115 if (!file)
116 116 goto bail;
117 117
118 118 nlen = newline - zero - 1;
119 119
120 120 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
121 121 if (!node)
122 122 goto bail;
123 123
124 124 if (nlen > 40) {
125 125 flags = PyBytes_FromStringAndSize(zero + 41,
126 126 nlen - 40);
127 127 if (!flags)
128 128 goto bail;
129 129
130 130 if (PyDict_SetItem(fdict, file, flags) == -1)
131 131 goto bail;
132 132 }
133 133
134 134 if (PyDict_SetItem(mfdict, file, node) == -1)
135 135 goto bail;
136 136
137 137 start = newline + 1;
138 138
139 139 Py_XDECREF(flags);
140 140 Py_XDECREF(node);
141 141 Py_XDECREF(file);
142 142 continue;
143 143 bail:
144 144 Py_XDECREF(flags);
145 145 Py_XDECREF(node);
146 146 Py_XDECREF(file);
147 147 goto quit;
148 148 }
149 149
150 150 Py_INCREF(Py_None);
151 151 return Py_None;
152 152 quit:
153 153 return NULL;
154 154 }
155 155
156 156 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
157 157 int size, int mtime)
158 158 {
159 159 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
160 160 &dirstateTupleType);
161 161 if (!t)
162 162 return NULL;
163 163 t->state = state;
164 164 t->mode = mode;
165 165 t->size = size;
166 166 t->mtime = mtime;
167 167 return t;
168 168 }
169 169
170 170 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
171 171 PyObject *kwds)
172 172 {
173 173 /* We do all the initialization here and not a tp_init function because
174 174 * dirstate_tuple is immutable. */
175 175 dirstateTupleObject *t;
176 176 char state;
177 177 int size, mode, mtime;
178 178 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
179 179 return NULL;
180 180
181 181 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
182 182 if (!t)
183 183 return NULL;
184 184 t->state = state;
185 185 t->mode = mode;
186 186 t->size = size;
187 187 t->mtime = mtime;
188 188
189 189 return (PyObject *)t;
190 190 }
191 191
192 192 static void dirstate_tuple_dealloc(PyObject *o)
193 193 {
194 194 PyObject_Del(o);
195 195 }
196 196
197 197 static Py_ssize_t dirstate_tuple_length(PyObject *o)
198 198 {
199 199 return 4;
200 200 }
201 201
202 202 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
203 203 {
204 204 dirstateTupleObject *t = (dirstateTupleObject *)o;
205 205 switch (i) {
206 206 case 0:
207 207 return PyBytes_FromStringAndSize(&t->state, 1);
208 208 case 1:
209 209 return PyInt_FromLong(t->mode);
210 210 case 2:
211 211 return PyInt_FromLong(t->size);
212 212 case 3:
213 213 return PyInt_FromLong(t->mtime);
214 214 default:
215 215 PyErr_SetString(PyExc_IndexError, "index out of range");
216 216 return NULL;
217 217 }
218 218 }
219 219
220 220 static PySequenceMethods dirstate_tuple_sq = {
221 221 dirstate_tuple_length, /* sq_length */
222 222 0, /* sq_concat */
223 223 0, /* sq_repeat */
224 224 dirstate_tuple_item, /* sq_item */
225 225 0, /* sq_ass_item */
226 226 0, /* sq_contains */
227 227 0, /* sq_inplace_concat */
228 228 0 /* sq_inplace_repeat */
229 229 };
230 230
231 231 PyTypeObject dirstateTupleType = {
232 232 PyVarObject_HEAD_INIT(NULL, 0)
233 233 "dirstate_tuple", /* tp_name */
234 234 sizeof(dirstateTupleObject),/* tp_basicsize */
235 235 0, /* tp_itemsize */
236 236 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
237 237 0, /* tp_print */
238 238 0, /* tp_getattr */
239 239 0, /* tp_setattr */
240 240 0, /* tp_compare */
241 241 0, /* tp_repr */
242 242 0, /* tp_as_number */
243 243 &dirstate_tuple_sq, /* tp_as_sequence */
244 244 0, /* tp_as_mapping */
245 245 0, /* tp_hash */
246 246 0, /* tp_call */
247 247 0, /* tp_str */
248 248 0, /* tp_getattro */
249 249 0, /* tp_setattro */
250 250 0, /* tp_as_buffer */
251 251 Py_TPFLAGS_DEFAULT, /* tp_flags */
252 252 "dirstate tuple", /* tp_doc */
253 253 0, /* tp_traverse */
254 254 0, /* tp_clear */
255 255 0, /* tp_richcompare */
256 256 0, /* tp_weaklistoffset */
257 257 0, /* tp_iter */
258 258 0, /* tp_iternext */
259 259 0, /* tp_methods */
260 260 0, /* tp_members */
261 261 0, /* tp_getset */
262 262 0, /* tp_base */
263 263 0, /* tp_dict */
264 264 0, /* tp_descr_get */
265 265 0, /* tp_descr_set */
266 266 0, /* tp_dictoffset */
267 267 0, /* tp_init */
268 268 0, /* tp_alloc */
269 269 dirstate_tuple_new, /* tp_new */
270 270 };
271 271
272 272 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
273 273 {
274 274 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
275 275 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
276 276 char state, *cur, *str, *cpos;
277 277 int mode, size, mtime;
278 unsigned int flen;
279 int len, pos = 40;
278 unsigned int flen, len, pos = 40;
279 int readlen;
280 280
281 281 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
282 282 &PyDict_Type, &dmap,
283 283 &PyDict_Type, &cmap,
284 &str, &len))
284 &str, &readlen))
285 285 goto quit;
286 286
287 if (readlen < 0)
288 goto quit;
289
290 len = readlen;
291
287 292 /* read parents */
288 293 if (len < 40)
289 294 goto quit;
290 295
291 296 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
292 297 if (!parents)
293 298 goto quit;
294 299
295 300 /* read filenames */
296 301 while (pos >= 40 && pos < len) {
297 302 cur = str + pos;
298 303 /* unpack header */
299 304 state = *cur;
300 305 mode = getbe32(cur + 1);
301 306 size = getbe32(cur + 5);
302 307 mtime = getbe32(cur + 9);
303 308 flen = getbe32(cur + 13);
304 309 pos += 17;
305 310 cur += 17;
306 311 if (flen > len - pos) {
307 312 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
308 313 goto quit;
309 314 }
310 315
311 316 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
312 317 mtime);
313 318 cpos = memchr(cur, 0, flen);
314 319 if (cpos) {
315 320 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
316 321 cname = PyBytes_FromStringAndSize(cpos + 1,
317 322 flen - (cpos - cur) - 1);
318 323 if (!fname || !cname ||
319 324 PyDict_SetItem(cmap, fname, cname) == -1 ||
320 325 PyDict_SetItem(dmap, fname, entry) == -1)
321 326 goto quit;
322 327 Py_DECREF(cname);
323 328 } else {
324 329 fname = PyBytes_FromStringAndSize(cur, flen);
325 330 if (!fname ||
326 331 PyDict_SetItem(dmap, fname, entry) == -1)
327 332 goto quit;
328 333 }
329 334 Py_DECREF(fname);
330 335 Py_DECREF(entry);
331 336 fname = cname = entry = NULL;
332 337 pos += flen;
333 338 }
334 339
335 340 ret = parents;
336 341 Py_INCREF(ret);
337 342 quit:
338 343 Py_XDECREF(fname);
339 344 Py_XDECREF(cname);
340 345 Py_XDECREF(entry);
341 346 Py_XDECREF(parents);
342 347 return ret;
343 348 }
344 349
345 350 /*
346 351 * Efficiently pack a dirstate object into its on-disk format.
347 352 */
348 353 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
349 354 {
350 355 PyObject *packobj = NULL;
351 356 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
352 357 Py_ssize_t nbytes, pos, l;
353 358 PyObject *k, *v, *pn;
354 359 char *p, *s;
355 360 double now;
356 361
357 362 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
358 363 &PyDict_Type, &map, &PyDict_Type, &copymap,
359 364 &pl, &now))
360 365 return NULL;
361 366
362 367 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
363 368 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
364 369 return NULL;
365 370 }
366 371
367 372 /* Figure out how much we need to allocate. */
368 373 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
369 374 PyObject *c;
370 375 if (!PyString_Check(k)) {
371 376 PyErr_SetString(PyExc_TypeError, "expected string key");
372 377 goto bail;
373 378 }
374 379 nbytes += PyString_GET_SIZE(k) + 17;
375 380 c = PyDict_GetItem(copymap, k);
376 381 if (c) {
377 382 if (!PyString_Check(c)) {
378 383 PyErr_SetString(PyExc_TypeError,
379 384 "expected string key");
380 385 goto bail;
381 386 }
382 387 nbytes += PyString_GET_SIZE(c) + 1;
383 388 }
384 389 }
385 390
386 391 packobj = PyString_FromStringAndSize(NULL, nbytes);
387 392 if (packobj == NULL)
388 393 goto bail;
389 394
390 395 p = PyString_AS_STRING(packobj);
391 396
392 397 pn = PySequence_ITEM(pl, 0);
393 398 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
394 399 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
395 400 goto bail;
396 401 }
397 402 memcpy(p, s, l);
398 403 p += 20;
399 404 pn = PySequence_ITEM(pl, 1);
400 405 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
401 406 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
402 407 goto bail;
403 408 }
404 409 memcpy(p, s, l);
405 410 p += 20;
406 411
407 412 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
408 413 dirstateTupleObject *tuple;
409 414 char state;
410 415 uint32_t mode, size, mtime;
411 416 Py_ssize_t len, l;
412 417 PyObject *o;
413 418 char *t;
414 419
415 420 if (!dirstate_tuple_check(v)) {
416 421 PyErr_SetString(PyExc_TypeError,
417 422 "expected a dirstate tuple");
418 423 goto bail;
419 424 }
420 425 tuple = (dirstateTupleObject *)v;
421 426
422 427 state = tuple->state;
423 428 mode = tuple->mode;
424 429 size = tuple->size;
425 430 mtime = tuple->mtime;
426 431 if (state == 'n' && mtime == (uint32_t)now) {
427 432 /* See pure/parsers.py:pack_dirstate for why we do
428 433 * this. */
429 434 mtime = -1;
430 435 mtime_unset = (PyObject *)make_dirstate_tuple(
431 436 state, mode, size, mtime);
432 437 if (!mtime_unset)
433 438 goto bail;
434 439 if (PyDict_SetItem(map, k, mtime_unset) == -1)
435 440 goto bail;
436 441 Py_DECREF(mtime_unset);
437 442 mtime_unset = NULL;
438 443 }
439 444 *p++ = state;
440 445 putbe32(mode, p);
441 446 putbe32(size, p + 4);
442 447 putbe32(mtime, p + 8);
443 448 t = p + 12;
444 449 p += 16;
445 450 len = PyString_GET_SIZE(k);
446 451 memcpy(p, PyString_AS_STRING(k), len);
447 452 p += len;
448 453 o = PyDict_GetItem(copymap, k);
449 454 if (o) {
450 455 *p++ = '\0';
451 456 l = PyString_GET_SIZE(o);
452 457 memcpy(p, PyString_AS_STRING(o), l);
453 458 p += l;
454 459 len += l + 1;
455 460 }
456 461 putbe32((uint32_t)len, t);
457 462 }
458 463
459 464 pos = p - PyString_AS_STRING(packobj);
460 465 if (pos != nbytes) {
461 466 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
462 467 (long)pos, (long)nbytes);
463 468 goto bail;
464 469 }
465 470
466 471 return packobj;
467 472 bail:
468 473 Py_XDECREF(mtime_unset);
469 474 Py_XDECREF(packobj);
470 475 return NULL;
471 476 }
472 477
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each slot of children[] holds one of:
 *   > 0  index of the next node in the trie
 *   < 0  a leaf, encoded as -(rev + 1)
 *   0    empty
 */
typedef struct {
	int children[16];
} nodetree;
483 488
484 489 /*
485 490 * This class has two behaviours.
486 491 *
487 492 * When used in a list-like way (with integer keys), we decode an
488 493 * entry in a RevlogNG index file on demand. Our last entry is a
489 494 * sentinel, always a nullid. We have limited support for
490 495 * integer-keyed insert and delete, only at elements right before the
491 496 * sentinel.
492 497 *
493 498 * With string keys, we lazily perform a reverse mapping from node to
494 499 * rev, using a base-16 trie.
495 500 */
496 501 typedef struct {
497 502 PyObject_HEAD
498 503 /* Type-specific fields go here. */
499 504 PyObject *data; /* raw bytes of index */
500 505 PyObject **cache; /* cached tuples */
501 506 const char **offsets; /* populated on demand */
502 507 Py_ssize_t raw_length; /* original number of elements */
503 508 Py_ssize_t length; /* current number of elements */
504 509 PyObject *added; /* populated on demand */
505 510 PyObject *headrevs; /* cache, invalidated on changes */
506 511 nodetree *nt; /* base-16 trie */
507 512 int ntlength; /* # nodes in use */
508 513 int ntcapacity; /* # nodes allocated */
509 514 int ntdepth; /* maximum depth of tree */
510 515 int ntsplits; /* # splits performed */
511 516 int ntrev; /* last rev scanned */
512 517 int ntlookups; /* # lookups */
513 518 int ntmisses; /* # lookups that miss the cache */
514 519 int inlined;
515 520 } indexObject;
516 521
517 522 static Py_ssize_t index_length(const indexObject *self)
518 523 {
519 524 if (self->added == NULL)
520 525 return self->length;
521 526 return self->length + PyList_GET_SIZE(self->added);
522 527 }
523 528
524 529 static PyObject *nullentry;
525 530 static const char nullid[20];
526 531
527 532 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
528 533
529 534 #if LONG_MAX == 0x7fffffffL
530 535 static char *tuple_format = "Kiiiiiis#";
531 536 #else
532 537 static char *tuple_format = "kiiiiiis#";
533 538 #endif
534 539
535 540 /* A RevlogNG v1 index entry is 64 bytes long. */
536 541 static const long v1_hdrsize = 64;
537 542
538 543 /*
539 544 * Return a pointer to the beginning of a RevlogNG record.
540 545 */
541 546 static const char *index_deref(indexObject *self, Py_ssize_t pos)
542 547 {
543 548 if (self->inlined && pos > 0) {
544 549 if (self->offsets == NULL) {
545 550 self->offsets = malloc(self->raw_length *
546 551 sizeof(*self->offsets));
547 552 if (self->offsets == NULL)
548 553 return (const char *)PyErr_NoMemory();
549 554 inline_scan(self, self->offsets);
550 555 }
551 556 return self->offsets[pos];
552 557 }
553 558
554 559 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
555 560 }
556 561
557 562 /*
558 563 * RevlogNG format (all in big endian, data may be inlined):
559 564 * 6 bytes: offset
560 565 * 2 bytes: flags
561 566 * 4 bytes: compressed length
562 567 * 4 bytes: uncompressed length
563 568 * 4 bytes: base revision
564 569 * 4 bytes: link revision
565 570 * 4 bytes: parent 1 revision
566 571 * 4 bytes: parent 2 revision
567 572 * 32 bytes: nodeid (only 20 bytes used)
568 573 */
569 574 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
570 575 {
571 576 uint64_t offset_flags;
572 577 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
573 578 const char *c_node_id;
574 579 const char *data;
575 580 Py_ssize_t length = index_length(self);
576 581 PyObject *entry;
577 582
578 583 if (pos < 0)
579 584 pos += length;
580 585
581 586 if (pos < 0 || pos >= length) {
582 587 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
583 588 return NULL;
584 589 }
585 590
586 591 if (pos == length - 1) {
587 592 Py_INCREF(nullentry);
588 593 return nullentry;
589 594 }
590 595
591 596 if (pos >= self->length - 1) {
592 597 PyObject *obj;
593 598 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
594 599 Py_INCREF(obj);
595 600 return obj;
596 601 }
597 602
598 603 if (self->cache) {
599 604 if (self->cache[pos]) {
600 605 Py_INCREF(self->cache[pos]);
601 606 return self->cache[pos];
602 607 }
603 608 } else {
604 609 self->cache = calloc(self->raw_length, sizeof(PyObject *));
605 610 if (self->cache == NULL)
606 611 return PyErr_NoMemory();
607 612 }
608 613
609 614 data = index_deref(self, pos);
610 615 if (data == NULL)
611 616 return NULL;
612 617
613 618 offset_flags = getbe32(data + 4);
614 619 if (pos == 0) /* mask out version number for the first entry */
615 620 offset_flags &= 0xFFFF;
616 621 else {
617 622 uint32_t offset_high = getbe32(data);
618 623 offset_flags |= ((uint64_t)offset_high) << 32;
619 624 }
620 625
621 626 comp_len = getbe32(data + 8);
622 627 uncomp_len = getbe32(data + 12);
623 628 base_rev = getbe32(data + 16);
624 629 link_rev = getbe32(data + 20);
625 630 parent_1 = getbe32(data + 24);
626 631 parent_2 = getbe32(data + 28);
627 632 c_node_id = data + 32;
628 633
629 634 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
630 635 uncomp_len, base_rev, link_rev,
631 636 parent_1, parent_2, c_node_id, 20);
632 637
633 638 if (entry) {
634 639 PyObject_GC_UnTrack(entry);
635 640 Py_INCREF(entry);
636 641 }
637 642
638 643 self->cache[pos] = entry;
639 644
640 645 return entry;
641 646 }
642 647
643 648 /*
644 649 * Return the 20-byte SHA of the node corresponding to the given rev.
645 650 */
646 651 static const char *index_node(indexObject *self, Py_ssize_t pos)
647 652 {
648 653 Py_ssize_t length = index_length(self);
649 654 const char *data;
650 655
651 656 if (pos == length - 1 || pos == INT_MAX)
652 657 return nullid;
653 658
654 659 if (pos >= length)
655 660 return NULL;
656 661
657 662 if (pos >= self->length - 1) {
658 663 PyObject *tuple, *str;
659 664 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
660 665 str = PyTuple_GetItem(tuple, 7);
661 666 return str ? PyString_AS_STRING(str) : NULL;
662 667 }
663 668
664 669 data = index_deref(self, pos);
665 670 return data ? data + 32 : NULL;
666 671 }
667 672
668 673 static int nt_insert(indexObject *self, const char *node, int rev);
669 674
670 675 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
671 676 {
672 677 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
673 678 return -1;
674 679 if (*nodelen == 20)
675 680 return 0;
676 681 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
677 682 return -1;
678 683 }
679 684
680 685 static PyObject *index_insert(indexObject *self, PyObject *args)
681 686 {
682 687 PyObject *obj;
683 688 char *node;
684 689 Py_ssize_t offset, len, nodelen;
685 690
686 691 if (!PyArg_ParseTuple(args, "nO", &offset, &obj))
687 692 return NULL;
688 693
689 694 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
690 695 PyErr_SetString(PyExc_TypeError, "8-tuple required");
691 696 return NULL;
692 697 }
693 698
694 699 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
695 700 return NULL;
696 701
697 702 len = index_length(self);
698 703
699 704 if (offset < 0)
700 705 offset += len;
701 706
702 707 if (offset != len - 1) {
703 708 PyErr_SetString(PyExc_IndexError,
704 709 "insert only supported at index -1");
705 710 return NULL;
706 711 }
707 712
708 713 if (offset > INT_MAX) {
709 714 PyErr_SetString(PyExc_ValueError,
710 715 "currently only 2**31 revs supported");
711 716 return NULL;
712 717 }
713 718
714 719 if (self->added == NULL) {
715 720 self->added = PyList_New(0);
716 721 if (self->added == NULL)
717 722 return NULL;
718 723 }
719 724
720 725 if (PyList_Append(self->added, obj) == -1)
721 726 return NULL;
722 727
723 728 if (self->nt)
724 729 nt_insert(self, node, (int)offset);
725 730
726 731 Py_CLEAR(self->headrevs);
727 732 Py_RETURN_NONE;
728 733 }
729 734
730 735 static void _index_clearcaches(indexObject *self)
731 736 {
732 737 if (self->cache) {
733 738 Py_ssize_t i;
734 739
735 740 for (i = 0; i < self->raw_length; i++)
736 741 Py_CLEAR(self->cache[i]);
737 742 free(self->cache);
738 743 self->cache = NULL;
739 744 }
740 745 if (self->offsets) {
741 746 free(self->offsets);
742 747 self->offsets = NULL;
743 748 }
744 749 if (self->nt) {
745 750 free(self->nt);
746 751 self->nt = NULL;
747 752 }
748 753 Py_CLEAR(self->headrevs);
749 754 }
750 755
751 756 static PyObject *index_clearcaches(indexObject *self)
752 757 {
753 758 _index_clearcaches(self);
754 759 self->ntlength = self->ntcapacity = 0;
755 760 self->ntdepth = self->ntsplits = 0;
756 761 self->ntrev = -1;
757 762 self->ntlookups = self->ntmisses = 0;
758 763 Py_RETURN_NONE;
759 764 }
760 765
761 766 static PyObject *index_stats(indexObject *self)
762 767 {
763 768 PyObject *obj = PyDict_New();
764 769
765 770 if (obj == NULL)
766 771 return NULL;
767 772
768 773 #define istat(__n, __d) \
769 774 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
770 775 goto bail;
771 776
772 777 if (self->added) {
773 778 Py_ssize_t len = PyList_GET_SIZE(self->added);
774 779 if (PyDict_SetItemString(obj, "index entries added",
775 780 PyInt_FromSsize_t(len)) == -1)
776 781 goto bail;
777 782 }
778 783
779 784 if (self->raw_length != self->length - 1)
780 785 istat(raw_length, "revs on disk");
781 786 istat(length, "revs in memory");
782 787 istat(ntcapacity, "node trie capacity");
783 788 istat(ntdepth, "node trie depth");
784 789 istat(ntlength, "node trie count");
785 790 istat(ntlookups, "node trie lookups");
786 791 istat(ntmisses, "node trie misses");
787 792 istat(ntrev, "node trie last rev scanned");
788 793 istat(ntsplits, "node trie splits");
789 794
790 795 #undef istat
791 796
792 797 return obj;
793 798
794 799 bail:
795 800 Py_XDECREF(obj);
796 801 return NULL;
797 802 }
798 803
799 804 /*
800 805 * When we cache a list, we want to be sure the caller can't mutate
801 806 * the cached copy.
802 807 */
803 808 static PyObject *list_copy(PyObject *list)
804 809 {
805 810 Py_ssize_t len = PyList_GET_SIZE(list);
806 811 PyObject *newlist = PyList_New(len);
807 812 Py_ssize_t i;
808 813
809 814 if (newlist == NULL)
810 815 return NULL;
811 816
812 817 for (i = 0; i < len; i++) {
813 818 PyObject *obj = PyList_GET_ITEM(list, i);
814 819 Py_INCREF(obj);
815 820 PyList_SET_ITEM(newlist, i, obj);
816 821 }
817 822
818 823 return newlist;
819 824 }
820 825
821 826 static PyObject *index_headrevs(indexObject *self)
822 827 {
823 828 Py_ssize_t i, len, addlen;
824 829 char *nothead = NULL;
825 830 PyObject *heads;
826 831
827 832 if (self->headrevs)
828 833 return list_copy(self->headrevs);
829 834
830 835 len = index_length(self) - 1;
831 836 heads = PyList_New(0);
832 837 if (heads == NULL)
833 838 goto bail;
834 839 if (len == 0) {
835 840 PyObject *nullid = PyInt_FromLong(-1);
836 841 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
837 842 Py_XDECREF(nullid);
838 843 goto bail;
839 844 }
840 845 goto done;
841 846 }
842 847
843 848 nothead = calloc(len, 1);
844 849 if (nothead == NULL)
845 850 goto bail;
846 851
847 852 for (i = 0; i < self->raw_length; i++) {
848 853 const char *data = index_deref(self, i);
849 854 int parent_1 = getbe32(data + 24);
850 855 int parent_2 = getbe32(data + 28);
851 856 if (parent_1 >= 0)
852 857 nothead[parent_1] = 1;
853 858 if (parent_2 >= 0)
854 859 nothead[parent_2] = 1;
855 860 }
856 861
857 862 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
858 863
859 864 for (i = 0; i < addlen; i++) {
860 865 PyObject *rev = PyList_GET_ITEM(self->added, i);
861 866 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
862 867 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
863 868 long parent_1, parent_2;
864 869
865 870 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
866 871 PyErr_SetString(PyExc_TypeError,
867 872 "revlog parents are invalid");
868 873 goto bail;
869 874 }
870 875 parent_1 = PyInt_AS_LONG(p1);
871 876 parent_2 = PyInt_AS_LONG(p2);
872 877 if (parent_1 >= 0)
873 878 nothead[parent_1] = 1;
874 879 if (parent_2 >= 0)
875 880 nothead[parent_2] = 1;
876 881 }
877 882
878 883 for (i = 0; i < len; i++) {
879 884 PyObject *head;
880 885
881 886 if (nothead[i])
882 887 continue;
883 888 head = PyInt_FromSsize_t(i);
884 889 if (head == NULL || PyList_Append(heads, head) == -1) {
885 890 Py_XDECREF(head);
886 891 goto bail;
887 892 }
888 893 }
889 894
890 895 done:
891 896 self->headrevs = heads;
892 897 free(nothead);
893 898 return list_copy(self->headrevs);
894 899 bail:
895 900 Py_XDECREF(heads);
896 901 free(nothead);
897 902 return NULL;
898 903 }
899 904
900 905 static inline int nt_level(const char *node, Py_ssize_t level)
901 906 {
902 907 int v = node[level>>1];
903 908 if (!(level & 1))
904 909 v >>= 4;
905 910 return v & 0xf;
906 911 }
907 912
908 913 /*
909 914 * Return values:
910 915 *
911 916 * -4: match is ambiguous (multiple candidates)
912 917 * -2: not found
913 918 * rest: valid rev
914 919 */
915 920 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
916 921 int hex)
917 922 {
918 923 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
919 924 int level, maxlevel, off;
920 925
921 926 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
922 927 return -1;
923 928
924 929 if (self->nt == NULL)
925 930 return -2;
926 931
927 932 if (hex)
928 933 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
929 934 else
930 935 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
931 936
932 937 for (level = off = 0; level < maxlevel; level++) {
933 938 int k = getnybble(node, level);
934 939 nodetree *n = &self->nt[off];
935 940 int v = n->children[k];
936 941
937 942 if (v < 0) {
938 943 const char *n;
939 944 Py_ssize_t i;
940 945
941 946 v = -v - 1;
942 947 n = index_node(self, v);
943 948 if (n == NULL)
944 949 return -2;
945 950 for (i = level; i < maxlevel; i++)
946 951 if (getnybble(node, i) != nt_level(n, i))
947 952 return -2;
948 953 return v;
949 954 }
950 955 if (v == 0)
951 956 return -2;
952 957 off = v;
953 958 }
954 959 /* multiple matches against an ambiguous prefix */
955 960 return -4;
956 961 }
957 962
958 963 static int nt_new(indexObject *self)
959 964 {
960 965 if (self->ntlength == self->ntcapacity) {
961 966 self->ntcapacity *= 2;
962 967 self->nt = realloc(self->nt,
963 968 self->ntcapacity * sizeof(nodetree));
964 969 if (self->nt == NULL) {
965 970 PyErr_SetString(PyExc_MemoryError, "out of memory");
966 971 return -1;
967 972 }
968 973 memset(&self->nt[self->ntlength], 0,
969 974 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
970 975 }
971 976 return self->ntlength++;
972 977 }
973 978
974 979 static int nt_insert(indexObject *self, const char *node, int rev)
975 980 {
976 981 int level = 0;
977 982 int off = 0;
978 983
979 984 while (level < 40) {
980 985 int k = nt_level(node, level);
981 986 nodetree *n;
982 987 int v;
983 988
984 989 n = &self->nt[off];
985 990 v = n->children[k];
986 991
987 992 if (v == 0) {
988 993 n->children[k] = -rev - 1;
989 994 return 0;
990 995 }
991 996 if (v < 0) {
992 997 const char *oldnode = index_node(self, -v - 1);
993 998 int noff;
994 999
995 1000 if (!oldnode || !memcmp(oldnode, node, 20)) {
996 1001 n->children[k] = -rev - 1;
997 1002 return 0;
998 1003 }
999 1004 noff = nt_new(self);
1000 1005 if (noff == -1)
1001 1006 return -1;
1002 1007 /* self->nt may have been changed by realloc */
1003 1008 self->nt[off].children[k] = noff;
1004 1009 off = noff;
1005 1010 n = &self->nt[off];
1006 1011 n->children[nt_level(oldnode, ++level)] = v;
1007 1012 if (level > self->ntdepth)
1008 1013 self->ntdepth = level;
1009 1014 self->ntsplits += 1;
1010 1015 } else {
1011 1016 level += 1;
1012 1017 off = v;
1013 1018 }
1014 1019 }
1015 1020
1016 1021 return -1;
1017 1022 }
1018 1023
1019 1024 static int nt_init(indexObject *self)
1020 1025 {
1021 1026 if (self->nt == NULL) {
1022 1027 if (self->raw_length > INT_MAX) {
1023 1028 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1024 1029 return -1;
1025 1030 }
1026 1031 self->ntcapacity = self->raw_length < 4
1027 1032 ? 4 : (int)self->raw_length / 2;
1028 1033
1029 1034 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1030 1035 if (self->nt == NULL) {
1031 1036 PyErr_NoMemory();
1032 1037 return -1;
1033 1038 }
1034 1039 self->ntlength = 1;
1035 1040 self->ntrev = (int)index_length(self) - 1;
1036 1041 self->ntlookups = 1;
1037 1042 self->ntmisses = 0;
1038 1043 if (nt_insert(self, nullid, INT_MAX) == -1)
1039 1044 return -1;
1040 1045 }
1041 1046 return 0;
1042 1047 }
1043 1048
1044 1049 /*
1045 1050 * Return values:
1046 1051 *
1047 1052 * -3: error (exception set)
1048 1053 * -2: not found (no exception set)
1049 1054 * rest: valid rev
1050 1055 */
1051 1056 static int index_find_node(indexObject *self,
1052 1057 const char *node, Py_ssize_t nodelen)
1053 1058 {
1054 1059 int rev;
1055 1060
1056 1061 self->ntlookups++;
1057 1062 rev = nt_find(self, node, nodelen, 0);
1058 1063 if (rev >= -1)
1059 1064 return rev;
1060 1065
1061 1066 if (nt_init(self) == -1)
1062 1067 return -3;
1063 1068
1064 1069 /*
1065 1070 * For the first handful of lookups, we scan the entire index,
1066 1071 * and cache only the matching nodes. This optimizes for cases
1067 1072 * like "hg tip", where only a few nodes are accessed.
1068 1073 *
1069 1074 * After that, we cache every node we visit, using a single
1070 1075 * scan amortized over multiple lookups. This gives the best
1071 1076 * bulk performance, e.g. for "hg log".
1072 1077 */
1073 1078 if (self->ntmisses++ < 4) {
1074 1079 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1075 1080 const char *n = index_node(self, rev);
1076 1081 if (n == NULL)
1077 1082 return -2;
1078 1083 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1079 1084 if (nt_insert(self, n, rev) == -1)
1080 1085 return -3;
1081 1086 break;
1082 1087 }
1083 1088 }
1084 1089 } else {
1085 1090 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1086 1091 const char *n = index_node(self, rev);
1087 1092 if (n == NULL) {
1088 1093 self->ntrev = rev + 1;
1089 1094 return -2;
1090 1095 }
1091 1096 if (nt_insert(self, n, rev) == -1) {
1092 1097 self->ntrev = rev + 1;
1093 1098 return -3;
1094 1099 }
1095 1100 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1096 1101 break;
1097 1102 }
1098 1103 }
1099 1104 self->ntrev = rev;
1100 1105 }
1101 1106
1102 1107 if (rev >= 0)
1103 1108 return rev;
1104 1109 return -2;
1105 1110 }
1106 1111
1107 1112 static PyObject *raise_revlog_error(void)
1108 1113 {
1109 1114 static PyObject *errclass;
1110 1115 PyObject *mod = NULL, *errobj;
1111 1116
1112 1117 if (errclass == NULL) {
1113 1118 PyObject *dict;
1114 1119
1115 1120 mod = PyImport_ImportModule("mercurial.error");
1116 1121 if (mod == NULL)
1117 1122 goto classfail;
1118 1123
1119 1124 dict = PyModule_GetDict(mod);
1120 1125 if (dict == NULL)
1121 1126 goto classfail;
1122 1127
1123 1128 errclass = PyDict_GetItemString(dict, "RevlogError");
1124 1129 if (errclass == NULL) {
1125 1130 PyErr_SetString(PyExc_SystemError,
1126 1131 "could not find RevlogError");
1127 1132 goto classfail;
1128 1133 }
1129 1134 Py_INCREF(errclass);
1130 1135 }
1131 1136
1132 1137 errobj = PyObject_CallFunction(errclass, NULL);
1133 1138 if (errobj == NULL)
1134 1139 return NULL;
1135 1140 PyErr_SetObject(errclass, errobj);
1136 1141 return errobj;
1137 1142
1138 1143 classfail:
1139 1144 Py_XDECREF(mod);
1140 1145 return NULL;
1141 1146 }
1142 1147
1143 1148 static PyObject *index_getitem(indexObject *self, PyObject *value)
1144 1149 {
1145 1150 char *node;
1146 1151 Py_ssize_t nodelen;
1147 1152 int rev;
1148 1153
1149 1154 if (PyInt_Check(value))
1150 1155 return index_get(self, PyInt_AS_LONG(value));
1151 1156
1152 1157 if (node_check(value, &node, &nodelen) == -1)
1153 1158 return NULL;
1154 1159 rev = index_find_node(self, node, nodelen);
1155 1160 if (rev >= -1)
1156 1161 return PyInt_FromLong(rev);
1157 1162 if (rev == -2)
1158 1163 raise_revlog_error();
1159 1164 return NULL;
1160 1165 }
1161 1166
1162 1167 static int nt_partialmatch(indexObject *self, const char *node,
1163 1168 Py_ssize_t nodelen)
1164 1169 {
1165 1170 int rev;
1166 1171
1167 1172 if (nt_init(self) == -1)
1168 1173 return -3;
1169 1174
1170 1175 if (self->ntrev > 0) {
1171 1176 /* ensure that the radix tree is fully populated */
1172 1177 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1173 1178 const char *n = index_node(self, rev);
1174 1179 if (n == NULL)
1175 1180 return -2;
1176 1181 if (nt_insert(self, n, rev) == -1)
1177 1182 return -3;
1178 1183 }
1179 1184 self->ntrev = rev;
1180 1185 }
1181 1186
1182 1187 return nt_find(self, node, nodelen, 1);
1183 1188 }
1184 1189
1185 1190 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1186 1191 {
1187 1192 const char *fullnode;
1188 1193 int nodelen;
1189 1194 char *node;
1190 1195 int rev, i;
1191 1196
1192 1197 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1193 1198 return NULL;
1194 1199
1195 1200 if (nodelen < 4) {
1196 1201 PyErr_SetString(PyExc_ValueError, "key too short");
1197 1202 return NULL;
1198 1203 }
1199 1204
1200 1205 if (nodelen > 40) {
1201 1206 PyErr_SetString(PyExc_ValueError, "key too long");
1202 1207 return NULL;
1203 1208 }
1204 1209
1205 1210 for (i = 0; i < nodelen; i++)
1206 1211 hexdigit(node, i);
1207 1212 if (PyErr_Occurred()) {
1208 1213 /* input contains non-hex characters */
1209 1214 PyErr_Clear();
1210 1215 Py_RETURN_NONE;
1211 1216 }
1212 1217
1213 1218 rev = nt_partialmatch(self, node, nodelen);
1214 1219
1215 1220 switch (rev) {
1216 1221 case -4:
1217 1222 raise_revlog_error();
1218 1223 case -3:
1219 1224 return NULL;
1220 1225 case -2:
1221 1226 Py_RETURN_NONE;
1222 1227 case -1:
1223 1228 return PyString_FromStringAndSize(nullid, 20);
1224 1229 }
1225 1230
1226 1231 fullnode = index_node(self, rev);
1227 1232 if (fullnode == NULL) {
1228 1233 PyErr_Format(PyExc_IndexError,
1229 1234 "could not access rev %d", rev);
1230 1235 return NULL;
1231 1236 }
1232 1237 return PyString_FromStringAndSize(fullnode, 20);
1233 1238 }
1234 1239
1235 1240 static PyObject *index_m_get(indexObject *self, PyObject *args)
1236 1241 {
1237 1242 Py_ssize_t nodelen;
1238 1243 PyObject *val;
1239 1244 char *node;
1240 1245 int rev;
1241 1246
1242 1247 if (!PyArg_ParseTuple(args, "O", &val))
1243 1248 return NULL;
1244 1249 if (node_check(val, &node, &nodelen) == -1)
1245 1250 return NULL;
1246 1251 rev = index_find_node(self, node, nodelen);
1247 1252 if (rev == -3)
1248 1253 return NULL;
1249 1254 if (rev == -2)
1250 1255 Py_RETURN_NONE;
1251 1256 return PyInt_FromLong(rev);
1252 1257 }
1253 1258
1254 1259 static int index_contains(indexObject *self, PyObject *value)
1255 1260 {
1256 1261 char *node;
1257 1262 Py_ssize_t nodelen;
1258 1263
1259 1264 if (PyInt_Check(value)) {
1260 1265 long rev = PyInt_AS_LONG(value);
1261 1266 return rev >= -1 && rev < index_length(self);
1262 1267 }
1263 1268
1264 1269 if (node_check(value, &node, &nodelen) == -1)
1265 1270 return -1;
1266 1271
1267 1272 switch (index_find_node(self, node, nodelen)) {
1268 1273 case -3:
1269 1274 return -1;
1270 1275 case -2:
1271 1276 return 0;
1272 1277 default:
1273 1278 return 1;
1274 1279 }
1275 1280 }
1276 1281
1277 1282 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1278 1283 {
1279 1284 if (rev >= self->length - 1) {
1280 1285 PyObject *tuple = PyList_GET_ITEM(self->added,
1281 1286 rev - self->length + 1);
1282 1287 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1283 1288 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1284 1289 } else {
1285 1290 const char *data = index_deref(self, rev);
1286 1291 ps[0] = getbe32(data + 24);
1287 1292 ps[1] = getbe32(data + 28);
1288 1293 }
1289 1294 }
1290 1295
1291 1296 typedef uint64_t bitmask;
1292 1297
1293 1298 /*
1294 1299 * Given a disjoint set of revs, return all candidates for the
1295 1300 * greatest common ancestor. In revset notation, this is the set
1296 1301 * "heads(::a and ::b and ...)"
1297 1302 */
1298 1303 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1299 1304 int revcount)
1300 1305 {
1301 1306 const bitmask allseen = (1ull << revcount) - 1;
1302 1307 const bitmask poison = 1ull << revcount;
1303 1308 PyObject *gca = PyList_New(0);
1304 1309 int i, v, interesting;
1305 1310 int maxrev = -1;
1306 1311 bitmask sp;
1307 1312 bitmask *seen;
1308 1313
1309 1314 if (gca == NULL)
1310 1315 return PyErr_NoMemory();
1311 1316
1312 1317 for (i = 0; i < revcount; i++) {
1313 1318 if (revs[i] > maxrev)
1314 1319 maxrev = revs[i];
1315 1320 }
1316 1321
1317 1322 seen = calloc(sizeof(*seen), maxrev + 1);
1318 1323 if (seen == NULL) {
1319 1324 Py_DECREF(gca);
1320 1325 return PyErr_NoMemory();
1321 1326 }
1322 1327
1323 1328 for (i = 0; i < revcount; i++)
1324 1329 seen[revs[i]] = 1ull << i;
1325 1330
1326 1331 interesting = revcount;
1327 1332
1328 1333 for (v = maxrev; v >= 0 && interesting; v--) {
1329 1334 bitmask sv = seen[v];
1330 1335 int parents[2];
1331 1336
1332 1337 if (!sv)
1333 1338 continue;
1334 1339
1335 1340 if (sv < poison) {
1336 1341 interesting -= 1;
1337 1342 if (sv == allseen) {
1338 1343 PyObject *obj = PyInt_FromLong(v);
1339 1344 if (obj == NULL)
1340 1345 goto bail;
1341 1346 if (PyList_Append(gca, obj) == -1) {
1342 1347 Py_DECREF(obj);
1343 1348 goto bail;
1344 1349 }
1345 1350 sv |= poison;
1346 1351 for (i = 0; i < revcount; i++) {
1347 1352 if (revs[i] == v)
1348 1353 goto done;
1349 1354 }
1350 1355 }
1351 1356 }
1352 1357 index_get_parents(self, v, parents);
1353 1358
1354 1359 for (i = 0; i < 2; i++) {
1355 1360 int p = parents[i];
1356 1361 if (p == -1)
1357 1362 continue;
1358 1363 sp = seen[p];
1359 1364 if (sv < poison) {
1360 1365 if (sp == 0) {
1361 1366 seen[p] = sv;
1362 1367 interesting++;
1363 1368 }
1364 1369 else if (sp != sv)
1365 1370 seen[p] |= sv;
1366 1371 } else {
1367 1372 if (sp && sp < poison)
1368 1373 interesting--;
1369 1374 seen[p] = sv;
1370 1375 }
1371 1376 }
1372 1377 }
1373 1378
1374 1379 done:
1375 1380 free(seen);
1376 1381 return gca;
1377 1382 bail:
1378 1383 free(seen);
1379 1384 Py_XDECREF(gca);
1380 1385 return NULL;
1381 1386 }
1382 1387
1383 1388 /*
1384 1389 * Given a disjoint set of revs, return the subset with the longest
1385 1390 * path to the root.
1386 1391 */
1387 1392 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1388 1393 {
1389 1394 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1390 1395 static const Py_ssize_t capacity = 24;
1391 1396 int *depth, *interesting = NULL;
1392 1397 int i, j, v, ninteresting;
1393 1398 PyObject *dict = NULL, *keys = NULL;
1394 1399 long *seen = NULL;
1395 1400 int maxrev = -1;
1396 1401 long final;
1397 1402
1398 1403 if (revcount > capacity) {
1399 1404 PyErr_Format(PyExc_OverflowError,
1400 1405 "bitset size (%ld) > capacity (%ld)",
1401 1406 (long)revcount, (long)capacity);
1402 1407 return NULL;
1403 1408 }
1404 1409
1405 1410 for (i = 0; i < revcount; i++) {
1406 1411 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1407 1412 if (n > maxrev)
1408 1413 maxrev = n;
1409 1414 }
1410 1415
1411 1416 depth = calloc(sizeof(*depth), maxrev + 1);
1412 1417 if (depth == NULL)
1413 1418 return PyErr_NoMemory();
1414 1419
1415 1420 seen = calloc(sizeof(*seen), maxrev + 1);
1416 1421 if (seen == NULL) {
1417 1422 PyErr_NoMemory();
1418 1423 goto bail;
1419 1424 }
1420 1425
1421 1426 interesting = calloc(sizeof(*interesting), 2 << revcount);
1422 1427 if (interesting == NULL) {
1423 1428 PyErr_NoMemory();
1424 1429 goto bail;
1425 1430 }
1426 1431
1427 1432 if (PyList_Sort(revs) == -1)
1428 1433 goto bail;
1429 1434
1430 1435 for (i = 0; i < revcount; i++) {
1431 1436 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1432 1437 long b = 1l << i;
1433 1438 depth[n] = 1;
1434 1439 seen[n] = b;
1435 1440 interesting[b] = 1;
1436 1441 }
1437 1442
1438 1443 ninteresting = (int)revcount;
1439 1444
1440 1445 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1441 1446 int dv = depth[v];
1442 1447 int parents[2];
1443 1448 long sv;
1444 1449
1445 1450 if (dv == 0)
1446 1451 continue;
1447 1452
1448 1453 sv = seen[v];
1449 1454 index_get_parents(self, v, parents);
1450 1455
1451 1456 for (i = 0; i < 2; i++) {
1452 1457 int p = parents[i];
1453 1458 long nsp, sp;
1454 1459 int dp;
1455 1460
1456 1461 if (p == -1)
1457 1462 continue;
1458 1463
1459 1464 dp = depth[p];
1460 1465 nsp = sp = seen[p];
1461 1466 if (dp <= dv) {
1462 1467 depth[p] = dv + 1;
1463 1468 if (sp != sv) {
1464 1469 interesting[sv] += 1;
1465 1470 nsp = seen[p] = sv;
1466 1471 if (sp) {
1467 1472 interesting[sp] -= 1;
1468 1473 if (interesting[sp] == 0)
1469 1474 ninteresting -= 1;
1470 1475 }
1471 1476 }
1472 1477 }
1473 1478 else if (dv == dp - 1) {
1474 1479 nsp = sp | sv;
1475 1480 if (nsp == sp)
1476 1481 continue;
1477 1482 seen[p] = nsp;
1478 1483 interesting[sp] -= 1;
1479 1484 if (interesting[sp] == 0 && interesting[nsp] > 0)
1480 1485 ninteresting -= 1;
1481 1486 interesting[nsp] += 1;
1482 1487 }
1483 1488 }
1484 1489 interesting[sv] -= 1;
1485 1490 if (interesting[sv] == 0)
1486 1491 ninteresting -= 1;
1487 1492 }
1488 1493
1489 1494 final = 0;
1490 1495 j = ninteresting;
1491 1496 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1492 1497 if (interesting[i] == 0)
1493 1498 continue;
1494 1499 final |= i;
1495 1500 j -= 1;
1496 1501 }
1497 1502 if (final == 0) {
1498 1503 keys = PyList_New(0);
1499 1504 goto bail;
1500 1505 }
1501 1506
1502 1507 dict = PyDict_New();
1503 1508 if (dict == NULL)
1504 1509 goto bail;
1505 1510
1506 1511 for (i = 0; i < revcount; i++) {
1507 1512 PyObject *key;
1508 1513
1509 1514 if ((final & (1 << i)) == 0)
1510 1515 continue;
1511 1516
1512 1517 key = PyList_GET_ITEM(revs, i);
1513 1518 Py_INCREF(key);
1514 1519 Py_INCREF(Py_None);
1515 1520 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1516 1521 Py_DECREF(key);
1517 1522 Py_DECREF(Py_None);
1518 1523 goto bail;
1519 1524 }
1520 1525 }
1521 1526
1522 1527 keys = PyDict_Keys(dict);
1523 1528
1524 1529 bail:
1525 1530 free(depth);
1526 1531 free(seen);
1527 1532 free(interesting);
1528 1533 Py_XDECREF(dict);
1529 1534
1530 1535 return keys;
1531 1536 }
1532 1537
1533 1538 /*
1534 1539 * Given a (possibly overlapping) set of revs, return the greatest
1535 1540 * common ancestors: those with the longest path to the root.
1536 1541 */
1537 1542 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1538 1543 {
1539 1544 PyObject *ret = NULL, *gca = NULL;
1540 1545 Py_ssize_t argcount, i, len;
1541 1546 bitmask repeat = 0;
1542 1547 int revcount = 0;
1543 1548 int *revs;
1544 1549
1545 1550 argcount = PySequence_Length(args);
1546 1551 revs = malloc(argcount * sizeof(*revs));
1547 1552 if (argcount > 0 && revs == NULL)
1548 1553 return PyErr_NoMemory();
1549 1554 len = index_length(self) - 1;
1550 1555
1551 1556 for (i = 0; i < argcount; i++) {
1552 1557 static const int capacity = 24;
1553 1558 PyObject *obj = PySequence_GetItem(args, i);
1554 1559 bitmask x;
1555 1560 long val;
1556 1561
1557 1562 if (!PyInt_Check(obj)) {
1558 1563 PyErr_SetString(PyExc_TypeError,
1559 1564 "arguments must all be ints");
1560 1565 goto bail;
1561 1566 }
1562 1567 val = PyInt_AsLong(obj);
1563 1568 if (val == -1) {
1564 1569 ret = PyList_New(0);
1565 1570 goto done;
1566 1571 }
1567 1572 if (val < 0 || val >= len) {
1568 1573 PyErr_SetString(PyExc_IndexError,
1569 1574 "index out of range");
1570 1575 goto bail;
1571 1576 }
1572 1577 /* this cheesy bloom filter lets us avoid some more
1573 1578 * expensive duplicate checks in the common set-is-disjoint
1574 1579 * case */
1575 1580 x = 1ull << (val & 0x3f);
1576 1581 if (repeat & x) {
1577 1582 int k;
1578 1583 for (k = 0; k < revcount; k++) {
1579 1584 if (val == revs[k])
1580 1585 goto duplicate;
1581 1586 }
1582 1587 }
1583 1588 else repeat |= x;
1584 1589 if (revcount >= capacity) {
1585 1590 PyErr_Format(PyExc_OverflowError,
1586 1591 "bitset size (%d) > capacity (%d)",
1587 1592 revcount, capacity);
1588 1593 goto bail;
1589 1594 }
1590 1595 revs[revcount++] = (int)val;
1591 1596 duplicate:;
1592 1597 }
1593 1598
1594 1599 if (revcount == 0) {
1595 1600 ret = PyList_New(0);
1596 1601 goto done;
1597 1602 }
1598 1603 if (revcount == 1) {
1599 1604 PyObject *obj;
1600 1605 ret = PyList_New(1);
1601 1606 if (ret == NULL)
1602 1607 goto bail;
1603 1608 obj = PyInt_FromLong(revs[0]);
1604 1609 if (obj == NULL)
1605 1610 goto bail;
1606 1611 PyList_SET_ITEM(ret, 0, obj);
1607 1612 goto done;
1608 1613 }
1609 1614
1610 1615 gca = find_gca_candidates(self, revs, revcount);
1611 1616 if (gca == NULL)
1612 1617 goto bail;
1613 1618
1614 1619 if (PyList_GET_SIZE(gca) <= 1) {
1615 1620 ret = gca;
1616 1621 Py_INCREF(gca);
1617 1622 }
1618 1623 else ret = find_deepest(self, gca);
1619 1624
1620 1625 done:
1621 1626 free(revs);
1622 1627 Py_XDECREF(gca);
1623 1628
1624 1629 return ret;
1625 1630
1626 1631 bail:
1627 1632 free(revs);
1628 1633 Py_XDECREF(gca);
1629 1634 Py_XDECREF(ret);
1630 1635 return NULL;
1631 1636 }
1632 1637
1633 1638 /*
1634 1639 * Given a (possibly overlapping) set of revs, return all the
1635 1640 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1636 1641 */
1637 1642 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1638 1643 {
1639 1644 PyObject *ret = NULL;
1640 1645 Py_ssize_t argcount, i, len;
1641 1646 bitmask repeat = 0;
1642 1647 int revcount = 0;
1643 1648 int *revs;
1644 1649
1645 1650 argcount = PySequence_Length(args);
1646 1651 revs = malloc(argcount * sizeof(*revs));
1647 1652 if (argcount > 0 && revs == NULL)
1648 1653 return PyErr_NoMemory();
1649 1654 len = index_length(self) - 1;
1650 1655
1651 1656 for (i = 0; i < argcount; i++) {
1652 1657 static const int capacity = 24;
1653 1658 PyObject *obj = PySequence_GetItem(args, i);
1654 1659 bitmask x;
1655 1660 long val;
1656 1661
1657 1662 if (!PyInt_Check(obj)) {
1658 1663 PyErr_SetString(PyExc_TypeError,
1659 1664 "arguments must all be ints");
1660 1665 goto bail;
1661 1666 }
1662 1667 val = PyInt_AsLong(obj);
1663 1668 if (val == -1) {
1664 1669 ret = PyList_New(0);
1665 1670 goto done;
1666 1671 }
1667 1672 if (val < 0 || val >= len) {
1668 1673 PyErr_SetString(PyExc_IndexError,
1669 1674 "index out of range");
1670 1675 goto bail;
1671 1676 }
1672 1677 /* this cheesy bloom filter lets us avoid some more
1673 1678 * expensive duplicate checks in the common set-is-disjoint
1674 1679 * case */
1675 1680 x = 1ull << (val & 0x3f);
1676 1681 if (repeat & x) {
1677 1682 int k;
1678 1683 for (k = 0; k < revcount; k++) {
1679 1684 if (val == revs[k])
1680 1685 goto duplicate;
1681 1686 }
1682 1687 }
1683 1688 else repeat |= x;
1684 1689 if (revcount >= capacity) {
1685 1690 PyErr_Format(PyExc_OverflowError,
1686 1691 "bitset size (%d) > capacity (%d)",
1687 1692 revcount, capacity);
1688 1693 goto bail;
1689 1694 }
1690 1695 revs[revcount++] = (int)val;
1691 1696 duplicate:;
1692 1697 }
1693 1698
1694 1699 if (revcount == 0) {
1695 1700 ret = PyList_New(0);
1696 1701 goto done;
1697 1702 }
1698 1703 if (revcount == 1) {
1699 1704 PyObject *obj;
1700 1705 ret = PyList_New(1);
1701 1706 if (ret == NULL)
1702 1707 goto bail;
1703 1708 obj = PyInt_FromLong(revs[0]);
1704 1709 if (obj == NULL)
1705 1710 goto bail;
1706 1711 PyList_SET_ITEM(ret, 0, obj);
1707 1712 goto done;
1708 1713 }
1709 1714
1710 1715 ret = find_gca_candidates(self, revs, revcount);
1711 1716 if (ret == NULL)
1712 1717 goto bail;
1713 1718
1714 1719 done:
1715 1720 free(revs);
1716 1721 return ret;
1717 1722
1718 1723 bail:
1719 1724 free(revs);
1720 1725 Py_XDECREF(ret);
1721 1726 return NULL;
1722 1727 }
1723 1728
1724 1729 /*
1725 1730 * Invalidate any trie entries introduced by added revs.
1726 1731 */
1727 1732 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1728 1733 {
1729 1734 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1730 1735
1731 1736 for (i = start; i < len; i++) {
1732 1737 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1733 1738 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1734 1739
1735 1740 nt_insert(self, PyString_AS_STRING(node), -1);
1736 1741 }
1737 1742
1738 1743 if (start == 0)
1739 1744 Py_CLEAR(self->added);
1740 1745 }
1741 1746
1742 1747 /*
1743 1748 * Delete a numeric range of revs, which must be at the end of the
1744 1749 * range, but exclude the sentinel nullid entry.
1745 1750 */
1746 1751 static int index_slice_del(indexObject *self, PyObject *item)
1747 1752 {
1748 1753 Py_ssize_t start, stop, step, slicelength;
1749 1754 Py_ssize_t length = index_length(self);
1750 1755 int ret = 0;
1751 1756
1752 1757 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1753 1758 &start, &stop, &step, &slicelength) < 0)
1754 1759 return -1;
1755 1760
1756 1761 if (slicelength <= 0)
1757 1762 return 0;
1758 1763
1759 1764 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1760 1765 stop = start;
1761 1766
1762 1767 if (step < 0) {
1763 1768 stop = start + 1;
1764 1769 start = stop + step*(slicelength - 1) - 1;
1765 1770 step = -step;
1766 1771 }
1767 1772
1768 1773 if (step != 1) {
1769 1774 PyErr_SetString(PyExc_ValueError,
1770 1775 "revlog index delete requires step size of 1");
1771 1776 return -1;
1772 1777 }
1773 1778
1774 1779 if (stop != length - 1) {
1775 1780 PyErr_SetString(PyExc_IndexError,
1776 1781 "revlog index deletion indices are invalid");
1777 1782 return -1;
1778 1783 }
1779 1784
1780 1785 if (start < self->length - 1) {
1781 1786 if (self->nt) {
1782 1787 Py_ssize_t i;
1783 1788
1784 1789 for (i = start + 1; i < self->length - 1; i++) {
1785 1790 const char *node = index_node(self, i);
1786 1791
1787 1792 if (node)
1788 1793 nt_insert(self, node, -1);
1789 1794 }
1790 1795 if (self->added)
1791 1796 nt_invalidate_added(self, 0);
1792 1797 if (self->ntrev > start)
1793 1798 self->ntrev = (int)start;
1794 1799 }
1795 1800 self->length = start + 1;
1796 1801 if (start < self->raw_length) {
1797 1802 if (self->cache) {
1798 1803 Py_ssize_t i;
1799 1804 for (i = start; i < self->raw_length; i++)
1800 1805 Py_CLEAR(self->cache[i]);
1801 1806 }
1802 1807 self->raw_length = start;
1803 1808 }
1804 1809 goto done;
1805 1810 }
1806 1811
1807 1812 if (self->nt) {
1808 1813 nt_invalidate_added(self, start - self->length + 1);
1809 1814 if (self->ntrev > start)
1810 1815 self->ntrev = (int)start;
1811 1816 }
1812 1817 if (self->added)
1813 1818 ret = PyList_SetSlice(self->added, start - self->length + 1,
1814 1819 PyList_GET_SIZE(self->added), NULL);
1815 1820 done:
1816 1821 Py_CLEAR(self->headrevs);
1817 1822 return ret;
1818 1823 }
1819 1824
1820 1825 /*
1821 1826 * Supported ops:
1822 1827 *
1823 1828 * slice deletion
1824 1829 * string assignment (extend node->rev mapping)
1825 1830 * string deletion (shrink node->rev mapping)
1826 1831 */
1827 1832 static int index_assign_subscript(indexObject *self, PyObject *item,
1828 1833 PyObject *value)
1829 1834 {
1830 1835 char *node;
1831 1836 Py_ssize_t nodelen;
1832 1837 long rev;
1833 1838
1834 1839 if (PySlice_Check(item) && value == NULL)
1835 1840 return index_slice_del(self, item);
1836 1841
1837 1842 if (node_check(item, &node, &nodelen) == -1)
1838 1843 return -1;
1839 1844
1840 1845 if (value == NULL)
1841 1846 return self->nt ? nt_insert(self, node, -1) : 0;
1842 1847 rev = PyInt_AsLong(value);
1843 1848 if (rev > INT_MAX || rev < 0) {
1844 1849 if (!PyErr_Occurred())
1845 1850 PyErr_SetString(PyExc_ValueError, "rev out of range");
1846 1851 return -1;
1847 1852 }
1848 1853 return nt_insert(self, node, (int)rev);
1849 1854 }
1850 1855
1851 1856 /*
1852 1857 * Find all RevlogNG entries in an index that has inline data. Update
1853 1858 * the optional "offsets" table with those entries.
1854 1859 */
1855 1860 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1856 1861 {
1857 1862 const char *data = PyString_AS_STRING(self->data);
1858 1863 Py_ssize_t pos = 0;
1859 1864 Py_ssize_t end = PyString_GET_SIZE(self->data);
1860 1865 long incr = v1_hdrsize;
1861 1866 Py_ssize_t len = 0;
1862 1867
1863 1868 while (pos + v1_hdrsize <= end && pos >= 0) {
1864 1869 uint32_t comp_len;
1865 1870 /* 3rd element of header is length of compressed inline data */
1866 1871 comp_len = getbe32(data + pos + 8);
1867 1872 incr = v1_hdrsize + comp_len;
1868 1873 if (offsets)
1869 1874 offsets[len] = data + pos;
1870 1875 len++;
1871 1876 pos += incr;
1872 1877 }
1873 1878
1874 1879 if (pos != end) {
1875 1880 if (!PyErr_Occurred())
1876 1881 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1877 1882 return -1;
1878 1883 }
1879 1884
1880 1885 return len;
1881 1886 }
1882 1887
1883 1888 static int index_init(indexObject *self, PyObject *args)
1884 1889 {
1885 1890 PyObject *data_obj, *inlined_obj;
1886 1891 Py_ssize_t size;
1887 1892
1888 1893 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1889 1894 self->raw_length = 0;
1890 1895 self->added = NULL;
1891 1896 self->cache = NULL;
1892 1897 self->data = NULL;
1893 1898 self->headrevs = NULL;
1894 1899 self->nt = NULL;
1895 1900 self->offsets = NULL;
1896 1901
1897 1902 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1898 1903 return -1;
1899 1904 if (!PyString_Check(data_obj)) {
1900 1905 PyErr_SetString(PyExc_TypeError, "data is not a string");
1901 1906 return -1;
1902 1907 }
1903 1908 size = PyString_GET_SIZE(data_obj);
1904 1909
1905 1910 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1906 1911 self->data = data_obj;
1907 1912
1908 1913 self->ntlength = self->ntcapacity = 0;
1909 1914 self->ntdepth = self->ntsplits = 0;
1910 1915 self->ntlookups = self->ntmisses = 0;
1911 1916 self->ntrev = -1;
1912 1917 Py_INCREF(self->data);
1913 1918
1914 1919 if (self->inlined) {
1915 1920 Py_ssize_t len = inline_scan(self, NULL);
1916 1921 if (len == -1)
1917 1922 goto bail;
1918 1923 self->raw_length = len;
1919 1924 self->length = len + 1;
1920 1925 } else {
1921 1926 if (size % v1_hdrsize) {
1922 1927 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1923 1928 goto bail;
1924 1929 }
1925 1930 self->raw_length = size / v1_hdrsize;
1926 1931 self->length = self->raw_length + 1;
1927 1932 }
1928 1933
1929 1934 return 0;
1930 1935 bail:
1931 1936 return -1;
1932 1937 }
1933 1938
1934 1939 static PyObject *index_nodemap(indexObject *self)
1935 1940 {
1936 1941 Py_INCREF(self);
1937 1942 return (PyObject *)self;
1938 1943 }
1939 1944
1940 1945 static void index_dealloc(indexObject *self)
1941 1946 {
1942 1947 _index_clearcaches(self);
1943 1948 Py_XDECREF(self->data);
1944 1949 Py_XDECREF(self->added);
1945 1950 PyObject_Del(self);
1946 1951 }
1947 1952
1948 1953 static PySequenceMethods index_sequence_methods = {
1949 1954 (lenfunc)index_length, /* sq_length */
1950 1955 0, /* sq_concat */
1951 1956 0, /* sq_repeat */
1952 1957 (ssizeargfunc)index_get, /* sq_item */
1953 1958 0, /* sq_slice */
1954 1959 0, /* sq_ass_item */
1955 1960 0, /* sq_ass_slice */
1956 1961 (objobjproc)index_contains, /* sq_contains */
1957 1962 };
1958 1963
1959 1964 static PyMappingMethods index_mapping_methods = {
1960 1965 (lenfunc)index_length, /* mp_length */
1961 1966 (binaryfunc)index_getitem, /* mp_subscript */
1962 1967 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1963 1968 };
1964 1969
1965 1970 static PyMethodDef index_methods[] = {
1966 1971 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1967 1972 "return the gca set of the given revs"},
1968 1973 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1969 1974 METH_VARARGS,
1970 1975 "return the heads of the common ancestors of the given revs"},
1971 1976 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1972 1977 "clear the index caches"},
1973 1978 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1974 1979 "get an index entry"},
1975 1980 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1976 1981 "get head revisions"},
1977 1982 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1978 1983 "insert an index entry"},
1979 1984 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1980 1985 "match a potentially ambiguous node ID"},
1981 1986 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1982 1987 "stats for the index"},
1983 1988 {NULL} /* Sentinel */
1984 1989 };
1985 1990
1986 1991 static PyGetSetDef index_getset[] = {
1987 1992 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1988 1993 {NULL} /* Sentinel */
1989 1994 };
1990 1995
1991 1996 static PyTypeObject indexType = {
1992 1997 PyObject_HEAD_INIT(NULL)
1993 1998 0, /* ob_size */
1994 1999 "parsers.index", /* tp_name */
1995 2000 sizeof(indexObject), /* tp_basicsize */
1996 2001 0, /* tp_itemsize */
1997 2002 (destructor)index_dealloc, /* tp_dealloc */
1998 2003 0, /* tp_print */
1999 2004 0, /* tp_getattr */
2000 2005 0, /* tp_setattr */
2001 2006 0, /* tp_compare */
2002 2007 0, /* tp_repr */
2003 2008 0, /* tp_as_number */
2004 2009 &index_sequence_methods, /* tp_as_sequence */
2005 2010 &index_mapping_methods, /* tp_as_mapping */
2006 2011 0, /* tp_hash */
2007 2012 0, /* tp_call */
2008 2013 0, /* tp_str */
2009 2014 0, /* tp_getattro */
2010 2015 0, /* tp_setattro */
2011 2016 0, /* tp_as_buffer */
2012 2017 Py_TPFLAGS_DEFAULT, /* tp_flags */
2013 2018 "revlog index", /* tp_doc */
2014 2019 0, /* tp_traverse */
2015 2020 0, /* tp_clear */
2016 2021 0, /* tp_richcompare */
2017 2022 0, /* tp_weaklistoffset */
2018 2023 0, /* tp_iter */
2019 2024 0, /* tp_iternext */
2020 2025 index_methods, /* tp_methods */
2021 2026 0, /* tp_members */
2022 2027 index_getset, /* tp_getset */
2023 2028 0, /* tp_base */
2024 2029 0, /* tp_dict */
2025 2030 0, /* tp_descr_get */
2026 2031 0, /* tp_descr_set */
2027 2032 0, /* tp_dictoffset */
2028 2033 (initproc)index_init, /* tp_init */
2029 2034 0, /* tp_alloc */
2030 2035 };
2031 2036
2032 2037 /*
2033 2038 * returns a tuple of the form (index, index, cache) with elements as
2034 2039 * follows:
2035 2040 *
2036 2041 * index: an index object that lazily parses RevlogNG records
2037 2042 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2038 2043 *
2039 2044 * added complications are for backwards compatibility
2040 2045 */
2041 2046 static PyObject *parse_index2(PyObject *self, PyObject *args)
2042 2047 {
2043 2048 PyObject *tuple = NULL, *cache = NULL;
2044 2049 indexObject *idx;
2045 2050 int ret;
2046 2051
2047 2052 idx = PyObject_New(indexObject, &indexType);
2048 2053 if (idx == NULL)
2049 2054 goto bail;
2050 2055
2051 2056 ret = index_init(idx, args);
2052 2057 if (ret == -1)
2053 2058 goto bail;
2054 2059
2055 2060 if (idx->inlined) {
2056 2061 cache = Py_BuildValue("iO", 0, idx->data);
2057 2062 if (cache == NULL)
2058 2063 goto bail;
2059 2064 } else {
2060 2065 cache = Py_None;
2061 2066 Py_INCREF(cache);
2062 2067 }
2063 2068
2064 2069 tuple = Py_BuildValue("NN", idx, cache);
2065 2070 if (!tuple)
2066 2071 goto bail;
2067 2072 return tuple;
2068 2073
2069 2074 bail:
2070 2075 Py_XDECREF(idx);
2071 2076 Py_XDECREF(cache);
2072 2077 Py_XDECREF(tuple);
2073 2078 return NULL;
2074 2079 }
2075 2080
2076 2081 static char parsers_doc[] = "Efficient content parsing.";
2077 2082
2078 2083 PyObject *encodedir(PyObject *self, PyObject *args);
2079 2084 PyObject *pathencode(PyObject *self, PyObject *args);
2080 2085 PyObject *lowerencode(PyObject *self, PyObject *args);
2081 2086
2082 2087 static PyMethodDef methods[] = {
2083 2088 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2084 2089 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2085 2090 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2086 2091 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2087 2092 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2088 2093 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2089 2094 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2090 2095 {NULL, NULL}
2091 2096 };
2092 2097
2093 2098 void dirs_module_init(PyObject *mod);
2094 2099
2095 2100 static void module_init(PyObject *mod)
2096 2101 {
2097 2102 /* This module constant has two purposes. First, it lets us unit test
2098 2103 * the ImportError raised without hard-coding any error text. This
2099 2104 * means we can change the text in the future without breaking tests,
2100 2105 * even across changesets without a recompile. Second, its presence
2101 2106 * can be used to determine whether the version-checking logic is
2102 2107 * present, which also helps in testing across changesets without a
2103 2108 * recompile. Note that this means the pure-Python version of parsers
2104 2109 * should not have this module constant. */
2105 2110 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2106 2111
2107 2112 dirs_module_init(mod);
2108 2113
2109 2114 indexType.tp_new = PyType_GenericNew;
2110 2115 if (PyType_Ready(&indexType) < 0 ||
2111 2116 PyType_Ready(&dirstateTupleType) < 0)
2112 2117 return;
2113 2118 Py_INCREF(&indexType);
2114 2119 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2115 2120 Py_INCREF(&dirstateTupleType);
2116 2121 PyModule_AddObject(mod, "dirstatetuple",
2117 2122 (PyObject *)&dirstateTupleType);
2118 2123
2119 2124 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2120 2125 -1, -1, -1, -1, nullid, 20);
2121 2126 if (nullentry)
2122 2127 PyObject_GC_UnTrack(nullentry);
2123 2128 }
2124 2129
2125 2130 static int check_python_version(void)
2126 2131 {
2127 2132 PyObject *sys = PyImport_ImportModule("sys");
2128 2133 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2129 2134 /* sys.hexversion is a 32-bit number by default, so the -1 case
2130 2135 * should only occur in unusual circumstances (e.g. if sys.hexversion
2131 2136 * is manually set to an invalid value). */
2132 2137 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2133 2138 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2134 2139 "modules were compiled with Python " PY_VERSION ", but "
2135 2140 "Mercurial is currently using Python with sys.hexversion=%ld: "
2136 2141 "Python %s\n at: %s", versionerrortext, hexversion,
2137 2142 Py_GetVersion(), Py_GetProgramFullPath());
2138 2143 return -1;
2139 2144 }
2140 2145 return 0;
2141 2146 }
2142 2147
2143 2148 #ifdef IS_PY3K
2144 2149 static struct PyModuleDef parsers_module = {
2145 2150 PyModuleDef_HEAD_INIT,
2146 2151 "parsers",
2147 2152 parsers_doc,
2148 2153 -1,
2149 2154 methods
2150 2155 };
2151 2156
2152 2157 PyMODINIT_FUNC PyInit_parsers(void)
2153 2158 {
2154 2159 PyObject *mod;
2155 2160
2156 2161 if (check_python_version() == -1)
2157 2162 return;
2158 2163 mod = PyModule_Create(&parsers_module);
2159 2164 module_init(mod);
2160 2165 return mod;
2161 2166 }
2162 2167 #else
2163 2168 PyMODINIT_FUNC initparsers(void)
2164 2169 {
2165 2170 PyObject *mod;
2166 2171
2167 2172 if (check_python_version() == -1)
2168 2173 return;
2169 2174 mod = Py_InitModule3("parsers", methods, parsers_doc);
2170 2175 module_init(mod);
2171 2176 }
2172 2177 #endif
General Comments 0
You need to be logged in to leave comments. Login now