##// END OF EJS Templates
parsers: don't leak references to sys et al in check_python_version...
Augie Fackler -
r23943:5fb44983 stable
parent child Browse files
Show More
@@ -1,2315 +1,2323
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
/* Message text describing a Python minor version mismatch. */
static char *versionerrortext =
	"Python minor version mismatch";
18 18
/*
 * Lookup table from byte value to hex digit value.  Bytes that are
 * not one of [0-9A-Fa-f] map to -1.  One row per 16 byte values.
 */
static int8_t hextable[256] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	/* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	/* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
37 37
/*
 * ASCII lowercasing table: every 7-bit value maps to itself except
 * 'A'-'Z' (0x41-0x5a), which map to 'a'-'z' (0x61-0x7a).
 * Eight entries per row; the row's first index is shown on the left.
 */
static char lowertable[128] = {
	/* 0x00 */ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	/* 0x08 */ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	/* 0x10 */ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	/* 0x18 */ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	/* 0x20 */ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	/* 0x28 */ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	/* 0x30 */ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	/* 0x38 */ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	/* 0x40 */ '\x40', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* @, A-G */
	/* 0x48 */ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	/* 0x50 */ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	/* 0x58 */ '\x78', '\x79', '\x7a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', /* X-Z, then unchanged */
	/* 0x60 */ '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	/* 0x68 */ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	/* 0x70 */ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	/* 0x78 */ '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
58 58
59 59 static inline int hexdigit(const char *p, Py_ssize_t off)
60 60 {
61 61 int8_t val = hextable[(unsigned char)p[off]];
62 62
63 63 if (val >= 0) {
64 64 return val;
65 65 }
66 66
67 67 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
68 68 return 0;
69 69 }
70 70
71 71 /*
72 72 * Turn a hex-encoded string into binary.
73 73 */
74 74 static PyObject *unhexlify(const char *str, int len)
75 75 {
76 76 PyObject *ret;
77 77 char *d;
78 78 int i;
79 79
80 80 ret = PyBytes_FromStringAndSize(NULL, len / 2);
81 81
82 82 if (!ret)
83 83 return NULL;
84 84
85 85 d = PyBytes_AsString(ret);
86 86
87 87 for (i = 0; i < len;) {
88 88 int hi = hexdigit(str, i++);
89 89 int lo = hexdigit(str, i++);
90 90 *d++ = (hi << 4) | lo;
91 91 }
92 92
93 93 return ret;
94 94 }
95 95
96 96 static PyObject *asciilower(PyObject *self, PyObject *args)
97 97 {
98 98 char *str, *newstr;
99 99 int i, len;
100 100 PyObject *newobj = NULL;
101 101
102 102 if (!PyArg_ParseTuple(args, "s#", &str, &len))
103 103 goto quit;
104 104
105 105 newobj = PyBytes_FromStringAndSize(NULL, len);
106 106 if (!newobj)
107 107 goto quit;
108 108
109 109 newstr = PyBytes_AS_STRING(newobj);
110 110
111 111 for (i = 0; i < len; i++) {
112 112 char c = str[i];
113 113 if (c & 0x80) {
114 114 PyObject *err = PyUnicodeDecodeError_Create(
115 115 "ascii", str, len, i, (i + 1),
116 116 "unexpected code byte");
117 117 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
118 118 Py_XDECREF(err);
119 119 goto quit;
120 120 }
121 121 newstr[i] = lowertable[(unsigned char)c];
122 122 }
123 123
124 124 return newobj;
125 125 quit:
126 126 Py_XDECREF(newobj);
127 127 return NULL;
128 128 }
129 129
130 130 /*
131 131 * This code assumes that a manifest is stitched together with newline
132 132 * ('\n') characters.
133 133 */
134 134 static PyObject *parse_manifest(PyObject *self, PyObject *args)
135 135 {
136 136 PyObject *mfdict, *fdict;
137 137 char *str, *start, *end;
138 138 int len;
139 139
140 140 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
141 141 &PyDict_Type, &mfdict,
142 142 &PyDict_Type, &fdict,
143 143 &str, &len))
144 144 goto quit;
145 145
146 146 start = str;
147 147 end = str + len;
148 148 while (start < end) {
149 149 PyObject *file = NULL, *node = NULL;
150 150 PyObject *flags = NULL;
151 151 char *zero = NULL, *newline = NULL;
152 152 ptrdiff_t nlen;
153 153
154 154 zero = memchr(start, '\0', end - start);
155 155 if (!zero) {
156 156 PyErr_SetString(PyExc_ValueError,
157 157 "manifest entry has no separator");
158 158 goto quit;
159 159 }
160 160
161 161 newline = memchr(zero + 1, '\n', end - (zero + 1));
162 162 if (!newline) {
163 163 PyErr_SetString(PyExc_ValueError,
164 164 "manifest contains trailing garbage");
165 165 goto quit;
166 166 }
167 167
168 168 file = PyBytes_FromStringAndSize(start, zero - start);
169 169
170 170 if (!file)
171 171 goto bail;
172 172
173 173 nlen = newline - zero - 1;
174 174
175 175 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
176 176 if (!node)
177 177 goto bail;
178 178
179 179 if (nlen > 40) {
180 180 flags = PyBytes_FromStringAndSize(zero + 41,
181 181 nlen - 40);
182 182 if (!flags)
183 183 goto bail;
184 184
185 185 if (PyDict_SetItem(fdict, file, flags) == -1)
186 186 goto bail;
187 187 }
188 188
189 189 if (PyDict_SetItem(mfdict, file, node) == -1)
190 190 goto bail;
191 191
192 192 start = newline + 1;
193 193
194 194 Py_XDECREF(flags);
195 195 Py_XDECREF(node);
196 196 Py_XDECREF(file);
197 197 continue;
198 198 bail:
199 199 Py_XDECREF(flags);
200 200 Py_XDECREF(node);
201 201 Py_XDECREF(file);
202 202 goto quit;
203 203 }
204 204
205 205 Py_INCREF(Py_None);
206 206 return Py_None;
207 207 quit:
208 208 return NULL;
209 209 }
210 210
211 211 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
212 212 int size, int mtime)
213 213 {
214 214 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
215 215 &dirstateTupleType);
216 216 if (!t)
217 217 return NULL;
218 218 t->state = state;
219 219 t->mode = mode;
220 220 t->size = size;
221 221 t->mtime = mtime;
222 222 return t;
223 223 }
224 224
225 225 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
226 226 PyObject *kwds)
227 227 {
228 228 /* We do all the initialization here and not a tp_init function because
229 229 * dirstate_tuple is immutable. */
230 230 dirstateTupleObject *t;
231 231 char state;
232 232 int size, mode, mtime;
233 233 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
234 234 return NULL;
235 235
236 236 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
237 237 if (!t)
238 238 return NULL;
239 239 t->state = state;
240 240 t->mode = mode;
241 241 t->size = size;
242 242 t->mtime = mtime;
243 243
244 244 return (PyObject *)t;
245 245 }
246 246
247 247 static void dirstate_tuple_dealloc(PyObject *o)
248 248 {
249 249 PyObject_Del(o);
250 250 }
251 251
252 252 static Py_ssize_t dirstate_tuple_length(PyObject *o)
253 253 {
254 254 return 4;
255 255 }
256 256
257 257 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
258 258 {
259 259 dirstateTupleObject *t = (dirstateTupleObject *)o;
260 260 switch (i) {
261 261 case 0:
262 262 return PyBytes_FromStringAndSize(&t->state, 1);
263 263 case 1:
264 264 return PyInt_FromLong(t->mode);
265 265 case 2:
266 266 return PyInt_FromLong(t->size);
267 267 case 3:
268 268 return PyInt_FromLong(t->mtime);
269 269 default:
270 270 PyErr_SetString(PyExc_IndexError, "index out of range");
271 271 return NULL;
272 272 }
273 273 }
274 274
275 275 static PySequenceMethods dirstate_tuple_sq = {
276 276 dirstate_tuple_length, /* sq_length */
277 277 0, /* sq_concat */
278 278 0, /* sq_repeat */
279 279 dirstate_tuple_item, /* sq_item */
280 280 0, /* sq_ass_item */
281 281 0, /* sq_contains */
282 282 0, /* sq_inplace_concat */
283 283 0 /* sq_inplace_repeat */
284 284 };
285 285
286 286 PyTypeObject dirstateTupleType = {
287 287 PyVarObject_HEAD_INIT(NULL, 0)
288 288 "dirstate_tuple", /* tp_name */
289 289 sizeof(dirstateTupleObject),/* tp_basicsize */
290 290 0, /* tp_itemsize */
291 291 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
292 292 0, /* tp_print */
293 293 0, /* tp_getattr */
294 294 0, /* tp_setattr */
295 295 0, /* tp_compare */
296 296 0, /* tp_repr */
297 297 0, /* tp_as_number */
298 298 &dirstate_tuple_sq, /* tp_as_sequence */
299 299 0, /* tp_as_mapping */
300 300 0, /* tp_hash */
301 301 0, /* tp_call */
302 302 0, /* tp_str */
303 303 0, /* tp_getattro */
304 304 0, /* tp_setattro */
305 305 0, /* tp_as_buffer */
306 306 Py_TPFLAGS_DEFAULT, /* tp_flags */
307 307 "dirstate tuple", /* tp_doc */
308 308 0, /* tp_traverse */
309 309 0, /* tp_clear */
310 310 0, /* tp_richcompare */
311 311 0, /* tp_weaklistoffset */
312 312 0, /* tp_iter */
313 313 0, /* tp_iternext */
314 314 0, /* tp_methods */
315 315 0, /* tp_members */
316 316 0, /* tp_getset */
317 317 0, /* tp_base */
318 318 0, /* tp_dict */
319 319 0, /* tp_descr_get */
320 320 0, /* tp_descr_set */
321 321 0, /* tp_dictoffset */
322 322 0, /* tp_init */
323 323 0, /* tp_alloc */
324 324 dirstate_tuple_new, /* tp_new */
325 325 };
326 326
327 327 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
328 328 {
329 329 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
330 330 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
331 331 char state, *cur, *str, *cpos;
332 332 int mode, size, mtime;
333 333 unsigned int flen, len, pos = 40;
334 334 int readlen;
335 335
336 336 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
337 337 &PyDict_Type, &dmap,
338 338 &PyDict_Type, &cmap,
339 339 &str, &readlen))
340 340 goto quit;
341 341
342 342 if (readlen < 0)
343 343 goto quit;
344 344
345 345 len = readlen;
346 346
347 347 /* read parents */
348 348 if (len < 40)
349 349 goto quit;
350 350
351 351 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
352 352 if (!parents)
353 353 goto quit;
354 354
355 355 /* read filenames */
356 356 while (pos >= 40 && pos < len) {
357 357 cur = str + pos;
358 358 /* unpack header */
359 359 state = *cur;
360 360 mode = getbe32(cur + 1);
361 361 size = getbe32(cur + 5);
362 362 mtime = getbe32(cur + 9);
363 363 flen = getbe32(cur + 13);
364 364 pos += 17;
365 365 cur += 17;
366 366 if (flen > len - pos) {
367 367 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
368 368 goto quit;
369 369 }
370 370
371 371 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
372 372 mtime);
373 373 cpos = memchr(cur, 0, flen);
374 374 if (cpos) {
375 375 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
376 376 cname = PyBytes_FromStringAndSize(cpos + 1,
377 377 flen - (cpos - cur) - 1);
378 378 if (!fname || !cname ||
379 379 PyDict_SetItem(cmap, fname, cname) == -1 ||
380 380 PyDict_SetItem(dmap, fname, entry) == -1)
381 381 goto quit;
382 382 Py_DECREF(cname);
383 383 } else {
384 384 fname = PyBytes_FromStringAndSize(cur, flen);
385 385 if (!fname ||
386 386 PyDict_SetItem(dmap, fname, entry) == -1)
387 387 goto quit;
388 388 }
389 389 Py_DECREF(fname);
390 390 Py_DECREF(entry);
391 391 fname = cname = entry = NULL;
392 392 pos += flen;
393 393 }
394 394
395 395 ret = parents;
396 396 Py_INCREF(ret);
397 397 quit:
398 398 Py_XDECREF(fname);
399 399 Py_XDECREF(cname);
400 400 Py_XDECREF(entry);
401 401 Py_XDECREF(parents);
402 402 return ret;
403 403 }
404 404
405 405 /*
406 406 * Efficiently pack a dirstate object into its on-disk format.
407 407 */
408 408 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
409 409 {
410 410 PyObject *packobj = NULL;
411 411 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
412 412 Py_ssize_t nbytes, pos, l;
413 413 PyObject *k, *v, *pn;
414 414 char *p, *s;
415 415 double now;
416 416
417 417 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
418 418 &PyDict_Type, &map, &PyDict_Type, &copymap,
419 419 &pl, &now))
420 420 return NULL;
421 421
422 422 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
423 423 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
424 424 return NULL;
425 425 }
426 426
427 427 /* Figure out how much we need to allocate. */
428 428 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
429 429 PyObject *c;
430 430 if (!PyString_Check(k)) {
431 431 PyErr_SetString(PyExc_TypeError, "expected string key");
432 432 goto bail;
433 433 }
434 434 nbytes += PyString_GET_SIZE(k) + 17;
435 435 c = PyDict_GetItem(copymap, k);
436 436 if (c) {
437 437 if (!PyString_Check(c)) {
438 438 PyErr_SetString(PyExc_TypeError,
439 439 "expected string key");
440 440 goto bail;
441 441 }
442 442 nbytes += PyString_GET_SIZE(c) + 1;
443 443 }
444 444 }
445 445
446 446 packobj = PyString_FromStringAndSize(NULL, nbytes);
447 447 if (packobj == NULL)
448 448 goto bail;
449 449
450 450 p = PyString_AS_STRING(packobj);
451 451
452 452 pn = PySequence_ITEM(pl, 0);
453 453 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
454 454 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
455 455 goto bail;
456 456 }
457 457 memcpy(p, s, l);
458 458 p += 20;
459 459 pn = PySequence_ITEM(pl, 1);
460 460 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
461 461 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
462 462 goto bail;
463 463 }
464 464 memcpy(p, s, l);
465 465 p += 20;
466 466
467 467 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
468 468 dirstateTupleObject *tuple;
469 469 char state;
470 470 uint32_t mode, size, mtime;
471 471 Py_ssize_t len, l;
472 472 PyObject *o;
473 473 char *t;
474 474
475 475 if (!dirstate_tuple_check(v)) {
476 476 PyErr_SetString(PyExc_TypeError,
477 477 "expected a dirstate tuple");
478 478 goto bail;
479 479 }
480 480 tuple = (dirstateTupleObject *)v;
481 481
482 482 state = tuple->state;
483 483 mode = tuple->mode;
484 484 size = tuple->size;
485 485 mtime = tuple->mtime;
486 486 if (state == 'n' && mtime == (uint32_t)now) {
487 487 /* See pure/parsers.py:pack_dirstate for why we do
488 488 * this. */
489 489 mtime = -1;
490 490 mtime_unset = (PyObject *)make_dirstate_tuple(
491 491 state, mode, size, mtime);
492 492 if (!mtime_unset)
493 493 goto bail;
494 494 if (PyDict_SetItem(map, k, mtime_unset) == -1)
495 495 goto bail;
496 496 Py_DECREF(mtime_unset);
497 497 mtime_unset = NULL;
498 498 }
499 499 *p++ = state;
500 500 putbe32(mode, p);
501 501 putbe32(size, p + 4);
502 502 putbe32(mtime, p + 8);
503 503 t = p + 12;
504 504 p += 16;
505 505 len = PyString_GET_SIZE(k);
506 506 memcpy(p, PyString_AS_STRING(k), len);
507 507 p += len;
508 508 o = PyDict_GetItem(copymap, k);
509 509 if (o) {
510 510 *p++ = '\0';
511 511 l = PyString_GET_SIZE(o);
512 512 memcpy(p, PyString_AS_STRING(o), l);
513 513 p += l;
514 514 len += l + 1;
515 515 }
516 516 putbe32((uint32_t)len, t);
517 517 }
518 518
519 519 pos = p - PyString_AS_STRING(packobj);
520 520 if (pos != nbytes) {
521 521 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
522 522 (long)pos, (long)nbytes);
523 523 goto bail;
524 524 }
525 525
526 526 return packobj;
527 527 bail:
528 528 Py_XDECREF(mtime_unset);
529 529 Py_XDECREF(packobj);
530 530 return NULL;
531 531 }
532 532
/*
 * One node of the base-16 trie used for fast node->rev mapping.
 *
 * Each of the 16 child slots holds one of:
 *   > 0   index of the next trie node
 *   < 0   a leaf, encoded as -(rev + 1)
 *   == 0  empty
 */
typedef struct {
	int children[16];
} nodetree;
543 543
544 544 /*
545 545 * This class has two behaviours.
546 546 *
547 547 * When used in a list-like way (with integer keys), we decode an
548 548 * entry in a RevlogNG index file on demand. Our last entry is a
549 549 * sentinel, always a nullid. We have limited support for
550 550 * integer-keyed insert and delete, only at elements right before the
551 551 * sentinel.
552 552 *
553 553 * With string keys, we lazily perform a reverse mapping from node to
554 554 * rev, using a base-16 trie.
555 555 */
556 556 typedef struct {
557 557 PyObject_HEAD
558 558 /* Type-specific fields go here. */
559 559 PyObject *data; /* raw bytes of index */
560 560 PyObject **cache; /* cached tuples */
561 561 const char **offsets; /* populated on demand */
562 562 Py_ssize_t raw_length; /* original number of elements */
563 563 Py_ssize_t length; /* current number of elements */
564 564 PyObject *added; /* populated on demand */
565 565 PyObject *headrevs; /* cache, invalidated on changes */
566 566 PyObject *filteredrevs;/* filtered revs set */
567 567 nodetree *nt; /* base-16 trie */
568 568 int ntlength; /* # nodes in use */
569 569 int ntcapacity; /* # nodes allocated */
570 570 int ntdepth; /* maximum depth of tree */
571 571 int ntsplits; /* # splits performed */
572 572 int ntrev; /* last rev scanned */
573 573 int ntlookups; /* # lookups */
574 574 int ntmisses; /* # lookups that miss the cache */
575 575 int inlined;
576 576 } indexObject;
577 577
578 578 static Py_ssize_t index_length(const indexObject *self)
579 579 {
580 580 if (self->added == NULL)
581 581 return self->length;
582 582 return self->length + PyList_GET_SIZE(self->added);
583 583 }
584 584
585 585 static PyObject *nullentry;
586 586 static const char nullid[20];
587 587
588 588 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
589 589
/*
 * Py_BuildValue format for an index entry tuple.  The first item is
 * built from a uint64_t, so the format uses "K" where long is 32 bits
 * wide and "k" otherwise.
 */
#if LONG_MAX != 0x7fffffffL
static char *tuple_format = "kiiiiiis#";
#else
static char *tuple_format = "Kiiiiiis#";
#endif

/* Size in bytes of one RevlogNG v1 index entry. */
static const long v1_hdrsize = 64;
598 598
599 599 /*
600 600 * Return a pointer to the beginning of a RevlogNG record.
601 601 */
602 602 static const char *index_deref(indexObject *self, Py_ssize_t pos)
603 603 {
604 604 if (self->inlined && pos > 0) {
605 605 if (self->offsets == NULL) {
606 606 self->offsets = malloc(self->raw_length *
607 607 sizeof(*self->offsets));
608 608 if (self->offsets == NULL)
609 609 return (const char *)PyErr_NoMemory();
610 610 inline_scan(self, self->offsets);
611 611 }
612 612 return self->offsets[pos];
613 613 }
614 614
615 615 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
616 616 }
617 617
618 618 /*
619 619 * RevlogNG format (all in big endian, data may be inlined):
620 620 * 6 bytes: offset
621 621 * 2 bytes: flags
622 622 * 4 bytes: compressed length
623 623 * 4 bytes: uncompressed length
624 624 * 4 bytes: base revision
625 625 * 4 bytes: link revision
626 626 * 4 bytes: parent 1 revision
627 627 * 4 bytes: parent 2 revision
628 628 * 32 bytes: nodeid (only 20 bytes used)
629 629 */
630 630 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
631 631 {
632 632 uint64_t offset_flags;
633 633 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
634 634 const char *c_node_id;
635 635 const char *data;
636 636 Py_ssize_t length = index_length(self);
637 637 PyObject *entry;
638 638
639 639 if (pos < 0)
640 640 pos += length;
641 641
642 642 if (pos < 0 || pos >= length) {
643 643 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
644 644 return NULL;
645 645 }
646 646
647 647 if (pos == length - 1) {
648 648 Py_INCREF(nullentry);
649 649 return nullentry;
650 650 }
651 651
652 652 if (pos >= self->length - 1) {
653 653 PyObject *obj;
654 654 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
655 655 Py_INCREF(obj);
656 656 return obj;
657 657 }
658 658
659 659 if (self->cache) {
660 660 if (self->cache[pos]) {
661 661 Py_INCREF(self->cache[pos]);
662 662 return self->cache[pos];
663 663 }
664 664 } else {
665 665 self->cache = calloc(self->raw_length, sizeof(PyObject *));
666 666 if (self->cache == NULL)
667 667 return PyErr_NoMemory();
668 668 }
669 669
670 670 data = index_deref(self, pos);
671 671 if (data == NULL)
672 672 return NULL;
673 673
674 674 offset_flags = getbe32(data + 4);
675 675 if (pos == 0) /* mask out version number for the first entry */
676 676 offset_flags &= 0xFFFF;
677 677 else {
678 678 uint32_t offset_high = getbe32(data);
679 679 offset_flags |= ((uint64_t)offset_high) << 32;
680 680 }
681 681
682 682 comp_len = getbe32(data + 8);
683 683 uncomp_len = getbe32(data + 12);
684 684 base_rev = getbe32(data + 16);
685 685 link_rev = getbe32(data + 20);
686 686 parent_1 = getbe32(data + 24);
687 687 parent_2 = getbe32(data + 28);
688 688 c_node_id = data + 32;
689 689
690 690 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
691 691 uncomp_len, base_rev, link_rev,
692 692 parent_1, parent_2, c_node_id, 20);
693 693
694 694 if (entry) {
695 695 PyObject_GC_UnTrack(entry);
696 696 Py_INCREF(entry);
697 697 }
698 698
699 699 self->cache[pos] = entry;
700 700
701 701 return entry;
702 702 }
703 703
704 704 /*
705 705 * Return the 20-byte SHA of the node corresponding to the given rev.
706 706 */
707 707 static const char *index_node(indexObject *self, Py_ssize_t pos)
708 708 {
709 709 Py_ssize_t length = index_length(self);
710 710 const char *data;
711 711
712 712 if (pos == length - 1 || pos == INT_MAX)
713 713 return nullid;
714 714
715 715 if (pos >= length)
716 716 return NULL;
717 717
718 718 if (pos >= self->length - 1) {
719 719 PyObject *tuple, *str;
720 720 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
721 721 str = PyTuple_GetItem(tuple, 7);
722 722 return str ? PyString_AS_STRING(str) : NULL;
723 723 }
724 724
725 725 data = index_deref(self, pos);
726 726 return data ? data + 32 : NULL;
727 727 }
728 728
729 729 static int nt_insert(indexObject *self, const char *node, int rev);
730 730
731 731 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
732 732 {
733 733 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
734 734 return -1;
735 735 if (*nodelen == 20)
736 736 return 0;
737 737 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
738 738 return -1;
739 739 }
740 740
741 741 static PyObject *index_insert(indexObject *self, PyObject *args)
742 742 {
743 743 PyObject *obj;
744 744 char *node;
745 745 int index;
746 746 Py_ssize_t len, nodelen;
747 747
748 748 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
749 749 return NULL;
750 750
751 751 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
752 752 PyErr_SetString(PyExc_TypeError, "8-tuple required");
753 753 return NULL;
754 754 }
755 755
756 756 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
757 757 return NULL;
758 758
759 759 len = index_length(self);
760 760
761 761 if (index < 0)
762 762 index += len;
763 763
764 764 if (index != len - 1) {
765 765 PyErr_SetString(PyExc_IndexError,
766 766 "insert only supported at index -1");
767 767 return NULL;
768 768 }
769 769
770 770 if (self->added == NULL) {
771 771 self->added = PyList_New(0);
772 772 if (self->added == NULL)
773 773 return NULL;
774 774 }
775 775
776 776 if (PyList_Append(self->added, obj) == -1)
777 777 return NULL;
778 778
779 779 if (self->nt)
780 780 nt_insert(self, node, index);
781 781
782 782 Py_CLEAR(self->headrevs);
783 783 Py_RETURN_NONE;
784 784 }
785 785
786 786 static void _index_clearcaches(indexObject *self)
787 787 {
788 788 if (self->cache) {
789 789 Py_ssize_t i;
790 790
791 791 for (i = 0; i < self->raw_length; i++)
792 792 Py_CLEAR(self->cache[i]);
793 793 free(self->cache);
794 794 self->cache = NULL;
795 795 }
796 796 if (self->offsets) {
797 797 free(self->offsets);
798 798 self->offsets = NULL;
799 799 }
800 800 if (self->nt) {
801 801 free(self->nt);
802 802 self->nt = NULL;
803 803 }
804 804 Py_CLEAR(self->headrevs);
805 805 }
806 806
807 807 static PyObject *index_clearcaches(indexObject *self)
808 808 {
809 809 _index_clearcaches(self);
810 810 self->ntlength = self->ntcapacity = 0;
811 811 self->ntdepth = self->ntsplits = 0;
812 812 self->ntrev = -1;
813 813 self->ntlookups = self->ntmisses = 0;
814 814 Py_RETURN_NONE;
815 815 }
816 816
817 817 static PyObject *index_stats(indexObject *self)
818 818 {
819 819 PyObject *obj = PyDict_New();
820 820
821 821 if (obj == NULL)
822 822 return NULL;
823 823
824 824 #define istat(__n, __d) \
825 825 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
826 826 goto bail;
827 827
828 828 if (self->added) {
829 829 Py_ssize_t len = PyList_GET_SIZE(self->added);
830 830 if (PyDict_SetItemString(obj, "index entries added",
831 831 PyInt_FromSsize_t(len)) == -1)
832 832 goto bail;
833 833 }
834 834
835 835 if (self->raw_length != self->length - 1)
836 836 istat(raw_length, "revs on disk");
837 837 istat(length, "revs in memory");
838 838 istat(ntcapacity, "node trie capacity");
839 839 istat(ntdepth, "node trie depth");
840 840 istat(ntlength, "node trie count");
841 841 istat(ntlookups, "node trie lookups");
842 842 istat(ntmisses, "node trie misses");
843 843 istat(ntrev, "node trie last rev scanned");
844 844 istat(ntsplits, "node trie splits");
845 845
846 846 #undef istat
847 847
848 848 return obj;
849 849
850 850 bail:
851 851 Py_XDECREF(obj);
852 852 return NULL;
853 853 }
854 854
855 855 /*
856 856 * When we cache a list, we want to be sure the caller can't mutate
857 857 * the cached copy.
858 858 */
859 859 static PyObject *list_copy(PyObject *list)
860 860 {
861 861 Py_ssize_t len = PyList_GET_SIZE(list);
862 862 PyObject *newlist = PyList_New(len);
863 863 Py_ssize_t i;
864 864
865 865 if (newlist == NULL)
866 866 return NULL;
867 867
868 868 for (i = 0; i < len; i++) {
869 869 PyObject *obj = PyList_GET_ITEM(list, i);
870 870 Py_INCREF(obj);
871 871 PyList_SET_ITEM(newlist, i, obj);
872 872 }
873 873
874 874 return newlist;
875 875 }
876 876
877 877 /* arg should be Py_ssize_t but Python 2.4 do not support the n format */
878 878 static int check_filter(PyObject *filter, unsigned long arg) {
879 879 if (filter) {
880 880 PyObject *arglist, *result;
881 881 int isfiltered;
882 882
883 883 arglist = Py_BuildValue("(k)", arg);
884 884 if (!arglist) {
885 885 return -1;
886 886 }
887 887
888 888 result = PyEval_CallObject(filter, arglist);
889 889 Py_DECREF(arglist);
890 890 if (!result) {
891 891 return -1;
892 892 }
893 893
894 894 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
895 895 * same as this function, so we can just return it directly.*/
896 896 isfiltered = PyObject_IsTrue(result);
897 897 Py_DECREF(result);
898 898 return isfiltered;
899 899 } else {
900 900 return 0;
901 901 }
902 902 }
903 903
904 904 static PyObject *index_headrevs(indexObject *self, PyObject *args)
905 905 {
906 906 Py_ssize_t i, len, addlen;
907 907 char *nothead = NULL;
908 908 PyObject *heads = NULL;
909 909 PyObject *filter = NULL;
910 910 PyObject *filteredrevs = Py_None;
911 911
912 912 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
913 913 return NULL;
914 914 }
915 915
916 916 if (self->headrevs && filteredrevs == self->filteredrevs)
917 917 return list_copy(self->headrevs);
918 918
919 919 Py_DECREF(self->filteredrevs);
920 920 self->filteredrevs = filteredrevs;
921 921 Py_INCREF(filteredrevs);
922 922
923 923 if (filteredrevs != Py_None) {
924 924 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
925 925 if (!filter) {
926 926 PyErr_SetString(PyExc_TypeError,
927 927 "filteredrevs has no attribute __contains__");
928 928 goto bail;
929 929 }
930 930 }
931 931
932 932 len = index_length(self) - 1;
933 933 heads = PyList_New(0);
934 934 if (heads == NULL)
935 935 goto bail;
936 936 if (len == 0) {
937 937 PyObject *nullid = PyInt_FromLong(-1);
938 938 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
939 939 Py_XDECREF(nullid);
940 940 goto bail;
941 941 }
942 942 goto done;
943 943 }
944 944
945 945 nothead = calloc(len, 1);
946 946 if (nothead == NULL)
947 947 goto bail;
948 948
949 949 for (i = 0; i < self->raw_length; i++) {
950 950 const char *data;
951 951 int parent_1, parent_2, isfiltered;
952 952
953 953 isfiltered = check_filter(filter, i);
954 954 if (isfiltered == -1) {
955 955 PyErr_SetString(PyExc_TypeError,
956 956 "unable to check filter");
957 957 goto bail;
958 958 }
959 959
960 960 if (isfiltered) {
961 961 nothead[i] = 1;
962 962 continue;
963 963 }
964 964
965 965 data = index_deref(self, i);
966 966 parent_1 = getbe32(data + 24);
967 967 parent_2 = getbe32(data + 28);
968 968
969 969 if (parent_1 >= 0)
970 970 nothead[parent_1] = 1;
971 971 if (parent_2 >= 0)
972 972 nothead[parent_2] = 1;
973 973 }
974 974
975 975 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
976 976
977 977 for (i = 0; i < addlen; i++) {
978 978 PyObject *rev = PyList_GET_ITEM(self->added, i);
979 979 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
980 980 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
981 981 long parent_1, parent_2;
982 982 int isfiltered;
983 983
984 984 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
985 985 PyErr_SetString(PyExc_TypeError,
986 986 "revlog parents are invalid");
987 987 goto bail;
988 988 }
989 989
990 990 isfiltered = check_filter(filter, i);
991 991 if (isfiltered == -1) {
992 992 PyErr_SetString(PyExc_TypeError,
993 993 "unable to check filter");
994 994 goto bail;
995 995 }
996 996
997 997 if (isfiltered) {
998 998 nothead[i] = 1;
999 999 continue;
1000 1000 }
1001 1001
1002 1002 parent_1 = PyInt_AS_LONG(p1);
1003 1003 parent_2 = PyInt_AS_LONG(p2);
1004 1004 if (parent_1 >= 0)
1005 1005 nothead[parent_1] = 1;
1006 1006 if (parent_2 >= 0)
1007 1007 nothead[parent_2] = 1;
1008 1008 }
1009 1009
1010 1010 for (i = 0; i < len; i++) {
1011 1011 PyObject *head;
1012 1012
1013 1013 if (nothead[i])
1014 1014 continue;
1015 1015 head = PyInt_FromSsize_t(i);
1016 1016 if (head == NULL || PyList_Append(heads, head) == -1) {
1017 1017 Py_XDECREF(head);
1018 1018 goto bail;
1019 1019 }
1020 1020 }
1021 1021
1022 1022 done:
1023 1023 self->headrevs = heads;
1024 1024 Py_XDECREF(filter);
1025 1025 free(nothead);
1026 1026 return list_copy(self->headrevs);
1027 1027 bail:
1028 1028 Py_XDECREF(filter);
1029 1029 Py_XDECREF(heads);
1030 1030 free(nothead);
1031 1031 return NULL;
1032 1032 }
1033 1033
1034 1034 static inline int nt_level(const char *node, Py_ssize_t level)
1035 1035 {
1036 1036 int v = node[level>>1];
1037 1037 if (!(level & 1))
1038 1038 v >>= 4;
1039 1039 return v & 0xf;
1040 1040 }
1041 1041
1042 1042 /*
1043 1043 * Return values:
1044 1044 *
1045 1045 * -4: match is ambiguous (multiple candidates)
1046 1046 * -2: not found
1047 1047 * rest: valid rev
1048 1048 */
1049 1049 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
1050 1050 int hex)
1051 1051 {
1052 1052 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1053 1053 int level, maxlevel, off;
1054 1054
1055 1055 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1056 1056 return -1;
1057 1057
1058 1058 if (self->nt == NULL)
1059 1059 return -2;
1060 1060
1061 1061 if (hex)
1062 1062 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1063 1063 else
1064 1064 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1065 1065
1066 1066 for (level = off = 0; level < maxlevel; level++) {
1067 1067 int k = getnybble(node, level);
1068 1068 nodetree *n = &self->nt[off];
1069 1069 int v = n->children[k];
1070 1070
1071 1071 if (v < 0) {
1072 1072 const char *n;
1073 1073 Py_ssize_t i;
1074 1074
1075 1075 v = -v - 1;
1076 1076 n = index_node(self, v);
1077 1077 if (n == NULL)
1078 1078 return -2;
1079 1079 for (i = level; i < maxlevel; i++)
1080 1080 if (getnybble(node, i) != nt_level(n, i))
1081 1081 return -2;
1082 1082 return v;
1083 1083 }
1084 1084 if (v == 0)
1085 1085 return -2;
1086 1086 off = v;
1087 1087 }
1088 1088 /* multiple matches against an ambiguous prefix */
1089 1089 return -4;
1090 1090 }
1091 1091
1092 1092 static int nt_new(indexObject *self)
1093 1093 {
1094 1094 if (self->ntlength == self->ntcapacity) {
1095 1095 self->ntcapacity *= 2;
1096 1096 self->nt = realloc(self->nt,
1097 1097 self->ntcapacity * sizeof(nodetree));
1098 1098 if (self->nt == NULL) {
1099 1099 PyErr_SetString(PyExc_MemoryError, "out of memory");
1100 1100 return -1;
1101 1101 }
1102 1102 memset(&self->nt[self->ntlength], 0,
1103 1103 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1104 1104 }
1105 1105 return self->ntlength++;
1106 1106 }
1107 1107
1108 1108 static int nt_insert(indexObject *self, const char *node, int rev)
1109 1109 {
1110 1110 int level = 0;
1111 1111 int off = 0;
1112 1112
1113 1113 while (level < 40) {
1114 1114 int k = nt_level(node, level);
1115 1115 nodetree *n;
1116 1116 int v;
1117 1117
1118 1118 n = &self->nt[off];
1119 1119 v = n->children[k];
1120 1120
1121 1121 if (v == 0) {
1122 1122 n->children[k] = -rev - 1;
1123 1123 return 0;
1124 1124 }
1125 1125 if (v < 0) {
1126 1126 const char *oldnode = index_node(self, -v - 1);
1127 1127 int noff;
1128 1128
1129 1129 if (!oldnode || !memcmp(oldnode, node, 20)) {
1130 1130 n->children[k] = -rev - 1;
1131 1131 return 0;
1132 1132 }
1133 1133 noff = nt_new(self);
1134 1134 if (noff == -1)
1135 1135 return -1;
1136 1136 /* self->nt may have been changed by realloc */
1137 1137 self->nt[off].children[k] = noff;
1138 1138 off = noff;
1139 1139 n = &self->nt[off];
1140 1140 n->children[nt_level(oldnode, ++level)] = v;
1141 1141 if (level > self->ntdepth)
1142 1142 self->ntdepth = level;
1143 1143 self->ntsplits += 1;
1144 1144 } else {
1145 1145 level += 1;
1146 1146 off = v;
1147 1147 }
1148 1148 }
1149 1149
1150 1150 return -1;
1151 1151 }
1152 1152
1153 1153 static int nt_init(indexObject *self)
1154 1154 {
1155 1155 if (self->nt == NULL) {
1156 1156 if (self->raw_length > INT_MAX) {
1157 1157 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1158 1158 return -1;
1159 1159 }
1160 1160 self->ntcapacity = self->raw_length < 4
1161 1161 ? 4 : (int)self->raw_length / 2;
1162 1162
1163 1163 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1164 1164 if (self->nt == NULL) {
1165 1165 PyErr_NoMemory();
1166 1166 return -1;
1167 1167 }
1168 1168 self->ntlength = 1;
1169 1169 self->ntrev = (int)index_length(self) - 1;
1170 1170 self->ntlookups = 1;
1171 1171 self->ntmisses = 0;
1172 1172 if (nt_insert(self, nullid, INT_MAX) == -1)
1173 1173 return -1;
1174 1174 }
1175 1175 return 0;
1176 1176 }
1177 1177
1178 1178 /*
1179 1179 * Return values:
1180 1180 *
1181 1181 * -3: error (exception set)
1182 1182 * -2: not found (no exception set)
1183 1183 * rest: valid rev
1184 1184 */
1185 1185 static int index_find_node(indexObject *self,
1186 1186 const char *node, Py_ssize_t nodelen)
1187 1187 {
1188 1188 int rev;
1189 1189
1190 1190 self->ntlookups++;
1191 1191 rev = nt_find(self, node, nodelen, 0);
1192 1192 if (rev >= -1)
1193 1193 return rev;
1194 1194
1195 1195 if (nt_init(self) == -1)
1196 1196 return -3;
1197 1197
1198 1198 /*
1199 1199 * For the first handful of lookups, we scan the entire index,
1200 1200 * and cache only the matching nodes. This optimizes for cases
1201 1201 * like "hg tip", where only a few nodes are accessed.
1202 1202 *
1203 1203 * After that, we cache every node we visit, using a single
1204 1204 * scan amortized over multiple lookups. This gives the best
1205 1205 * bulk performance, e.g. for "hg log".
1206 1206 */
1207 1207 if (self->ntmisses++ < 4) {
1208 1208 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1209 1209 const char *n = index_node(self, rev);
1210 1210 if (n == NULL)
1211 1211 return -2;
1212 1212 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1213 1213 if (nt_insert(self, n, rev) == -1)
1214 1214 return -3;
1215 1215 break;
1216 1216 }
1217 1217 }
1218 1218 } else {
1219 1219 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1220 1220 const char *n = index_node(self, rev);
1221 1221 if (n == NULL) {
1222 1222 self->ntrev = rev + 1;
1223 1223 return -2;
1224 1224 }
1225 1225 if (nt_insert(self, n, rev) == -1) {
1226 1226 self->ntrev = rev + 1;
1227 1227 return -3;
1228 1228 }
1229 1229 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1230 1230 break;
1231 1231 }
1232 1232 }
1233 1233 self->ntrev = rev;
1234 1234 }
1235 1235
1236 1236 if (rev >= 0)
1237 1237 return rev;
1238 1238 return -2;
1239 1239 }
1240 1240
1241 1241 static PyObject *raise_revlog_error(void)
1242 1242 {
1243 1243 static PyObject *errclass;
1244 1244 PyObject *mod = NULL, *errobj;
1245 1245
1246 1246 if (errclass == NULL) {
1247 1247 PyObject *dict;
1248 1248
1249 1249 mod = PyImport_ImportModule("mercurial.error");
1250 1250 if (mod == NULL)
1251 1251 goto classfail;
1252 1252
1253 1253 dict = PyModule_GetDict(mod);
1254 1254 if (dict == NULL)
1255 1255 goto classfail;
1256 1256
1257 1257 errclass = PyDict_GetItemString(dict, "RevlogError");
1258 1258 if (errclass == NULL) {
1259 1259 PyErr_SetString(PyExc_SystemError,
1260 1260 "could not find RevlogError");
1261 1261 goto classfail;
1262 1262 }
1263 1263 Py_INCREF(errclass);
1264 1264 }
1265 1265
1266 1266 errobj = PyObject_CallFunction(errclass, NULL);
1267 1267 if (errobj == NULL)
1268 1268 return NULL;
1269 1269 PyErr_SetObject(errclass, errobj);
1270 1270 return errobj;
1271 1271
1272 1272 classfail:
1273 1273 Py_XDECREF(mod);
1274 1274 return NULL;
1275 1275 }
1276 1276
1277 1277 static PyObject *index_getitem(indexObject *self, PyObject *value)
1278 1278 {
1279 1279 char *node;
1280 1280 Py_ssize_t nodelen;
1281 1281 int rev;
1282 1282
1283 1283 if (PyInt_Check(value))
1284 1284 return index_get(self, PyInt_AS_LONG(value));
1285 1285
1286 1286 if (node_check(value, &node, &nodelen) == -1)
1287 1287 return NULL;
1288 1288 rev = index_find_node(self, node, nodelen);
1289 1289 if (rev >= -1)
1290 1290 return PyInt_FromLong(rev);
1291 1291 if (rev == -2)
1292 1292 raise_revlog_error();
1293 1293 return NULL;
1294 1294 }
1295 1295
1296 1296 static int nt_partialmatch(indexObject *self, const char *node,
1297 1297 Py_ssize_t nodelen)
1298 1298 {
1299 1299 int rev;
1300 1300
1301 1301 if (nt_init(self) == -1)
1302 1302 return -3;
1303 1303
1304 1304 if (self->ntrev > 0) {
1305 1305 /* ensure that the radix tree is fully populated */
1306 1306 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1307 1307 const char *n = index_node(self, rev);
1308 1308 if (n == NULL)
1309 1309 return -2;
1310 1310 if (nt_insert(self, n, rev) == -1)
1311 1311 return -3;
1312 1312 }
1313 1313 self->ntrev = rev;
1314 1314 }
1315 1315
1316 1316 return nt_find(self, node, nodelen, 1);
1317 1317 }
1318 1318
1319 1319 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1320 1320 {
1321 1321 const char *fullnode;
1322 1322 int nodelen;
1323 1323 char *node;
1324 1324 int rev, i;
1325 1325
1326 1326 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1327 1327 return NULL;
1328 1328
1329 1329 if (nodelen < 4) {
1330 1330 PyErr_SetString(PyExc_ValueError, "key too short");
1331 1331 return NULL;
1332 1332 }
1333 1333
1334 1334 if (nodelen > 40) {
1335 1335 PyErr_SetString(PyExc_ValueError, "key too long");
1336 1336 return NULL;
1337 1337 }
1338 1338
1339 1339 for (i = 0; i < nodelen; i++)
1340 1340 hexdigit(node, i);
1341 1341 if (PyErr_Occurred()) {
1342 1342 /* input contains non-hex characters */
1343 1343 PyErr_Clear();
1344 1344 Py_RETURN_NONE;
1345 1345 }
1346 1346
1347 1347 rev = nt_partialmatch(self, node, nodelen);
1348 1348
1349 1349 switch (rev) {
1350 1350 case -4:
1351 1351 raise_revlog_error();
1352 1352 case -3:
1353 1353 return NULL;
1354 1354 case -2:
1355 1355 Py_RETURN_NONE;
1356 1356 case -1:
1357 1357 return PyString_FromStringAndSize(nullid, 20);
1358 1358 }
1359 1359
1360 1360 fullnode = index_node(self, rev);
1361 1361 if (fullnode == NULL) {
1362 1362 PyErr_Format(PyExc_IndexError,
1363 1363 "could not access rev %d", rev);
1364 1364 return NULL;
1365 1365 }
1366 1366 return PyString_FromStringAndSize(fullnode, 20);
1367 1367 }
1368 1368
1369 1369 static PyObject *index_m_get(indexObject *self, PyObject *args)
1370 1370 {
1371 1371 Py_ssize_t nodelen;
1372 1372 PyObject *val;
1373 1373 char *node;
1374 1374 int rev;
1375 1375
1376 1376 if (!PyArg_ParseTuple(args, "O", &val))
1377 1377 return NULL;
1378 1378 if (node_check(val, &node, &nodelen) == -1)
1379 1379 return NULL;
1380 1380 rev = index_find_node(self, node, nodelen);
1381 1381 if (rev == -3)
1382 1382 return NULL;
1383 1383 if (rev == -2)
1384 1384 Py_RETURN_NONE;
1385 1385 return PyInt_FromLong(rev);
1386 1386 }
1387 1387
1388 1388 static int index_contains(indexObject *self, PyObject *value)
1389 1389 {
1390 1390 char *node;
1391 1391 Py_ssize_t nodelen;
1392 1392
1393 1393 if (PyInt_Check(value)) {
1394 1394 long rev = PyInt_AS_LONG(value);
1395 1395 return rev >= -1 && rev < index_length(self);
1396 1396 }
1397 1397
1398 1398 if (node_check(value, &node, &nodelen) == -1)
1399 1399 return -1;
1400 1400
1401 1401 switch (index_find_node(self, node, nodelen)) {
1402 1402 case -3:
1403 1403 return -1;
1404 1404 case -2:
1405 1405 return 0;
1406 1406 default:
1407 1407 return 1;
1408 1408 }
1409 1409 }
1410 1410
1411 1411 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1412 1412 {
1413 1413 if (rev >= self->length - 1) {
1414 1414 PyObject *tuple = PyList_GET_ITEM(self->added,
1415 1415 rev - self->length + 1);
1416 1416 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1417 1417 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1418 1418 } else {
1419 1419 const char *data = index_deref(self, rev);
1420 1420 ps[0] = getbe32(data + 24);
1421 1421 ps[1] = getbe32(data + 28);
1422 1422 }
1423 1423 }
1424 1424
1425 1425 typedef uint64_t bitmask;
1426 1426
1427 1427 /*
1428 1428 * Given a disjoint set of revs, return all candidates for the
1429 1429 * greatest common ancestor. In revset notation, this is the set
1430 1430 * "heads(::a and ::b and ...)"
1431 1431 */
1432 1432 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1433 1433 int revcount)
1434 1434 {
1435 1435 const bitmask allseen = (1ull << revcount) - 1;
1436 1436 const bitmask poison = 1ull << revcount;
1437 1437 PyObject *gca = PyList_New(0);
1438 1438 int i, v, interesting;
1439 1439 int maxrev = -1;
1440 1440 bitmask sp;
1441 1441 bitmask *seen;
1442 1442
1443 1443 if (gca == NULL)
1444 1444 return PyErr_NoMemory();
1445 1445
1446 1446 for (i = 0; i < revcount; i++) {
1447 1447 if (revs[i] > maxrev)
1448 1448 maxrev = revs[i];
1449 1449 }
1450 1450
1451 1451 seen = calloc(sizeof(*seen), maxrev + 1);
1452 1452 if (seen == NULL) {
1453 1453 Py_DECREF(gca);
1454 1454 return PyErr_NoMemory();
1455 1455 }
1456 1456
1457 1457 for (i = 0; i < revcount; i++)
1458 1458 seen[revs[i]] = 1ull << i;
1459 1459
1460 1460 interesting = revcount;
1461 1461
1462 1462 for (v = maxrev; v >= 0 && interesting; v--) {
1463 1463 bitmask sv = seen[v];
1464 1464 int parents[2];
1465 1465
1466 1466 if (!sv)
1467 1467 continue;
1468 1468
1469 1469 if (sv < poison) {
1470 1470 interesting -= 1;
1471 1471 if (sv == allseen) {
1472 1472 PyObject *obj = PyInt_FromLong(v);
1473 1473 if (obj == NULL)
1474 1474 goto bail;
1475 1475 if (PyList_Append(gca, obj) == -1) {
1476 1476 Py_DECREF(obj);
1477 1477 goto bail;
1478 1478 }
1479 1479 sv |= poison;
1480 1480 for (i = 0; i < revcount; i++) {
1481 1481 if (revs[i] == v)
1482 1482 goto done;
1483 1483 }
1484 1484 }
1485 1485 }
1486 1486 index_get_parents(self, v, parents);
1487 1487
1488 1488 for (i = 0; i < 2; i++) {
1489 1489 int p = parents[i];
1490 1490 if (p == -1)
1491 1491 continue;
1492 1492 sp = seen[p];
1493 1493 if (sv < poison) {
1494 1494 if (sp == 0) {
1495 1495 seen[p] = sv;
1496 1496 interesting++;
1497 1497 }
1498 1498 else if (sp != sv)
1499 1499 seen[p] |= sv;
1500 1500 } else {
1501 1501 if (sp && sp < poison)
1502 1502 interesting--;
1503 1503 seen[p] = sv;
1504 1504 }
1505 1505 }
1506 1506 }
1507 1507
1508 1508 done:
1509 1509 free(seen);
1510 1510 return gca;
1511 1511 bail:
1512 1512 free(seen);
1513 1513 Py_XDECREF(gca);
1514 1514 return NULL;
1515 1515 }
1516 1516
1517 1517 /*
1518 1518 * Given a disjoint set of revs, return the subset with the longest
1519 1519 * path to the root.
1520 1520 */
1521 1521 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1522 1522 {
1523 1523 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1524 1524 static const Py_ssize_t capacity = 24;
1525 1525 int *depth, *interesting = NULL;
1526 1526 int i, j, v, ninteresting;
1527 1527 PyObject *dict = NULL, *keys = NULL;
1528 1528 long *seen = NULL;
1529 1529 int maxrev = -1;
1530 1530 long final;
1531 1531
1532 1532 if (revcount > capacity) {
1533 1533 PyErr_Format(PyExc_OverflowError,
1534 1534 "bitset size (%ld) > capacity (%ld)",
1535 1535 (long)revcount, (long)capacity);
1536 1536 return NULL;
1537 1537 }
1538 1538
1539 1539 for (i = 0; i < revcount; i++) {
1540 1540 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1541 1541 if (n > maxrev)
1542 1542 maxrev = n;
1543 1543 }
1544 1544
1545 1545 depth = calloc(sizeof(*depth), maxrev + 1);
1546 1546 if (depth == NULL)
1547 1547 return PyErr_NoMemory();
1548 1548
1549 1549 seen = calloc(sizeof(*seen), maxrev + 1);
1550 1550 if (seen == NULL) {
1551 1551 PyErr_NoMemory();
1552 1552 goto bail;
1553 1553 }
1554 1554
1555 1555 interesting = calloc(sizeof(*interesting), 2 << revcount);
1556 1556 if (interesting == NULL) {
1557 1557 PyErr_NoMemory();
1558 1558 goto bail;
1559 1559 }
1560 1560
1561 1561 if (PyList_Sort(revs) == -1)
1562 1562 goto bail;
1563 1563
1564 1564 for (i = 0; i < revcount; i++) {
1565 1565 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1566 1566 long b = 1l << i;
1567 1567 depth[n] = 1;
1568 1568 seen[n] = b;
1569 1569 interesting[b] = 1;
1570 1570 }
1571 1571
1572 1572 ninteresting = (int)revcount;
1573 1573
1574 1574 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1575 1575 int dv = depth[v];
1576 1576 int parents[2];
1577 1577 long sv;
1578 1578
1579 1579 if (dv == 0)
1580 1580 continue;
1581 1581
1582 1582 sv = seen[v];
1583 1583 index_get_parents(self, v, parents);
1584 1584
1585 1585 for (i = 0; i < 2; i++) {
1586 1586 int p = parents[i];
1587 1587 long nsp, sp;
1588 1588 int dp;
1589 1589
1590 1590 if (p == -1)
1591 1591 continue;
1592 1592
1593 1593 dp = depth[p];
1594 1594 nsp = sp = seen[p];
1595 1595 if (dp <= dv) {
1596 1596 depth[p] = dv + 1;
1597 1597 if (sp != sv) {
1598 1598 interesting[sv] += 1;
1599 1599 nsp = seen[p] = sv;
1600 1600 if (sp) {
1601 1601 interesting[sp] -= 1;
1602 1602 if (interesting[sp] == 0)
1603 1603 ninteresting -= 1;
1604 1604 }
1605 1605 }
1606 1606 }
1607 1607 else if (dv == dp - 1) {
1608 1608 nsp = sp | sv;
1609 1609 if (nsp == sp)
1610 1610 continue;
1611 1611 seen[p] = nsp;
1612 1612 interesting[sp] -= 1;
1613 1613 if (interesting[sp] == 0 && interesting[nsp] > 0)
1614 1614 ninteresting -= 1;
1615 1615 interesting[nsp] += 1;
1616 1616 }
1617 1617 }
1618 1618 interesting[sv] -= 1;
1619 1619 if (interesting[sv] == 0)
1620 1620 ninteresting -= 1;
1621 1621 }
1622 1622
1623 1623 final = 0;
1624 1624 j = ninteresting;
1625 1625 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1626 1626 if (interesting[i] == 0)
1627 1627 continue;
1628 1628 final |= i;
1629 1629 j -= 1;
1630 1630 }
1631 1631 if (final == 0) {
1632 1632 keys = PyList_New(0);
1633 1633 goto bail;
1634 1634 }
1635 1635
1636 1636 dict = PyDict_New();
1637 1637 if (dict == NULL)
1638 1638 goto bail;
1639 1639
1640 1640 for (i = 0; i < revcount; i++) {
1641 1641 PyObject *key;
1642 1642
1643 1643 if ((final & (1 << i)) == 0)
1644 1644 continue;
1645 1645
1646 1646 key = PyList_GET_ITEM(revs, i);
1647 1647 Py_INCREF(key);
1648 1648 Py_INCREF(Py_None);
1649 1649 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1650 1650 Py_DECREF(key);
1651 1651 Py_DECREF(Py_None);
1652 1652 goto bail;
1653 1653 }
1654 1654 }
1655 1655
1656 1656 keys = PyDict_Keys(dict);
1657 1657
1658 1658 bail:
1659 1659 free(depth);
1660 1660 free(seen);
1661 1661 free(interesting);
1662 1662 Py_XDECREF(dict);
1663 1663
1664 1664 return keys;
1665 1665 }
1666 1666
1667 1667 /*
1668 1668 * Given a (possibly overlapping) set of revs, return the greatest
1669 1669 * common ancestors: those with the longest path to the root.
1670 1670 */
1671 1671 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1672 1672 {
1673 1673 PyObject *ret = NULL, *gca = NULL;
1674 1674 Py_ssize_t argcount, i, len;
1675 1675 bitmask repeat = 0;
1676 1676 int revcount = 0;
1677 1677 int *revs;
1678 1678
1679 1679 argcount = PySequence_Length(args);
1680 1680 revs = malloc(argcount * sizeof(*revs));
1681 1681 if (argcount > 0 && revs == NULL)
1682 1682 return PyErr_NoMemory();
1683 1683 len = index_length(self) - 1;
1684 1684
1685 1685 for (i = 0; i < argcount; i++) {
1686 1686 static const int capacity = 24;
1687 1687 PyObject *obj = PySequence_GetItem(args, i);
1688 1688 bitmask x;
1689 1689 long val;
1690 1690
1691 1691 if (!PyInt_Check(obj)) {
1692 1692 PyErr_SetString(PyExc_TypeError,
1693 1693 "arguments must all be ints");
1694 1694 goto bail;
1695 1695 }
1696 1696 val = PyInt_AsLong(obj);
1697 1697 if (val == -1) {
1698 1698 ret = PyList_New(0);
1699 1699 goto done;
1700 1700 }
1701 1701 if (val < 0 || val >= len) {
1702 1702 PyErr_SetString(PyExc_IndexError,
1703 1703 "index out of range");
1704 1704 goto bail;
1705 1705 }
1706 1706 /* this cheesy bloom filter lets us avoid some more
1707 1707 * expensive duplicate checks in the common set-is-disjoint
1708 1708 * case */
1709 1709 x = 1ull << (val & 0x3f);
1710 1710 if (repeat & x) {
1711 1711 int k;
1712 1712 for (k = 0; k < revcount; k++) {
1713 1713 if (val == revs[k])
1714 1714 goto duplicate;
1715 1715 }
1716 1716 }
1717 1717 else repeat |= x;
1718 1718 if (revcount >= capacity) {
1719 1719 PyErr_Format(PyExc_OverflowError,
1720 1720 "bitset size (%d) > capacity (%d)",
1721 1721 revcount, capacity);
1722 1722 goto bail;
1723 1723 }
1724 1724 revs[revcount++] = (int)val;
1725 1725 duplicate:;
1726 1726 }
1727 1727
1728 1728 if (revcount == 0) {
1729 1729 ret = PyList_New(0);
1730 1730 goto done;
1731 1731 }
1732 1732 if (revcount == 1) {
1733 1733 PyObject *obj;
1734 1734 ret = PyList_New(1);
1735 1735 if (ret == NULL)
1736 1736 goto bail;
1737 1737 obj = PyInt_FromLong(revs[0]);
1738 1738 if (obj == NULL)
1739 1739 goto bail;
1740 1740 PyList_SET_ITEM(ret, 0, obj);
1741 1741 goto done;
1742 1742 }
1743 1743
1744 1744 gca = find_gca_candidates(self, revs, revcount);
1745 1745 if (gca == NULL)
1746 1746 goto bail;
1747 1747
1748 1748 if (PyList_GET_SIZE(gca) <= 1) {
1749 1749 ret = gca;
1750 1750 Py_INCREF(gca);
1751 1751 }
1752 1752 else ret = find_deepest(self, gca);
1753 1753
1754 1754 done:
1755 1755 free(revs);
1756 1756 Py_XDECREF(gca);
1757 1757
1758 1758 return ret;
1759 1759
1760 1760 bail:
1761 1761 free(revs);
1762 1762 Py_XDECREF(gca);
1763 1763 Py_XDECREF(ret);
1764 1764 return NULL;
1765 1765 }
1766 1766
1767 1767 /*
1768 1768 * Given a (possibly overlapping) set of revs, return all the
1769 1769 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1770 1770 */
1771 1771 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1772 1772 {
1773 1773 PyObject *ret = NULL;
1774 1774 Py_ssize_t argcount, i, len;
1775 1775 bitmask repeat = 0;
1776 1776 int revcount = 0;
1777 1777 int *revs;
1778 1778
1779 1779 argcount = PySequence_Length(args);
1780 1780 revs = malloc(argcount * sizeof(*revs));
1781 1781 if (argcount > 0 && revs == NULL)
1782 1782 return PyErr_NoMemory();
1783 1783 len = index_length(self) - 1;
1784 1784
1785 1785 for (i = 0; i < argcount; i++) {
1786 1786 static const int capacity = 24;
1787 1787 PyObject *obj = PySequence_GetItem(args, i);
1788 1788 bitmask x;
1789 1789 long val;
1790 1790
1791 1791 if (!PyInt_Check(obj)) {
1792 1792 PyErr_SetString(PyExc_TypeError,
1793 1793 "arguments must all be ints");
1794 1794 goto bail;
1795 1795 }
1796 1796 val = PyInt_AsLong(obj);
1797 1797 if (val == -1) {
1798 1798 ret = PyList_New(0);
1799 1799 goto done;
1800 1800 }
1801 1801 if (val < 0 || val >= len) {
1802 1802 PyErr_SetString(PyExc_IndexError,
1803 1803 "index out of range");
1804 1804 goto bail;
1805 1805 }
1806 1806 /* this cheesy bloom filter lets us avoid some more
1807 1807 * expensive duplicate checks in the common set-is-disjoint
1808 1808 * case */
1809 1809 x = 1ull << (val & 0x3f);
1810 1810 if (repeat & x) {
1811 1811 int k;
1812 1812 for (k = 0; k < revcount; k++) {
1813 1813 if (val == revs[k])
1814 1814 goto duplicate;
1815 1815 }
1816 1816 }
1817 1817 else repeat |= x;
1818 1818 if (revcount >= capacity) {
1819 1819 PyErr_Format(PyExc_OverflowError,
1820 1820 "bitset size (%d) > capacity (%d)",
1821 1821 revcount, capacity);
1822 1822 goto bail;
1823 1823 }
1824 1824 revs[revcount++] = (int)val;
1825 1825 duplicate:;
1826 1826 }
1827 1827
1828 1828 if (revcount == 0) {
1829 1829 ret = PyList_New(0);
1830 1830 goto done;
1831 1831 }
1832 1832 if (revcount == 1) {
1833 1833 PyObject *obj;
1834 1834 ret = PyList_New(1);
1835 1835 if (ret == NULL)
1836 1836 goto bail;
1837 1837 obj = PyInt_FromLong(revs[0]);
1838 1838 if (obj == NULL)
1839 1839 goto bail;
1840 1840 PyList_SET_ITEM(ret, 0, obj);
1841 1841 goto done;
1842 1842 }
1843 1843
1844 1844 ret = find_gca_candidates(self, revs, revcount);
1845 1845 if (ret == NULL)
1846 1846 goto bail;
1847 1847
1848 1848 done:
1849 1849 free(revs);
1850 1850 return ret;
1851 1851
1852 1852 bail:
1853 1853 free(revs);
1854 1854 Py_XDECREF(ret);
1855 1855 return NULL;
1856 1856 }
1857 1857
1858 1858 /*
1859 1859 * Invalidate any trie entries introduced by added revs.
1860 1860 */
1861 1861 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1862 1862 {
1863 1863 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1864 1864
1865 1865 for (i = start; i < len; i++) {
1866 1866 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1867 1867 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1868 1868
1869 1869 nt_insert(self, PyString_AS_STRING(node), -1);
1870 1870 }
1871 1871
1872 1872 if (start == 0)
1873 1873 Py_CLEAR(self->added);
1874 1874 }
1875 1875
1876 1876 /*
1877 1877 * Delete a numeric range of revs, which must be at the end of the
1878 1878 * range, but exclude the sentinel nullid entry.
1879 1879 */
1880 1880 static int index_slice_del(indexObject *self, PyObject *item)
1881 1881 {
1882 1882 Py_ssize_t start, stop, step, slicelength;
1883 1883 Py_ssize_t length = index_length(self);
1884 1884 int ret = 0;
1885 1885
1886 1886 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1887 1887 &start, &stop, &step, &slicelength) < 0)
1888 1888 return -1;
1889 1889
1890 1890 if (slicelength <= 0)
1891 1891 return 0;
1892 1892
1893 1893 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1894 1894 stop = start;
1895 1895
1896 1896 if (step < 0) {
1897 1897 stop = start + 1;
1898 1898 start = stop + step*(slicelength - 1) - 1;
1899 1899 step = -step;
1900 1900 }
1901 1901
1902 1902 if (step != 1) {
1903 1903 PyErr_SetString(PyExc_ValueError,
1904 1904 "revlog index delete requires step size of 1");
1905 1905 return -1;
1906 1906 }
1907 1907
1908 1908 if (stop != length - 1) {
1909 1909 PyErr_SetString(PyExc_IndexError,
1910 1910 "revlog index deletion indices are invalid");
1911 1911 return -1;
1912 1912 }
1913 1913
1914 1914 if (start < self->length - 1) {
1915 1915 if (self->nt) {
1916 1916 Py_ssize_t i;
1917 1917
1918 1918 for (i = start + 1; i < self->length - 1; i++) {
1919 1919 const char *node = index_node(self, i);
1920 1920
1921 1921 if (node)
1922 1922 nt_insert(self, node, -1);
1923 1923 }
1924 1924 if (self->added)
1925 1925 nt_invalidate_added(self, 0);
1926 1926 if (self->ntrev > start)
1927 1927 self->ntrev = (int)start;
1928 1928 }
1929 1929 self->length = start + 1;
1930 1930 if (start < self->raw_length) {
1931 1931 if (self->cache) {
1932 1932 Py_ssize_t i;
1933 1933 for (i = start; i < self->raw_length; i++)
1934 1934 Py_CLEAR(self->cache[i]);
1935 1935 }
1936 1936 self->raw_length = start;
1937 1937 }
1938 1938 goto done;
1939 1939 }
1940 1940
1941 1941 if (self->nt) {
1942 1942 nt_invalidate_added(self, start - self->length + 1);
1943 1943 if (self->ntrev > start)
1944 1944 self->ntrev = (int)start;
1945 1945 }
1946 1946 if (self->added)
1947 1947 ret = PyList_SetSlice(self->added, start - self->length + 1,
1948 1948 PyList_GET_SIZE(self->added), NULL);
1949 1949 done:
1950 1950 Py_CLEAR(self->headrevs);
1951 1951 return ret;
1952 1952 }
1953 1953
1954 1954 /*
1955 1955 * Supported ops:
1956 1956 *
1957 1957 * slice deletion
1958 1958 * string assignment (extend node->rev mapping)
1959 1959 * string deletion (shrink node->rev mapping)
1960 1960 */
1961 1961 static int index_assign_subscript(indexObject *self, PyObject *item,
1962 1962 PyObject *value)
1963 1963 {
1964 1964 char *node;
1965 1965 Py_ssize_t nodelen;
1966 1966 long rev;
1967 1967
1968 1968 if (PySlice_Check(item) && value == NULL)
1969 1969 return index_slice_del(self, item);
1970 1970
1971 1971 if (node_check(item, &node, &nodelen) == -1)
1972 1972 return -1;
1973 1973
1974 1974 if (value == NULL)
1975 1975 return self->nt ? nt_insert(self, node, -1) : 0;
1976 1976 rev = PyInt_AsLong(value);
1977 1977 if (rev > INT_MAX || rev < 0) {
1978 1978 if (!PyErr_Occurred())
1979 1979 PyErr_SetString(PyExc_ValueError, "rev out of range");
1980 1980 return -1;
1981 1981 }
1982 1982
1983 1983 if (nt_init(self) == -1)
1984 1984 return -1;
1985 1985 return nt_insert(self, node, (int)rev);
1986 1986 }
1987 1987
1988 1988 /*
1989 1989 * Find all RevlogNG entries in an index that has inline data. Update
1990 1990 * the optional "offsets" table with those entries.
1991 1991 */
1992 1992 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1993 1993 {
1994 1994 const char *data = PyString_AS_STRING(self->data);
1995 1995 Py_ssize_t pos = 0;
1996 1996 Py_ssize_t end = PyString_GET_SIZE(self->data);
1997 1997 long incr = v1_hdrsize;
1998 1998 Py_ssize_t len = 0;
1999 1999
2000 2000 while (pos + v1_hdrsize <= end && pos >= 0) {
2001 2001 uint32_t comp_len;
2002 2002 /* 3rd element of header is length of compressed inline data */
2003 2003 comp_len = getbe32(data + pos + 8);
2004 2004 incr = v1_hdrsize + comp_len;
2005 2005 if (offsets)
2006 2006 offsets[len] = data + pos;
2007 2007 len++;
2008 2008 pos += incr;
2009 2009 }
2010 2010
2011 2011 if (pos != end) {
2012 2012 if (!PyErr_Occurred())
2013 2013 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2014 2014 return -1;
2015 2015 }
2016 2016
2017 2017 return len;
2018 2018 }
2019 2019
2020 2020 static int index_init(indexObject *self, PyObject *args)
2021 2021 {
2022 2022 PyObject *data_obj, *inlined_obj;
2023 2023 Py_ssize_t size;
2024 2024
2025 2025 /* Initialize before argument-checking to avoid index_dealloc() crash. */
2026 2026 self->raw_length = 0;
2027 2027 self->added = NULL;
2028 2028 self->cache = NULL;
2029 2029 self->data = NULL;
2030 2030 self->headrevs = NULL;
2031 2031 self->filteredrevs = Py_None;
2032 2032 Py_INCREF(Py_None);
2033 2033 self->nt = NULL;
2034 2034 self->offsets = NULL;
2035 2035
2036 2036 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2037 2037 return -1;
2038 2038 if (!PyString_Check(data_obj)) {
2039 2039 PyErr_SetString(PyExc_TypeError, "data is not a string");
2040 2040 return -1;
2041 2041 }
2042 2042 size = PyString_GET_SIZE(data_obj);
2043 2043
2044 2044 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2045 2045 self->data = data_obj;
2046 2046
2047 2047 self->ntlength = self->ntcapacity = 0;
2048 2048 self->ntdepth = self->ntsplits = 0;
2049 2049 self->ntlookups = self->ntmisses = 0;
2050 2050 self->ntrev = -1;
2051 2051 Py_INCREF(self->data);
2052 2052
2053 2053 if (self->inlined) {
2054 2054 Py_ssize_t len = inline_scan(self, NULL);
2055 2055 if (len == -1)
2056 2056 goto bail;
2057 2057 self->raw_length = len;
2058 2058 self->length = len + 1;
2059 2059 } else {
2060 2060 if (size % v1_hdrsize) {
2061 2061 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2062 2062 goto bail;
2063 2063 }
2064 2064 self->raw_length = size / v1_hdrsize;
2065 2065 self->length = self->raw_length + 1;
2066 2066 }
2067 2067
2068 2068 return 0;
2069 2069 bail:
2070 2070 return -1;
2071 2071 }
2072 2072
2073 2073 static PyObject *index_nodemap(indexObject *self)
2074 2074 {
2075 2075 Py_INCREF(self);
2076 2076 return (PyObject *)self;
2077 2077 }
2078 2078
2079 2079 static void index_dealloc(indexObject *self)
2080 2080 {
2081 2081 _index_clearcaches(self);
2082 2082 Py_XDECREF(self->filteredrevs);
2083 2083 Py_XDECREF(self->data);
2084 2084 Py_XDECREF(self->added);
2085 2085 PyObject_Del(self);
2086 2086 }
2087 2087
2088 2088 static PySequenceMethods index_sequence_methods = {
2089 2089 (lenfunc)index_length, /* sq_length */
2090 2090 0, /* sq_concat */
2091 2091 0, /* sq_repeat */
2092 2092 (ssizeargfunc)index_get, /* sq_item */
2093 2093 0, /* sq_slice */
2094 2094 0, /* sq_ass_item */
2095 2095 0, /* sq_ass_slice */
2096 2096 (objobjproc)index_contains, /* sq_contains */
2097 2097 };
2098 2098
2099 2099 static PyMappingMethods index_mapping_methods = {
2100 2100 (lenfunc)index_length, /* mp_length */
2101 2101 (binaryfunc)index_getitem, /* mp_subscript */
2102 2102 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2103 2103 };
2104 2104
2105 2105 static PyMethodDef index_methods[] = {
2106 2106 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2107 2107 "return the gca set of the given revs"},
2108 2108 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2109 2109 METH_VARARGS,
2110 2110 "return the heads of the common ancestors of the given revs"},
2111 2111 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2112 2112 "clear the index caches"},
2113 2113 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2114 2114 "get an index entry"},
2115 2115 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2116 2116 "get head revisions"}, /* Can do filtering since 3.2 */
2117 2117 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2118 2118 "get filtered head revisions"}, /* Can always do filtering */
2119 2119 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2120 2120 "insert an index entry"},
2121 2121 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2122 2122 "match a potentially ambiguous node ID"},
2123 2123 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2124 2124 "stats for the index"},
2125 2125 {NULL} /* Sentinel */
2126 2126 };
2127 2127
2128 2128 static PyGetSetDef index_getset[] = {
2129 2129 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2130 2130 {NULL} /* Sentinel */
2131 2131 };
2132 2132
/*
 * Type object for "parsers.index". Slots are filled positionally, so the
 * order of the entries below must not change. Only deallocation, the
 * sequence and mapping protocols, the method and getset tables and
 * tp_init are provided; every other slot is left at 0. tp_new is not set
 * here -- module_init() assigns it before calling PyType_Ready().
 */
static PyTypeObject indexType = {
	PyObject_HEAD_INIT(NULL)
	0,                         /* ob_size */
	"parsers.index",           /* tp_name */
	sizeof(indexObject),       /* tp_basicsize */
	0,                         /* tp_itemsize */
	(destructor)index_dealloc, /* tp_dealloc */
	0,                         /* tp_print */
	0,                         /* tp_getattr */
	0,                         /* tp_setattr */
	0,                         /* tp_compare */
	0,                         /* tp_repr */
	0,                         /* tp_as_number */
	&index_sequence_methods,   /* tp_as_sequence */
	&index_mapping_methods,    /* tp_as_mapping */
	0,                         /* tp_hash */
	0,                         /* tp_call */
	0,                         /* tp_str */
	0,                         /* tp_getattro */
	0,                         /* tp_setattro */
	0,                         /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,        /* tp_flags */
	"revlog index",            /* tp_doc */
	0,                         /* tp_traverse */
	0,                         /* tp_clear */
	0,                         /* tp_richcompare */
	0,                         /* tp_weaklistoffset */
	0,                         /* tp_iter */
	0,                         /* tp_iternext */
	index_methods,             /* tp_methods */
	0,                         /* tp_members */
	index_getset,              /* tp_getset */
	0,                         /* tp_base */
	0,                         /* tp_dict */
	0,                         /* tp_descr_get */
	0,                         /* tp_descr_set */
	0,                         /* tp_dictoffset */
	(initproc)index_init,      /* tp_init */
	0,                         /* tp_alloc */
};
2173 2173
2174 2174 /*
2175 2175 * returns a tuple of the form (index, index, cache) with elements as
2176 2176 * follows:
2177 2177 *
2178 2178 * index: an index object that lazily parses RevlogNG records
2179 2179 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2180 2180 *
2181 2181 * added complications are for backwards compatibility
2182 2182 */
2183 2183 static PyObject *parse_index2(PyObject *self, PyObject *args)
2184 2184 {
2185 2185 PyObject *tuple = NULL, *cache = NULL;
2186 2186 indexObject *idx;
2187 2187 int ret;
2188 2188
2189 2189 idx = PyObject_New(indexObject, &indexType);
2190 2190 if (idx == NULL)
2191 2191 goto bail;
2192 2192
2193 2193 ret = index_init(idx, args);
2194 2194 if (ret == -1)
2195 2195 goto bail;
2196 2196
2197 2197 if (idx->inlined) {
2198 2198 cache = Py_BuildValue("iO", 0, idx->data);
2199 2199 if (cache == NULL)
2200 2200 goto bail;
2201 2201 } else {
2202 2202 cache = Py_None;
2203 2203 Py_INCREF(cache);
2204 2204 }
2205 2205
2206 2206 tuple = Py_BuildValue("NN", idx, cache);
2207 2207 if (!tuple)
2208 2208 goto bail;
2209 2209 return tuple;
2210 2210
2211 2211 bail:
2212 2212 Py_XDECREF(idx);
2213 2213 Py_XDECREF(cache);
2214 2214 Py_XDECREF(tuple);
2215 2215 return NULL;
2216 2216 }
2217 2217
/* Module docstring, passed to the module creation call. */
static char parsers_doc[] = "Efficient content parsing.";

/* Path-encoding helpers registered in the method table below.
 * NOTE(review): only prototypes are visible here; presumably they are
 * defined in another source file -- confirm. */
PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
2223 2223
2224 2224 static PyMethodDef methods[] = {
2225 2225 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2226 2226 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2227 2227 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2228 2228 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2229 2229 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2230 2230 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2231 2231 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2232 2232 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2233 2233 {NULL, NULL}
2234 2234 };
2235 2235
2236 2236 void dirs_module_init(PyObject *mod);
2237 2237
2238 2238 static void module_init(PyObject *mod)
2239 2239 {
2240 2240 /* This module constant has two purposes. First, it lets us unit test
2241 2241 * the ImportError raised without hard-coding any error text. This
2242 2242 * means we can change the text in the future without breaking tests,
2243 2243 * even across changesets without a recompile. Second, its presence
2244 2244 * can be used to determine whether the version-checking logic is
2245 2245 * present, which also helps in testing across changesets without a
2246 2246 * recompile. Note that this means the pure-Python version of parsers
2247 2247 * should not have this module constant. */
2248 2248 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2249 2249
2250 2250 dirs_module_init(mod);
2251 2251
2252 2252 indexType.tp_new = PyType_GenericNew;
2253 2253 if (PyType_Ready(&indexType) < 0 ||
2254 2254 PyType_Ready(&dirstateTupleType) < 0)
2255 2255 return;
2256 2256 Py_INCREF(&indexType);
2257 2257 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2258 2258 Py_INCREF(&dirstateTupleType);
2259 2259 PyModule_AddObject(mod, "dirstatetuple",
2260 2260 (PyObject *)&dirstateTupleType);
2261 2261
2262 2262 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2263 2263 -1, -1, -1, -1, nullid, 20);
2264 2264 if (nullentry)
2265 2265 PyObject_GC_UnTrack(nullentry);
2266 2266 }
2267 2267
2268 2268 static int check_python_version(void)
2269 2269 {
2270 PyObject *sys = PyImport_ImportModule("sys");
2271 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2270 PyObject *sys = PyImport_ImportModule("sys"), *ver;
2271 long hexversion;
2272 if (!sys)
2273 return -1;
2274 ver = PyObject_GetAttrString(sys, "hexversion");
2275 Py_DECREF(sys);
2276 if (!ver)
2277 return -1;
2278 hexversion = PyInt_AsLong(ver);
2279 Py_DECREF(ver);
2272 2280 /* sys.hexversion is a 32-bit number by default, so the -1 case
2273 2281 * should only occur in unusual circumstances (e.g. if sys.hexversion
2274 2282 * is manually set to an invalid value). */
2275 2283 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2276 2284 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2277 2285 "modules were compiled with Python " PY_VERSION ", but "
2278 2286 "Mercurial is currently using Python with sys.hexversion=%ld: "
2279 2287 "Python %s\n at: %s", versionerrortext, hexversion,
2280 2288 Py_GetVersion(), Py_GetProgramFullPath());
2281 2289 return -1;
2282 2290 }
2283 2291 return 0;
2284 2292 }
2285 2293
2286 2294 #ifdef IS_PY3K
2287 2295 static struct PyModuleDef parsers_module = {
2288 2296 PyModuleDef_HEAD_INIT,
2289 2297 "parsers",
2290 2298 parsers_doc,
2291 2299 -1,
2292 2300 methods
2293 2301 };
2294 2302
2295 2303 PyMODINIT_FUNC PyInit_parsers(void)
2296 2304 {
2297 2305 PyObject *mod;
2298 2306
2299 2307 if (check_python_version() == -1)
2300 2308 return;
2301 2309 mod = PyModule_Create(&parsers_module);
2302 2310 module_init(mod);
2303 2311 return mod;
2304 2312 }
2305 2313 #else
2306 2314 PyMODINIT_FUNC initparsers(void)
2307 2315 {
2308 2316 PyObject *mod;
2309 2317
2310 2318 if (check_python_version() == -1)
2311 2319 return;
2312 2320 mod = Py_InitModule3("parsers", methods, parsers_doc);
2313 2321 module_init(mod);
2314 2322 }
2315 2323 #endif
General Comments 0
You need to be logged in to leave comments. Login now