##// END OF EJS Templates
rust: exposing in parsers module...
Georges Racinet -
r40309:3b275f54 default
parent child Browse files
Show More
@@ -1,2313 +1,2469
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <assert.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AS_LONG PyLong_AS_LONG
28 28 #define PyInt_AsLong PyLong_AsLong
29 29 #endif
30 30
31 31 typedef struct indexObjectStruct indexObject;
32 32
/* One node of the base-16 trie: one slot per possible nybble value. */
33 33 typedef struct {
/* Per slot: 0 = empty, > 0 = index of the next trie node,
 * < 0 = leaf encoded as -(rev + 2). */
34 34 int children[16];
35 35 } nodetreenode;
36 36
37 37 /*
38 38 * A base-16 trie for fast node->rev mapping.
39 39 *
40 40 * Positive value is index of the next node in the trie
41 41 * Negative value is a leaf: -(rev + 2)
42 42 * Zero is empty
43 43 */
44 44 typedef struct {
/* index consulted by nt_find()/nt_insert() to read the node id of a rev */
45 45 indexObject *index;
/* array of trie nodes; entry 0 is where nt_find()/nt_insert() start */
46 46 nodetreenode *nodes;
47 47 unsigned length; /* # nodes in use */
48 48 unsigned capacity; /* # nodes allocated */
49 49 int depth; /* maximum depth of tree */
50 50 int splits; /* # splits performed */
51 51 } nodetree;
52 52
/* Python object that embeds a nodetree by value (see ntobj_insert()). */
53 53 typedef struct {
54 54 PyObject_HEAD
55 55 nodetree nt;
56 56 } nodetreeObject;
57 57
58 58 /*
59 59 * This class has two behaviors.
60 60 *
61 61 * When used in a list-like way (with integer keys), we decode an
62 62 * entry in a RevlogNG index file on demand. Our last entry is a
63 63 * sentinel, always a nullid. We have limited support for
64 64 * integer-keyed insert and delete, only at elements right before the
65 65 * sentinel.
66 66 *
67 67 * With string keys, we lazily perform a reverse mapping from node to
68 68 * rev, using a base-16 trie.
69 69 */
70 70 struct indexObjectStruct {
71 71 PyObject_HEAD
72 72 /* Type-specific fields go here. */
73 73 PyObject *data; /* raw bytes of index */
74 74 Py_buffer buf; /* buffer of data */
/* One slot per on-disk rev; allocated lazily by index_get(). */
75 75 PyObject **cache; /* cached tuples */
/* Start of each record; built by index_deref() for inlined indexes. */
76 76 const char **offsets; /* populated on demand */
77 77 Py_ssize_t raw_length; /* original number of elements */
78 78 Py_ssize_t length; /* current number of elements */
/* List of 8-tuples appended by index_append(); revs >= length live here. */
79 79 PyObject *added; /* populated on demand */
80 80 PyObject *headrevs; /* cache, invalidated on changes */
81 81 PyObject *filteredrevs;/* filtered revs set */
82 82 nodetree nt; /* base-16 trie */
83 83 int ntinitialized; /* 0 or 1 */
84 84 int ntrev; /* last rev scanned */
85 85 int ntlookups; /* # lookups */
86 86 int ntmisses; /* # lookups that miss the cache */
/* Non-zero: index_deref() locates records past rev 0 through offsets[]
 * instead of assuming fixed 64-byte strides. */
87 87 int inlined;
88 88 };
89 89
90 90 static Py_ssize_t index_length(const indexObject *self)
91 91 {
92 92 if (self->added == NULL)
93 93 return self->length;
94 94 return self->length + PyList_GET_SIZE(self->added);
95 95 }
96 96
/* Entry handed out by index_get() for rev -1; NULL here, so it must be
 * assigned elsewhere before index_get() is used. */
97 97 static PyObject *nullentry = NULL;
/* The all-zero 20-byte node id; index_node() returns it for rev -1. */
98 98 static const char nullid[20] = {0};
99 99
/* Forward declaration; index_deref() uses it to fill the offsets table. */
100 100 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
101 101
/* Py_BuildValue() format for the 8-tuple built by index_get(). The first
 * field is a uint64_t: "K" (unsigned long long) is used when long is
 * 32 bits, "k" (unsigned long) otherwise. PY23 presumably selects the
 * Python 2 or Python 3 spelling ("s#" vs "y#") - confirm in util.h. */
102 102 #if LONG_MAX == 0x7fffffffL
103 103 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
104 104 #else
105 105 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
106 106 #endif
107 107
108 108 /* A RevlogNG v1 index entry is 64 bytes long. */
109 109 static const long v1_hdrsize = 64;
110 110
111 111 static void raise_revlog_error(void)
112 112 {
113 113 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
114 114
115 115 mod = PyImport_ImportModule("mercurial.error");
116 116 if (mod == NULL) {
117 117 goto cleanup;
118 118 }
119 119
120 120 dict = PyModule_GetDict(mod);
121 121 if (dict == NULL) {
122 122 goto cleanup;
123 123 }
124 124 Py_INCREF(dict);
125 125
126 126 errclass = PyDict_GetItemString(dict, "RevlogError");
127 127 if (errclass == NULL) {
128 128 PyErr_SetString(PyExc_SystemError,
129 129 "could not find RevlogError");
130 130 goto cleanup;
131 131 }
132 132
133 133 /* value of exception is ignored by callers */
134 134 PyErr_SetString(errclass, "RevlogError");
135 135
136 136 cleanup:
137 137 Py_XDECREF(dict);
138 138 Py_XDECREF(mod);
139 139 }
140 140
141 141 /*
142 142 * Return a pointer to the beginning of a RevlogNG record.
143 143 */
144 144 static const char *index_deref(indexObject *self, Py_ssize_t pos)
145 145 {
146 146 if (self->inlined && pos > 0) {
147 147 if (self->offsets == NULL) {
148 148 self->offsets = PyMem_Malloc(self->raw_length *
149 149 sizeof(*self->offsets));
150 150 if (self->offsets == NULL)
151 151 return (const char *)PyErr_NoMemory();
152 152 inline_scan(self, self->offsets);
153 153 }
154 154 return self->offsets[pos];
155 155 }
156 156
157 157 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
158 158 }
159 159
160 160 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
161 161 int *ps, int maxrev)
162 162 {
163 163 if (rev >= self->length) {
164 164 PyObject *tuple = PyList_GET_ITEM(self->added, rev - self->length);
165 165 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
166 166 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
167 167 } else {
168 168 const char *data = index_deref(self, rev);
169 169 ps[0] = getbe32(data + 24);
170 170 ps[1] = getbe32(data + 28);
171 171 }
172 172 /* If index file is corrupted, ps[] may point to invalid revisions. So
173 173 * there is a risk of buffer overflow to trust them unconditionally. */
174 174 if (ps[0] > maxrev || ps[1] > maxrev) {
175 175 PyErr_SetString(PyExc_ValueError, "parent out of range");
176 176 return -1;
177 177 }
178 178 return 0;
179 179 }
180 180
181 181
182 182 /*
183 183 * RevlogNG format (all in big endian, data may be inlined):
184 184 * 6 bytes: offset
185 185 * 2 bytes: flags
186 186 * 4 bytes: compressed length
187 187 * 4 bytes: uncompressed length
188 188 * 4 bytes: base revision
189 189 * 4 bytes: link revision
190 190 * 4 bytes: parent 1 revision
191 191 * 4 bytes: parent 2 revision
192 192 * 32 bytes: nodeid (only 20 bytes used)
193 193 */
194 194 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
195 195 {
196 196 uint64_t offset_flags;
197 197 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
198 198 const char *c_node_id;
199 199 const char *data;
200 200 Py_ssize_t length = index_length(self);
201 201 PyObject *entry;
202 202
203 203 if (pos == -1) {
204 204 Py_INCREF(nullentry);
205 205 return nullentry;
206 206 }
207 207
208 208 if (pos < 0 || pos >= length) {
209 209 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
210 210 return NULL;
211 211 }
212 212
213 213 if (pos >= self->length) {
214 214 PyObject *obj;
215 215 obj = PyList_GET_ITEM(self->added, pos - self->length);
216 216 Py_INCREF(obj);
217 217 return obj;
218 218 }
219 219
220 220 if (self->cache) {
221 221 if (self->cache[pos]) {
222 222 Py_INCREF(self->cache[pos]);
223 223 return self->cache[pos];
224 224 }
225 225 } else {
226 226 self->cache = calloc(self->raw_length, sizeof(PyObject *));
227 227 if (self->cache == NULL)
228 228 return PyErr_NoMemory();
229 229 }
230 230
231 231 data = index_deref(self, pos);
232 232 if (data == NULL)
233 233 return NULL;
234 234
235 235 offset_flags = getbe32(data + 4);
236 236 if (pos == 0) /* mask out version number for the first entry */
237 237 offset_flags &= 0xFFFF;
238 238 else {
239 239 uint32_t offset_high = getbe32(data);
240 240 offset_flags |= ((uint64_t)offset_high) << 32;
241 241 }
242 242
243 243 comp_len = getbe32(data + 8);
244 244 uncomp_len = getbe32(data + 12);
245 245 base_rev = getbe32(data + 16);
246 246 link_rev = getbe32(data + 20);
247 247 parent_1 = getbe32(data + 24);
248 248 parent_2 = getbe32(data + 28);
249 249 c_node_id = data + 32;
250 250
251 251 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
252 252 uncomp_len, base_rev, link_rev,
253 253 parent_1, parent_2, c_node_id, 20);
254 254
255 255 if (entry) {
256 256 PyObject_GC_UnTrack(entry);
257 257 Py_INCREF(entry);
258 258 }
259 259
260 260 self->cache[pos] = entry;
261 261
262 262 return entry;
263 263 }
264 264
265 265 /*
266 266 * Return the 20-byte SHA of the node corresponding to the given rev.
267 267 */
268 268 static const char *index_node(indexObject *self, Py_ssize_t pos)
269 269 {
270 270 Py_ssize_t length = index_length(self);
271 271 const char *data;
272 272
273 273 if (pos == -1)
274 274 return nullid;
275 275
276 276 if (pos >= length)
277 277 return NULL;
278 278
279 279 if (pos >= self->length) {
280 280 PyObject *tuple, *str;
281 281 tuple = PyList_GET_ITEM(self->added, pos - self->length);
282 282 str = PyTuple_GetItem(tuple, 7);
283 283 return str ? PyBytes_AS_STRING(str) : NULL;
284 284 }
285 285
286 286 data = index_deref(self, pos);
287 287 return data ? data + 32 : NULL;
288 288 }
289 289
290 290 /*
291 291 * Return the 20-byte SHA of the node corresponding to the given rev. The
292 292 * rev is assumed to be existing. If not, an exception is set.
293 293 */
294 294 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
295 295 {
296 296 const char *node = index_node(self, pos);
297 297 if (node == NULL) {
298 298 PyErr_Format(PyExc_IndexError, "could not access rev %d",
299 299 (int)pos);
300 300 }
301 301 return node;
302 302 }
303 303
304 304 static int nt_insert(nodetree *self, const char *node, int rev);
305 305
306 306 static int node_check(PyObject *obj, char **node)
307 307 {
308 308 Py_ssize_t nodelen;
309 309 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
310 310 return -1;
311 311 if (nodelen == 20)
312 312 return 0;
313 313 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
314 314 return -1;
315 315 }
316 316
317 317 static PyObject *index_append(indexObject *self, PyObject *obj)
318 318 {
319 319 char *node;
320 320 Py_ssize_t len;
321 321
322 322 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
323 323 PyErr_SetString(PyExc_TypeError, "8-tuple required");
324 324 return NULL;
325 325 }
326 326
327 327 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
328 328 return NULL;
329 329
330 330 len = index_length(self);
331 331
332 332 if (self->added == NULL) {
333 333 self->added = PyList_New(0);
334 334 if (self->added == NULL)
335 335 return NULL;
336 336 }
337 337
338 338 if (PyList_Append(self->added, obj) == -1)
339 339 return NULL;
340 340
341 341 if (self->ntinitialized)
342 342 nt_insert(&self->nt, node, (int)len);
343 343
344 344 Py_CLEAR(self->headrevs);
345 345 Py_RETURN_NONE;
346 346 }
347 347
348 348 static PyObject *index_stats(indexObject *self)
349 349 {
350 350 PyObject *obj = PyDict_New();
351 351 PyObject *t = NULL;
352 352
353 353 if (obj == NULL)
354 354 return NULL;
355 355
356 356 #define istat(__n, __d) \
357 357 do { \
358 358 t = PyInt_FromSsize_t(self->__n); \
359 359 if (!t) \
360 360 goto bail; \
361 361 if (PyDict_SetItemString(obj, __d, t) == -1) \
362 362 goto bail; \
363 363 Py_DECREF(t); \
364 364 } while (0)
365 365
366 366 if (self->added) {
367 367 Py_ssize_t len = PyList_GET_SIZE(self->added);
368 368 t = PyInt_FromSsize_t(len);
369 369 if (!t)
370 370 goto bail;
371 371 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
372 372 goto bail;
373 373 Py_DECREF(t);
374 374 }
375 375
376 376 if (self->raw_length != self->length)
377 377 istat(raw_length, "revs on disk");
378 378 istat(length, "revs in memory");
379 379 istat(ntlookups, "node trie lookups");
380 380 istat(ntmisses, "node trie misses");
381 381 istat(ntrev, "node trie last rev scanned");
382 382 if (self->ntinitialized) {
383 383 istat(nt.capacity, "node trie capacity");
384 384 istat(nt.depth, "node trie depth");
385 385 istat(nt.length, "node trie count");
386 386 istat(nt.splits, "node trie splits");
387 387 }
388 388
389 389 #undef istat
390 390
391 391 return obj;
392 392
393 393 bail:
394 394 Py_XDECREF(obj);
395 395 Py_XDECREF(t);
396 396 return NULL;
397 397 }
398 398
399 399 /*
400 400 * When we cache a list, we want to be sure the caller can't mutate
401 401 * the cached copy.
402 402 */
403 403 static PyObject *list_copy(PyObject *list)
404 404 {
405 405 Py_ssize_t len = PyList_GET_SIZE(list);
406 406 PyObject *newlist = PyList_New(len);
407 407 Py_ssize_t i;
408 408
409 409 if (newlist == NULL)
410 410 return NULL;
411 411
412 412 for (i = 0; i < len; i++) {
413 413 PyObject *obj = PyList_GET_ITEM(list, i);
414 414 Py_INCREF(obj);
415 415 PyList_SET_ITEM(newlist, i, obj);
416 416 }
417 417
418 418 return newlist;
419 419 }
420 420
421 421 static int check_filter(PyObject *filter, Py_ssize_t arg)
422 422 {
423 423 if (filter) {
424 424 PyObject *arglist, *result;
425 425 int isfiltered;
426 426
427 427 arglist = Py_BuildValue("(n)", arg);
428 428 if (!arglist) {
429 429 return -1;
430 430 }
431 431
432 432 result = PyEval_CallObject(filter, arglist);
433 433 Py_DECREF(arglist);
434 434 if (!result) {
435 435 return -1;
436 436 }
437 437
438 438 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
439 439 * same as this function, so we can just return it directly.*/
440 440 isfiltered = PyObject_IsTrue(result);
441 441 Py_DECREF(result);
442 442 return isfiltered;
443 443 } else {
444 444 return 0;
445 445 }
446 446 }
447 447
448 448 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
449 449 Py_ssize_t marker, char *phases)
450 450 {
451 451 PyObject *iter = NULL;
452 452 PyObject *iter_item = NULL;
453 453 Py_ssize_t min_idx = index_length(self) + 2;
454 454 long iter_item_long;
455 455
456 456 if (PyList_GET_SIZE(list) != 0) {
457 457 iter = PyObject_GetIter(list);
458 458 if (iter == NULL)
459 459 return -2;
460 460 while ((iter_item = PyIter_Next(iter))) {
461 461 iter_item_long = PyInt_AS_LONG(iter_item);
462 462 Py_DECREF(iter_item);
463 463 if (iter_item_long < min_idx)
464 464 min_idx = iter_item_long;
465 465 phases[iter_item_long] = (char)marker;
466 466 }
467 467 Py_DECREF(iter);
468 468 }
469 469
470 470 return min_idx;
471 471 }
472 472
/*
 * Raise phases[i] to the highest phase found among its parents.
 * A negative parent (no parent) is ignored.
 */
static inline void set_phase_from_parents(char *phases, int parent_1,
                                          int parent_2, Py_ssize_t i)
{
	const int parents[2] = {parent_1, parent_2};
	int k;

	for (k = 0; k < 2; k++) {
		const int p = parents[k];
		if (p >= 0 && phases[p] > phases[i])
			phases[i] = phases[p];
	}
}
481 481
482 482 static PyObject *reachableroots2(indexObject *self, PyObject *args)
483 483 {
484 484
485 485 /* Input */
486 486 long minroot;
487 487 PyObject *includepatharg = NULL;
488 488 int includepath = 0;
489 489 /* heads and roots are lists */
490 490 PyObject *heads = NULL;
491 491 PyObject *roots = NULL;
492 492 PyObject *reachable = NULL;
493 493
494 494 PyObject *val;
495 495 Py_ssize_t len = index_length(self);
496 496 long revnum;
497 497 Py_ssize_t k;
498 498 Py_ssize_t i;
499 499 Py_ssize_t l;
500 500 int r;
501 501 int parents[2];
502 502
503 503 /* Internal data structure:
504 504 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
505 505 * revstates: array of length len+1 (all revs + nullrev) */
506 506 int *tovisit = NULL;
507 507 long lentovisit = 0;
508 508 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
509 509 char *revstates = NULL;
510 510
511 511 /* Get arguments */
512 512 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
513 513 &PyList_Type, &roots,
514 514 &PyBool_Type, &includepatharg))
515 515 goto bail;
516 516
517 517 if (includepatharg == Py_True)
518 518 includepath = 1;
519 519
520 520 /* Initialize return set */
521 521 reachable = PyList_New(0);
522 522 if (reachable == NULL)
523 523 goto bail;
524 524
525 525 /* Initialize internal datastructures */
526 526 tovisit = (int *)malloc((len + 1) * sizeof(int));
527 527 if (tovisit == NULL) {
528 528 PyErr_NoMemory();
529 529 goto bail;
530 530 }
531 531
532 532 revstates = (char *)calloc(len + 1, 1);
533 533 if (revstates == NULL) {
534 534 PyErr_NoMemory();
535 535 goto bail;
536 536 }
537 537
538 538 l = PyList_GET_SIZE(roots);
539 539 for (i = 0; i < l; i++) {
540 540 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
541 541 if (revnum == -1 && PyErr_Occurred())
542 542 goto bail;
543 543 /* If root is out of range, e.g. wdir(), it must be unreachable
544 544 * from heads. So we can just ignore it. */
545 545 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
546 546 continue;
547 547 revstates[revnum + 1] |= RS_ROOT;
548 548 }
549 549
550 550 /* Populate tovisit with all the heads */
551 551 l = PyList_GET_SIZE(heads);
552 552 for (i = 0; i < l; i++) {
553 553 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
554 554 if (revnum == -1 && PyErr_Occurred())
555 555 goto bail;
556 556 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
557 557 PyErr_SetString(PyExc_IndexError, "head out of range");
558 558 goto bail;
559 559 }
560 560 if (!(revstates[revnum + 1] & RS_SEEN)) {
561 561 tovisit[lentovisit++] = (int)revnum;
562 562 revstates[revnum + 1] |= RS_SEEN;
563 563 }
564 564 }
565 565
566 566 /* Visit the tovisit list and find the reachable roots */
567 567 k = 0;
568 568 while (k < lentovisit) {
569 569 /* Add the node to reachable if it is a root*/
570 570 revnum = tovisit[k++];
571 571 if (revstates[revnum + 1] & RS_ROOT) {
572 572 revstates[revnum + 1] |= RS_REACHABLE;
573 573 val = PyInt_FromLong(revnum);
574 574 if (val == NULL)
575 575 goto bail;
576 576 r = PyList_Append(reachable, val);
577 577 Py_DECREF(val);
578 578 if (r < 0)
579 579 goto bail;
580 580 if (includepath == 0)
581 581 continue;
582 582 }
583 583
584 584 /* Add its parents to the list of nodes to visit */
585 585 if (revnum == -1)
586 586 continue;
587 587 r = index_get_parents(self, revnum, parents, (int)len - 1);
588 588 if (r < 0)
589 589 goto bail;
590 590 for (i = 0; i < 2; i++) {
591 591 if (!(revstates[parents[i] + 1] & RS_SEEN)
592 592 && parents[i] >= minroot) {
593 593 tovisit[lentovisit++] = parents[i];
594 594 revstates[parents[i] + 1] |= RS_SEEN;
595 595 }
596 596 }
597 597 }
598 598
599 599 /* Find all the nodes in between the roots we found and the heads
600 600 * and add them to the reachable set */
601 601 if (includepath == 1) {
602 602 long minidx = minroot;
603 603 if (minidx < 0)
604 604 minidx = 0;
605 605 for (i = minidx; i < len; i++) {
606 606 if (!(revstates[i + 1] & RS_SEEN))
607 607 continue;
608 608 r = index_get_parents(self, i, parents, (int)len - 1);
609 609 /* Corrupted index file, error is set from
610 610 * index_get_parents */
611 611 if (r < 0)
612 612 goto bail;
613 613 if (((revstates[parents[0] + 1] |
614 614 revstates[parents[1] + 1]) & RS_REACHABLE)
615 615 && !(revstates[i + 1] & RS_REACHABLE)) {
616 616 revstates[i + 1] |= RS_REACHABLE;
617 617 val = PyInt_FromSsize_t(i);
618 618 if (val == NULL)
619 619 goto bail;
620 620 r = PyList_Append(reachable, val);
621 621 Py_DECREF(val);
622 622 if (r < 0)
623 623 goto bail;
624 624 }
625 625 }
626 626 }
627 627
628 628 free(revstates);
629 629 free(tovisit);
630 630 return reachable;
631 631 bail:
632 632 Py_XDECREF(reachable);
633 633 free(revstates);
634 634 free(tovisit);
635 635 return NULL;
636 636 }
637 637
638 638 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
639 639 {
640 640 PyObject *roots = Py_None;
641 641 PyObject *ret = NULL;
642 642 PyObject *phasessize = NULL;
643 643 PyObject *phaseroots = NULL;
644 644 PyObject *phaseset = NULL;
645 645 PyObject *phasessetlist = NULL;
646 646 PyObject *rev = NULL;
647 647 Py_ssize_t len = index_length(self);
648 648 Py_ssize_t numphase = 0;
649 649 Py_ssize_t minrevallphases = 0;
650 650 Py_ssize_t minrevphase = 0;
651 651 Py_ssize_t i = 0;
652 652 char *phases = NULL;
653 653 long phase;
654 654
655 655 if (!PyArg_ParseTuple(args, "O", &roots))
656 656 goto done;
657 657 if (roots == NULL || !PyList_Check(roots)) {
658 658 PyErr_SetString(PyExc_TypeError, "roots must be a list");
659 659 goto done;
660 660 }
661 661
662 662 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
663 663 if (phases == NULL) {
664 664 PyErr_NoMemory();
665 665 goto done;
666 666 }
667 667 /* Put the phase information of all the roots in phases */
668 668 numphase = PyList_GET_SIZE(roots)+1;
669 669 minrevallphases = len + 1;
670 670 phasessetlist = PyList_New(numphase);
671 671 if (phasessetlist == NULL)
672 672 goto done;
673 673
674 674 PyList_SET_ITEM(phasessetlist, 0, Py_None);
675 675 Py_INCREF(Py_None);
676 676
677 677 for (i = 0; i < numphase-1; i++) {
678 678 phaseroots = PyList_GET_ITEM(roots, i);
679 679 phaseset = PySet_New(NULL);
680 680 if (phaseset == NULL)
681 681 goto release;
682 682 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
683 683 if (!PyList_Check(phaseroots)) {
684 684 PyErr_SetString(PyExc_TypeError,
685 685 "roots item must be a list");
686 686 goto release;
687 687 }
688 688 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
689 689 if (minrevphase == -2) /* Error from add_roots_get_min */
690 690 goto release;
691 691 minrevallphases = MIN(minrevallphases, minrevphase);
692 692 }
693 693 /* Propagate the phase information from the roots to the revs */
694 694 if (minrevallphases != -1) {
695 695 int parents[2];
696 696 for (i = minrevallphases; i < len; i++) {
697 697 if (index_get_parents(self, i, parents,
698 698 (int)len - 1) < 0)
699 699 goto release;
700 700 set_phase_from_parents(phases, parents[0], parents[1], i);
701 701 }
702 702 }
703 703 /* Transform phase list to a python list */
704 704 phasessize = PyInt_FromSsize_t(len);
705 705 if (phasessize == NULL)
706 706 goto release;
707 707 for (i = 0; i < len; i++) {
708 708 phase = phases[i];
709 709 /* We only store the sets of phase for non public phase, the public phase
710 710 * is computed as a difference */
711 711 if (phase != 0) {
712 712 phaseset = PyList_GET_ITEM(phasessetlist, phase);
713 713 rev = PyInt_FromSsize_t(i);
714 714 if (rev == NULL)
715 715 goto release;
716 716 PySet_Add(phaseset, rev);
717 717 Py_XDECREF(rev);
718 718 }
719 719 }
720 720 ret = PyTuple_Pack(2, phasessize, phasessetlist);
721 721
722 722 release:
723 723 Py_XDECREF(phasessize);
724 724 Py_XDECREF(phasessetlist);
725 725 done:
726 726 free(phases);
727 727 return ret;
728 728 }
729 729
730 730 static PyObject *index_headrevs(indexObject *self, PyObject *args)
731 731 {
732 732 Py_ssize_t i, j, len;
733 733 char *nothead = NULL;
734 734 PyObject *heads = NULL;
735 735 PyObject *filter = NULL;
736 736 PyObject *filteredrevs = Py_None;
737 737
738 738 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
739 739 return NULL;
740 740 }
741 741
742 742 if (self->headrevs && filteredrevs == self->filteredrevs)
743 743 return list_copy(self->headrevs);
744 744
745 745 Py_DECREF(self->filteredrevs);
746 746 self->filteredrevs = filteredrevs;
747 747 Py_INCREF(filteredrevs);
748 748
749 749 if (filteredrevs != Py_None) {
750 750 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
751 751 if (!filter) {
752 752 PyErr_SetString(PyExc_TypeError,
753 753 "filteredrevs has no attribute __contains__");
754 754 goto bail;
755 755 }
756 756 }
757 757
758 758 len = index_length(self);
759 759 heads = PyList_New(0);
760 760 if (heads == NULL)
761 761 goto bail;
762 762 if (len == 0) {
763 763 PyObject *nullid = PyInt_FromLong(-1);
764 764 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
765 765 Py_XDECREF(nullid);
766 766 goto bail;
767 767 }
768 768 goto done;
769 769 }
770 770
771 771 nothead = calloc(len, 1);
772 772 if (nothead == NULL) {
773 773 PyErr_NoMemory();
774 774 goto bail;
775 775 }
776 776
777 777 for (i = len - 1; i >= 0; i--) {
778 778 int isfiltered;
779 779 int parents[2];
780 780
781 781 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
782 782 * node already, and therefore this node is not filtered. So we can skip
783 783 * the expensive check_filter step.
784 784 */
785 785 if (nothead[i] != 1) {
786 786 isfiltered = check_filter(filter, i);
787 787 if (isfiltered == -1) {
788 788 PyErr_SetString(PyExc_TypeError,
789 789 "unable to check filter");
790 790 goto bail;
791 791 }
792 792
793 793 if (isfiltered) {
794 794 nothead[i] = 1;
795 795 continue;
796 796 }
797 797 }
798 798
799 799 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
800 800 goto bail;
801 801 for (j = 0; j < 2; j++) {
802 802 if (parents[j] >= 0)
803 803 nothead[parents[j]] = 1;
804 804 }
805 805 }
806 806
807 807 for (i = 0; i < len; i++) {
808 808 PyObject *head;
809 809
810 810 if (nothead[i])
811 811 continue;
812 812 head = PyInt_FromSsize_t(i);
813 813 if (head == NULL || PyList_Append(heads, head) == -1) {
814 814 Py_XDECREF(head);
815 815 goto bail;
816 816 }
817 817 }
818 818
819 819 done:
820 820 self->headrevs = heads;
821 821 Py_XDECREF(filter);
822 822 free(nothead);
823 823 return list_copy(self->headrevs);
824 824 bail:
825 825 Py_XDECREF(filter);
826 826 Py_XDECREF(heads);
827 827 free(nothead);
828 828 return NULL;
829 829 }
830 830
831 831 /**
832 832 * Obtain the base revision index entry.
833 833 *
834 834 * Callers must ensure that rev >= 0 or illegal memory access may occur.
835 835 */
836 836 static inline int index_baserev(indexObject *self, int rev)
837 837 {
838 838 const char *data;
839 839
840 840 if (rev >= self->length) {
841 841 PyObject *tuple = PyList_GET_ITEM(self->added, rev - self->length);
842 842 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
843 843 }
844 844 else {
845 845 data = index_deref(self, rev);
846 846 if (data == NULL) {
847 847 return -2;
848 848 }
849 849
850 850 return getbe32(data + 16);
851 851 }
852 852 }
853 853
854 854 static PyObject *index_deltachain(indexObject *self, PyObject *args)
855 855 {
856 856 int rev, generaldelta;
857 857 PyObject *stoparg;
858 858 int stoprev, iterrev, baserev = -1;
859 859 int stopped;
860 860 PyObject *chain = NULL, *result = NULL;
861 861 const Py_ssize_t length = index_length(self);
862 862
863 863 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
864 864 return NULL;
865 865 }
866 866
867 867 if (PyInt_Check(stoparg)) {
868 868 stoprev = (int)PyInt_AsLong(stoparg);
869 869 if (stoprev == -1 && PyErr_Occurred()) {
870 870 return NULL;
871 871 }
872 872 }
873 873 else if (stoparg == Py_None) {
874 874 stoprev = -2;
875 875 }
876 876 else {
877 877 PyErr_SetString(PyExc_ValueError,
878 878 "stoprev must be integer or None");
879 879 return NULL;
880 880 }
881 881
882 882 if (rev < 0 || rev >= length) {
883 883 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
884 884 return NULL;
885 885 }
886 886
887 887 chain = PyList_New(0);
888 888 if (chain == NULL) {
889 889 return NULL;
890 890 }
891 891
892 892 baserev = index_baserev(self, rev);
893 893
894 894 /* This should never happen. */
895 895 if (baserev <= -2) {
896 896 /* Error should be set by index_deref() */
897 897 assert(PyErr_Occurred());
898 898 goto bail;
899 899 }
900 900
901 901 iterrev = rev;
902 902
903 903 while (iterrev != baserev && iterrev != stoprev) {
904 904 PyObject *value = PyInt_FromLong(iterrev);
905 905 if (value == NULL) {
906 906 goto bail;
907 907 }
908 908 if (PyList_Append(chain, value)) {
909 909 Py_DECREF(value);
910 910 goto bail;
911 911 }
912 912 Py_DECREF(value);
913 913
914 914 if (generaldelta) {
915 915 iterrev = baserev;
916 916 }
917 917 else {
918 918 iterrev--;
919 919 }
920 920
921 921 if (iterrev < 0) {
922 922 break;
923 923 }
924 924
925 925 if (iterrev >= length) {
926 926 PyErr_SetString(PyExc_IndexError, "revision outside index");
927 927 return NULL;
928 928 }
929 929
930 930 baserev = index_baserev(self, iterrev);
931 931
932 932 /* This should never happen. */
933 933 if (baserev <= -2) {
934 934 /* Error should be set by index_deref() */
935 935 assert(PyErr_Occurred());
936 936 goto bail;
937 937 }
938 938 }
939 939
940 940 if (iterrev == stoprev) {
941 941 stopped = 1;
942 942 }
943 943 else {
944 944 PyObject *value = PyInt_FromLong(iterrev);
945 945 if (value == NULL) {
946 946 goto bail;
947 947 }
948 948 if (PyList_Append(chain, value)) {
949 949 Py_DECREF(value);
950 950 goto bail;
951 951 }
952 952 Py_DECREF(value);
953 953
954 954 stopped = 0;
955 955 }
956 956
957 957 if (PyList_Reverse(chain)) {
958 958 goto bail;
959 959 }
960 960
961 961 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
962 962 Py_DECREF(chain);
963 963 return result;
964 964
965 965 bail:
966 966 Py_DECREF(chain);
967 967 return NULL;
968 968 }
969 969
/*
 * Return nybble number level of a binary node: even levels are the
 * high half of a byte, odd levels the low half.
 */
static inline int nt_level(const char *node, Py_ssize_t level)
{
	const int byte = node[level >> 1];

	return ((level & 1) ? byte : (byte >> 4)) & 0xf;
}
977 977
978 978 /*
979 979 * Return values:
980 980 *
981 981 * -4: match is ambiguous (multiple candidates)
982 982 * -2: not found
983 983 * rest: valid rev
984 984 */
985 985 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
986 986 int hex)
987 987 {
988 988 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
989 989 int level, maxlevel, off;
990 990
991 991 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
992 992 return -1;
993 993
994 994 if (hex)
995 995 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
996 996 else
997 997 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
998 998
999 999 for (level = off = 0; level < maxlevel; level++) {
1000 1000 int k = getnybble(node, level);
1001 1001 nodetreenode *n = &self->nodes[off];
1002 1002 int v = n->children[k];
1003 1003
1004 1004 if (v < 0) {
1005 1005 const char *n;
1006 1006 Py_ssize_t i;
1007 1007
1008 1008 v = -(v + 2);
1009 1009 n = index_node(self->index, v);
1010 1010 if (n == NULL)
1011 1011 return -2;
1012 1012 for (i = level; i < maxlevel; i++)
1013 1013 if (getnybble(node, i) != nt_level(n, i))
1014 1014 return -2;
1015 1015 return v;
1016 1016 }
1017 1017 if (v == 0)
1018 1018 return -2;
1019 1019 off = v;
1020 1020 }
1021 1021 /* multiple matches against an ambiguous prefix */
1022 1022 return -4;
1023 1023 }
1024 1024
1025 1025 static int nt_new(nodetree *self)
1026 1026 {
1027 1027 if (self->length == self->capacity) {
1028 1028 unsigned newcapacity;
1029 1029 nodetreenode *newnodes;
1030 1030 newcapacity = self->capacity * 2;
1031 1031 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1032 1032 PyErr_SetString(PyExc_MemoryError, "overflow in nt_new");
1033 1033 return -1;
1034 1034 }
1035 1035 newnodes = realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1036 1036 if (newnodes == NULL) {
1037 1037 PyErr_SetString(PyExc_MemoryError, "out of memory");
1038 1038 return -1;
1039 1039 }
1040 1040 self->capacity = newcapacity;
1041 1041 self->nodes = newnodes;
1042 1042 memset(&self->nodes[self->length], 0,
1043 1043 sizeof(nodetreenode) * (self->capacity - self->length));
1044 1044 }
1045 1045 return self->length++;
1046 1046 }
1047 1047
1048 1048 static int nt_insert(nodetree *self, const char *node, int rev)
1049 1049 {
1050 1050 int level = 0;
1051 1051 int off = 0;
1052 1052
1053 1053 while (level < 40) {
1054 1054 int k = nt_level(node, level);
1055 1055 nodetreenode *n;
1056 1056 int v;
1057 1057
1058 1058 n = &self->nodes[off];
1059 1059 v = n->children[k];
1060 1060
1061 1061 if (v == 0) {
1062 1062 n->children[k] = -rev - 2;
1063 1063 return 0;
1064 1064 }
1065 1065 if (v < 0) {
1066 1066 const char *oldnode = index_node_existing(self->index, -(v + 2));
1067 1067 int noff;
1068 1068
1069 1069 if (oldnode == NULL)
1070 1070 return -1;
1071 1071 if (!memcmp(oldnode, node, 20)) {
1072 1072 n->children[k] = -rev - 2;
1073 1073 return 0;
1074 1074 }
1075 1075 noff = nt_new(self);
1076 1076 if (noff == -1)
1077 1077 return -1;
1078 1078 /* self->nodes may have been changed by realloc */
1079 1079 self->nodes[off].children[k] = noff;
1080 1080 off = noff;
1081 1081 n = &self->nodes[off];
1082 1082 n->children[nt_level(oldnode, ++level)] = v;
1083 1083 if (level > self->depth)
1084 1084 self->depth = level;
1085 1085 self->splits += 1;
1086 1086 } else {
1087 1087 level += 1;
1088 1088 off = v;
1089 1089 }
1090 1090 }
1091 1091
1092 1092 return -1;
1093 1093 }
1094 1094
1095 1095 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1096 1096 {
1097 1097 Py_ssize_t rev;
1098 1098 const char *node;
1099 1099 Py_ssize_t length;
1100 1100 if (!PyArg_ParseTuple(args, "n", &rev))
1101 1101 return NULL;
1102 1102 length = index_length(self->nt.index);
1103 1103 if (rev < 0 || rev >= length) {
1104 1104 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1105 1105 return NULL;
1106 1106 }
1107 1107 node = index_node_existing(self->nt.index, rev);
1108 1108 if (nt_insert(&self->nt, node, (int)rev) == -1)
1109 1109 return NULL;
1110 1110 Py_RETURN_NONE;
1111 1111 }
1112 1112
1113 1113 static int nt_delete_node(nodetree *self, const char *node)
1114 1114 {
1115 1115 /* rev==-2 happens to get encoded as 0, which is interpreted as not set */
1116 1116 return nt_insert(self, node, -2);
1117 1117 }
1118 1118
1119 1119 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1120 1120 {
1121 1121 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1122 1122 self->nodes = NULL;
1123 1123
1124 1124 self->index = index;
1125 1125 /* The input capacity is in terms of revisions, while the field is in
1126 1126 * terms of nodetree nodes. */
1127 1127 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1128 1128 self->depth = 0;
1129 1129 self->splits = 0;
1130 1130 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1131 1131 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1132 1132 return -1;
1133 1133 }
1134 1134 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1135 1135 if (self->nodes == NULL) {
1136 1136 PyErr_NoMemory();
1137 1137 return -1;
1138 1138 }
1139 1139 self->length = 1;
1140 1140 return 0;
1141 1141 }
1142 1142
1143 1143 static PyTypeObject indexType;
1144 1144
1145 1145 static int ntobj_init(nodetreeObject *self, PyObject *args)
1146 1146 {
1147 1147 PyObject *index;
1148 1148 unsigned capacity;
1149 1149 if (!PyArg_ParseTuple(args, "O!I", &indexType, &index, &capacity))
1150 1150 return -1;
1151 1151 Py_INCREF(index);
1152 1152 return nt_init(&self->nt, (indexObject*)index, capacity);
1153 1153 }
1154 1154
1155 1155 static int nt_partialmatch(nodetree *self, const char *node,
1156 1156 Py_ssize_t nodelen)
1157 1157 {
1158 1158 return nt_find(self, node, nodelen, 1);
1159 1159 }
1160 1160
1161 1161 /*
1162 1162 * Find the length of the shortest unique prefix of node.
1163 1163 *
1164 1164 * Return values:
1165 1165 *
1166 1166 * -3: error (exception set)
1167 1167 * -2: not found (no exception set)
1168 1168 * rest: length of shortest prefix
1169 1169 */
1170 1170 static int nt_shortest(nodetree *self, const char *node)
1171 1171 {
1172 1172 int level, off;
1173 1173
1174 1174 for (level = off = 0; level < 40; level++) {
1175 1175 int k, v;
1176 1176 nodetreenode *n = &self->nodes[off];
1177 1177 k = nt_level(node, level);
1178 1178 v = n->children[k];
1179 1179 if (v < 0) {
1180 1180 const char *n;
1181 1181 v = -(v + 2);
1182 1182 n = index_node_existing(self->index, v);
1183 1183 if (n == NULL)
1184 1184 return -3;
1185 1185 if (memcmp(node, n, 20) != 0)
1186 1186 /*
1187 1187 * Found a unique prefix, but it wasn't for the
1188 1188 * requested node (i.e the requested node does
1189 1189 * not exist).
1190 1190 */
1191 1191 return -2;
1192 1192 return level + 1;
1193 1193 }
1194 1194 if (v == 0)
1195 1195 return -2;
1196 1196 off = v;
1197 1197 }
1198 1198 /*
1199 1199 * The node was still not unique after 40 hex digits, so this won't
1200 1200 * happen. Also, if we get here, then there's a programming error in
1201 1201 * this file that made us insert a node longer than 40 hex digits.
1202 1202 */
1203 1203 PyErr_SetString(PyExc_Exception, "broken node tree");
1204 1204 return -3;
1205 1205 }
1206 1206
1207 1207 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1208 1208 {
1209 1209 PyObject *val;
1210 1210 char *node;
1211 1211 int length;
1212 1212
1213 1213 if (!PyArg_ParseTuple(args, "O", &val))
1214 1214 return NULL;
1215 1215 if (node_check(val, &node) == -1)
1216 1216 return NULL;
1217 1217
1218 1218 length = nt_shortest(&self->nt, node);
1219 1219 if (length == -3)
1220 1220 return NULL;
1221 1221 if (length == -2) {
1222 1222 raise_revlog_error();
1223 1223 return NULL;
1224 1224 }
1225 1225 return PyInt_FromLong(length);
1226 1226 }
1227 1227
1228 1228 static void nt_dealloc(nodetree *self)
1229 1229 {
1230 1230 free(self->nodes);
1231 1231 self->nodes = NULL;
1232 1232 }
1233 1233
1234 1234 static void ntobj_dealloc(nodetreeObject *self)
1235 1235 {
1236 1236 Py_XDECREF(self->nt.index);
1237 1237 nt_dealloc(&self->nt);
1238 1238 PyObject_Del(self);
1239 1239 }
1240 1240
1241 1241 static PyMethodDef ntobj_methods[] = {
1242 1242 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1243 1243 "insert an index entry"},
1244 1244 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1245 1245 "find length of shortest hex nodeid of a binary ID"},
1246 1246 {NULL} /* Sentinel */
1247 1247 };
1248 1248
1249 1249 static PyTypeObject nodetreeType = {
1250 1250 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1251 1251 "parsers.nodetree", /* tp_name */
1252 1252 sizeof(nodetreeObject) , /* tp_basicsize */
1253 1253 0, /* tp_itemsize */
1254 1254 (destructor)ntobj_dealloc, /* tp_dealloc */
1255 1255 0, /* tp_print */
1256 1256 0, /* tp_getattr */
1257 1257 0, /* tp_setattr */
1258 1258 0, /* tp_compare */
1259 1259 0, /* tp_repr */
1260 1260 0, /* tp_as_number */
1261 1261 0, /* tp_as_sequence */
1262 1262 0, /* tp_as_mapping */
1263 1263 0, /* tp_hash */
1264 1264 0, /* tp_call */
1265 1265 0, /* tp_str */
1266 1266 0, /* tp_getattro */
1267 1267 0, /* tp_setattro */
1268 1268 0, /* tp_as_buffer */
1269 1269 Py_TPFLAGS_DEFAULT, /* tp_flags */
1270 1270 "nodetree", /* tp_doc */
1271 1271 0, /* tp_traverse */
1272 1272 0, /* tp_clear */
1273 1273 0, /* tp_richcompare */
1274 1274 0, /* tp_weaklistoffset */
1275 1275 0, /* tp_iter */
1276 1276 0, /* tp_iternext */
1277 1277 ntobj_methods, /* tp_methods */
1278 1278 0, /* tp_members */
1279 1279 0, /* tp_getset */
1280 1280 0, /* tp_base */
1281 1281 0, /* tp_dict */
1282 1282 0, /* tp_descr_get */
1283 1283 0, /* tp_descr_set */
1284 1284 0, /* tp_dictoffset */
1285 1285 (initproc)ntobj_init, /* tp_init */
1286 1286 0, /* tp_alloc */
1287 1287 };
1288 1288
1289 1289 static int index_init_nt(indexObject *self)
1290 1290 {
1291 1291 if (!self->ntinitialized) {
1292 1292 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1293 1293 nt_dealloc(&self->nt);
1294 1294 return -1;
1295 1295 }
1296 1296 if (nt_insert(&self->nt, nullid, -1) == -1) {
1297 1297 nt_dealloc(&self->nt);
1298 1298 return -1;
1299 1299 }
1300 1300 self->ntinitialized = 1;
1301 1301 self->ntrev = (int)index_length(self);
1302 1302 self->ntlookups = 1;
1303 1303 self->ntmisses = 0;
1304 1304 }
1305 1305 return 0;
1306 1306 }
1307 1307
1308 1308 /*
1309 1309 * Return values:
1310 1310 *
1311 1311 * -3: error (exception set)
1312 1312 * -2: not found (no exception set)
1313 1313 * rest: valid rev
1314 1314 */
1315 1315 static int index_find_node(indexObject *self,
1316 1316 const char *node, Py_ssize_t nodelen)
1317 1317 {
1318 1318 int rev;
1319 1319
1320 1320 if (index_init_nt(self) == -1)
1321 1321 return -3;
1322 1322
1323 1323 self->ntlookups++;
1324 1324 rev = nt_find(&self->nt, node, nodelen, 0);
1325 1325 if (rev >= -1)
1326 1326 return rev;
1327 1327
1328 1328 /*
1329 1329 * For the first handful of lookups, we scan the entire index,
1330 1330 * and cache only the matching nodes. This optimizes for cases
1331 1331 * like "hg tip", where only a few nodes are accessed.
1332 1332 *
1333 1333 * After that, we cache every node we visit, using a single
1334 1334 * scan amortized over multiple lookups. This gives the best
1335 1335 * bulk performance, e.g. for "hg log".
1336 1336 */
1337 1337 if (self->ntmisses++ < 4) {
1338 1338 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1339 1339 const char *n = index_node_existing(self, rev);
1340 1340 if (n == NULL)
1341 1341 return -3;
1342 1342 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1343 1343 if (nt_insert(&self->nt, n, rev) == -1)
1344 1344 return -3;
1345 1345 break;
1346 1346 }
1347 1347 }
1348 1348 } else {
1349 1349 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1350 1350 const char *n = index_node_existing(self, rev);
1351 1351 if (n == NULL)
1352 1352 return -3;
1353 1353 if (nt_insert(&self->nt, n, rev) == -1) {
1354 1354 self->ntrev = rev + 1;
1355 1355 return -3;
1356 1356 }
1357 1357 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1358 1358 break;
1359 1359 }
1360 1360 }
1361 1361 self->ntrev = rev;
1362 1362 }
1363 1363
1364 1364 if (rev >= 0)
1365 1365 return rev;
1366 1366 return -2;
1367 1367 }
1368 1368
1369 1369 static PyObject *index_getitem(indexObject *self, PyObject *value)
1370 1370 {
1371 1371 char *node;
1372 1372 int rev;
1373 1373
1374 1374 if (PyInt_Check(value))
1375 1375 return index_get(self, PyInt_AS_LONG(value));
1376 1376
1377 1377 if (node_check(value, &node) == -1)
1378 1378 return NULL;
1379 1379 rev = index_find_node(self, node, 20);
1380 1380 if (rev >= -1)
1381 1381 return PyInt_FromLong(rev);
1382 1382 if (rev == -2)
1383 1383 raise_revlog_error();
1384 1384 return NULL;
1385 1385 }
1386 1386
1387 1387 /*
1388 1388 * Fully populate the radix tree.
1389 1389 */
1390 1390 static int index_populate_nt(indexObject *self) {
1391 1391 int rev;
1392 1392 if (self->ntrev > 0) {
1393 1393 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1394 1394 const char *n = index_node_existing(self, rev);
1395 1395 if (n == NULL)
1396 1396 return -1;
1397 1397 if (nt_insert(&self->nt, n, rev) == -1)
1398 1398 return -1;
1399 1399 }
1400 1400 self->ntrev = -1;
1401 1401 }
1402 1402 return 0;
1403 1403 }
1404 1404
1405 1405 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1406 1406 {
1407 1407 const char *fullnode;
1408 1408 int nodelen;
1409 1409 char *node;
1410 1410 int rev, i;
1411 1411
1412 1412 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1413 1413 return NULL;
1414 1414
1415 1415 if (nodelen < 1) {
1416 1416 PyErr_SetString(PyExc_ValueError, "key too short");
1417 1417 return NULL;
1418 1418 }
1419 1419
1420 1420 if (nodelen > 40) {
1421 1421 PyErr_SetString(PyExc_ValueError, "key too long");
1422 1422 return NULL;
1423 1423 }
1424 1424
1425 1425 for (i = 0; i < nodelen; i++)
1426 1426 hexdigit(node, i);
1427 1427 if (PyErr_Occurred()) {
1428 1428 /* input contains non-hex characters */
1429 1429 PyErr_Clear();
1430 1430 Py_RETURN_NONE;
1431 1431 }
1432 1432
1433 1433 if (index_init_nt(self) == -1)
1434 1434 return NULL;
1435 1435 if (index_populate_nt(self) == -1)
1436 1436 return NULL;
1437 1437 rev = nt_partialmatch(&self->nt, node, nodelen);
1438 1438
1439 1439 switch (rev) {
1440 1440 case -4:
1441 1441 raise_revlog_error();
1442 1442 return NULL;
1443 1443 case -2:
1444 1444 Py_RETURN_NONE;
1445 1445 case -1:
1446 1446 return PyBytes_FromStringAndSize(nullid, 20);
1447 1447 }
1448 1448
1449 1449 fullnode = index_node_existing(self, rev);
1450 1450 if (fullnode == NULL) {
1451 1451 return NULL;
1452 1452 }
1453 1453 return PyBytes_FromStringAndSize(fullnode, 20);
1454 1454 }
1455 1455
1456 1456 static PyObject *index_shortest(indexObject *self, PyObject *args)
1457 1457 {
1458 1458 PyObject *val;
1459 1459 char *node;
1460 1460 int length;
1461 1461
1462 1462 if (!PyArg_ParseTuple(args, "O", &val))
1463 1463 return NULL;
1464 1464 if (node_check(val, &node) == -1)
1465 1465 return NULL;
1466 1466
1467 1467 self->ntlookups++;
1468 1468 if (index_init_nt(self) == -1)
1469 1469 return NULL;
1470 1470 if (index_populate_nt(self) == -1)
1471 1471 return NULL;
1472 1472 length = nt_shortest(&self->nt, node);
1473 1473 if (length == -3)
1474 1474 return NULL;
1475 1475 if (length == -2) {
1476 1476 raise_revlog_error();
1477 1477 return NULL;
1478 1478 }
1479 1479 return PyInt_FromLong(length);
1480 1480 }
1481 1481
1482 1482 static PyObject *index_m_get(indexObject *self, PyObject *args)
1483 1483 {
1484 1484 PyObject *val;
1485 1485 char *node;
1486 1486 int rev;
1487 1487
1488 1488 if (!PyArg_ParseTuple(args, "O", &val))
1489 1489 return NULL;
1490 1490 if (node_check(val, &node) == -1)
1491 1491 return NULL;
1492 1492 rev = index_find_node(self, node, 20);
1493 1493 if (rev == -3)
1494 1494 return NULL;
1495 1495 if (rev == -2)
1496 1496 Py_RETURN_NONE;
1497 1497 return PyInt_FromLong(rev);
1498 1498 }
1499 1499
1500 1500 static int index_contains(indexObject *self, PyObject *value)
1501 1501 {
1502 1502 char *node;
1503 1503
1504 1504 if (PyInt_Check(value)) {
1505 1505 long rev = PyInt_AS_LONG(value);
1506 1506 return rev >= -1 && rev < index_length(self);
1507 1507 }
1508 1508
1509 1509 if (node_check(value, &node) == -1)
1510 1510 return -1;
1511 1511
1512 1512 switch (index_find_node(self, node, 20)) {
1513 1513 case -3:
1514 1514 return -1;
1515 1515 case -2:
1516 1516 return 0;
1517 1517 default:
1518 1518 return 1;
1519 1519 }
1520 1520 }
1521 1521
1522 1522 typedef uint64_t bitmask;
1523 1523
1524 1524 /*
1525 1525 * Given a disjoint set of revs, return all candidates for the
1526 1526 * greatest common ancestor. In revset notation, this is the set
1527 1527 * "heads(::a and ::b and ...)"
1528 1528 */
1529 1529 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1530 1530 int revcount)
1531 1531 {
1532 1532 const bitmask allseen = (1ull << revcount) - 1;
1533 1533 const bitmask poison = 1ull << revcount;
1534 1534 PyObject *gca = PyList_New(0);
1535 1535 int i, v, interesting;
1536 1536 int maxrev = -1;
1537 1537 bitmask sp;
1538 1538 bitmask *seen;
1539 1539
1540 1540 if (gca == NULL)
1541 1541 return PyErr_NoMemory();
1542 1542
1543 1543 for (i = 0; i < revcount; i++) {
1544 1544 if (revs[i] > maxrev)
1545 1545 maxrev = revs[i];
1546 1546 }
1547 1547
1548 1548 seen = calloc(sizeof(*seen), maxrev + 1);
1549 1549 if (seen == NULL) {
1550 1550 Py_DECREF(gca);
1551 1551 return PyErr_NoMemory();
1552 1552 }
1553 1553
1554 1554 for (i = 0; i < revcount; i++)
1555 1555 seen[revs[i]] = 1ull << i;
1556 1556
1557 1557 interesting = revcount;
1558 1558
1559 1559 for (v = maxrev; v >= 0 && interesting; v--) {
1560 1560 bitmask sv = seen[v];
1561 1561 int parents[2];
1562 1562
1563 1563 if (!sv)
1564 1564 continue;
1565 1565
1566 1566 if (sv < poison) {
1567 1567 interesting -= 1;
1568 1568 if (sv == allseen) {
1569 1569 PyObject *obj = PyInt_FromLong(v);
1570 1570 if (obj == NULL)
1571 1571 goto bail;
1572 1572 if (PyList_Append(gca, obj) == -1) {
1573 1573 Py_DECREF(obj);
1574 1574 goto bail;
1575 1575 }
1576 1576 sv |= poison;
1577 1577 for (i = 0; i < revcount; i++) {
1578 1578 if (revs[i] == v)
1579 1579 goto done;
1580 1580 }
1581 1581 }
1582 1582 }
1583 1583 if (index_get_parents(self, v, parents, maxrev) < 0)
1584 1584 goto bail;
1585 1585
1586 1586 for (i = 0; i < 2; i++) {
1587 1587 int p = parents[i];
1588 1588 if (p == -1)
1589 1589 continue;
1590 1590 sp = seen[p];
1591 1591 if (sv < poison) {
1592 1592 if (sp == 0) {
1593 1593 seen[p] = sv;
1594 1594 interesting++;
1595 1595 }
1596 1596 else if (sp != sv)
1597 1597 seen[p] |= sv;
1598 1598 } else {
1599 1599 if (sp && sp < poison)
1600 1600 interesting--;
1601 1601 seen[p] = sv;
1602 1602 }
1603 1603 }
1604 1604 }
1605 1605
1606 1606 done:
1607 1607 free(seen);
1608 1608 return gca;
1609 1609 bail:
1610 1610 free(seen);
1611 1611 Py_XDECREF(gca);
1612 1612 return NULL;
1613 1613 }
1614 1614
1615 1615 /*
1616 1616 * Given a disjoint set of revs, return the subset with the longest
1617 1617 * path to the root.
1618 1618 */
1619 1619 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1620 1620 {
1621 1621 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1622 1622 static const Py_ssize_t capacity = 24;
1623 1623 int *depth, *interesting = NULL;
1624 1624 int i, j, v, ninteresting;
1625 1625 PyObject *dict = NULL, *keys = NULL;
1626 1626 long *seen = NULL;
1627 1627 int maxrev = -1;
1628 1628 long final;
1629 1629
1630 1630 if (revcount > capacity) {
1631 1631 PyErr_Format(PyExc_OverflowError,
1632 1632 "bitset size (%ld) > capacity (%ld)",
1633 1633 (long)revcount, (long)capacity);
1634 1634 return NULL;
1635 1635 }
1636 1636
1637 1637 for (i = 0; i < revcount; i++) {
1638 1638 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1639 1639 if (n > maxrev)
1640 1640 maxrev = n;
1641 1641 }
1642 1642
1643 1643 depth = calloc(sizeof(*depth), maxrev + 1);
1644 1644 if (depth == NULL)
1645 1645 return PyErr_NoMemory();
1646 1646
1647 1647 seen = calloc(sizeof(*seen), maxrev + 1);
1648 1648 if (seen == NULL) {
1649 1649 PyErr_NoMemory();
1650 1650 goto bail;
1651 1651 }
1652 1652
1653 1653 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
1654 1654 if (interesting == NULL) {
1655 1655 PyErr_NoMemory();
1656 1656 goto bail;
1657 1657 }
1658 1658
1659 1659 if (PyList_Sort(revs) == -1)
1660 1660 goto bail;
1661 1661
1662 1662 for (i = 0; i < revcount; i++) {
1663 1663 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1664 1664 long b = 1l << i;
1665 1665 depth[n] = 1;
1666 1666 seen[n] = b;
1667 1667 interesting[b] = 1;
1668 1668 }
1669 1669
1670 1670 /* invariant: ninteresting is the number of non-zero entries in
1671 1671 * interesting. */
1672 1672 ninteresting = (int)revcount;
1673 1673
1674 1674 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1675 1675 int dv = depth[v];
1676 1676 int parents[2];
1677 1677 long sv;
1678 1678
1679 1679 if (dv == 0)
1680 1680 continue;
1681 1681
1682 1682 sv = seen[v];
1683 1683 if (index_get_parents(self, v, parents, maxrev) < 0)
1684 1684 goto bail;
1685 1685
1686 1686 for (i = 0; i < 2; i++) {
1687 1687 int p = parents[i];
1688 1688 long sp;
1689 1689 int dp;
1690 1690
1691 1691 if (p == -1)
1692 1692 continue;
1693 1693
1694 1694 dp = depth[p];
1695 1695 sp = seen[p];
1696 1696 if (dp <= dv) {
1697 1697 depth[p] = dv + 1;
1698 1698 if (sp != sv) {
1699 1699 interesting[sv] += 1;
1700 1700 seen[p] = sv;
1701 1701 if (sp) {
1702 1702 interesting[sp] -= 1;
1703 1703 if (interesting[sp] == 0)
1704 1704 ninteresting -= 1;
1705 1705 }
1706 1706 }
1707 1707 }
1708 1708 else if (dv == dp - 1) {
1709 1709 long nsp = sp | sv;
1710 1710 if (nsp == sp)
1711 1711 continue;
1712 1712 seen[p] = nsp;
1713 1713 interesting[sp] -= 1;
1714 1714 if (interesting[sp] == 0)
1715 1715 ninteresting -= 1;
1716 1716 if (interesting[nsp] == 0)
1717 1717 ninteresting += 1;
1718 1718 interesting[nsp] += 1;
1719 1719 }
1720 1720 }
1721 1721 interesting[sv] -= 1;
1722 1722 if (interesting[sv] == 0)
1723 1723 ninteresting -= 1;
1724 1724 }
1725 1725
1726 1726 final = 0;
1727 1727 j = ninteresting;
1728 1728 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1729 1729 if (interesting[i] == 0)
1730 1730 continue;
1731 1731 final |= i;
1732 1732 j -= 1;
1733 1733 }
1734 1734 if (final == 0) {
1735 1735 keys = PyList_New(0);
1736 1736 goto bail;
1737 1737 }
1738 1738
1739 1739 dict = PyDict_New();
1740 1740 if (dict == NULL)
1741 1741 goto bail;
1742 1742
1743 1743 for (i = 0; i < revcount; i++) {
1744 1744 PyObject *key;
1745 1745
1746 1746 if ((final & (1 << i)) == 0)
1747 1747 continue;
1748 1748
1749 1749 key = PyList_GET_ITEM(revs, i);
1750 1750 Py_INCREF(key);
1751 1751 Py_INCREF(Py_None);
1752 1752 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1753 1753 Py_DECREF(key);
1754 1754 Py_DECREF(Py_None);
1755 1755 goto bail;
1756 1756 }
1757 1757 }
1758 1758
1759 1759 keys = PyDict_Keys(dict);
1760 1760
1761 1761 bail:
1762 1762 free(depth);
1763 1763 free(seen);
1764 1764 free(interesting);
1765 1765 Py_XDECREF(dict);
1766 1766
1767 1767 return keys;
1768 1768 }
1769 1769
1770 1770 /*
1771 1771 * Given a (possibly overlapping) set of revs, return all the
1772 1772 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
1773 1773 */
1774 1774 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1775 1775 {
1776 1776 PyObject *ret = NULL;
1777 1777 Py_ssize_t argcount, i, len;
1778 1778 bitmask repeat = 0;
1779 1779 int revcount = 0;
1780 1780 int *revs;
1781 1781
1782 1782 argcount = PySequence_Length(args);
1783 1783 revs = PyMem_Malloc(argcount * sizeof(*revs));
1784 1784 if (argcount > 0 && revs == NULL)
1785 1785 return PyErr_NoMemory();
1786 1786 len = index_length(self);
1787 1787
1788 1788 for (i = 0; i < argcount; i++) {
1789 1789 static const int capacity = 24;
1790 1790 PyObject *obj = PySequence_GetItem(args, i);
1791 1791 bitmask x;
1792 1792 long val;
1793 1793
1794 1794 if (!PyInt_Check(obj)) {
1795 1795 PyErr_SetString(PyExc_TypeError,
1796 1796 "arguments must all be ints");
1797 1797 Py_DECREF(obj);
1798 1798 goto bail;
1799 1799 }
1800 1800 val = PyInt_AsLong(obj);
1801 1801 Py_DECREF(obj);
1802 1802 if (val == -1) {
1803 1803 ret = PyList_New(0);
1804 1804 goto done;
1805 1805 }
1806 1806 if (val < 0 || val >= len) {
1807 1807 PyErr_SetString(PyExc_IndexError,
1808 1808 "index out of range");
1809 1809 goto bail;
1810 1810 }
1811 1811 /* this cheesy bloom filter lets us avoid some more
1812 1812 * expensive duplicate checks in the common set-is-disjoint
1813 1813 * case */
1814 1814 x = 1ull << (val & 0x3f);
1815 1815 if (repeat & x) {
1816 1816 int k;
1817 1817 for (k = 0; k < revcount; k++) {
1818 1818 if (val == revs[k])
1819 1819 goto duplicate;
1820 1820 }
1821 1821 }
1822 1822 else repeat |= x;
1823 1823 if (revcount >= capacity) {
1824 1824 PyErr_Format(PyExc_OverflowError,
1825 1825 "bitset size (%d) > capacity (%d)",
1826 1826 revcount, capacity);
1827 1827 goto bail;
1828 1828 }
1829 1829 revs[revcount++] = (int)val;
1830 1830 duplicate:;
1831 1831 }
1832 1832
1833 1833 if (revcount == 0) {
1834 1834 ret = PyList_New(0);
1835 1835 goto done;
1836 1836 }
1837 1837 if (revcount == 1) {
1838 1838 PyObject *obj;
1839 1839 ret = PyList_New(1);
1840 1840 if (ret == NULL)
1841 1841 goto bail;
1842 1842 obj = PyInt_FromLong(revs[0]);
1843 1843 if (obj == NULL)
1844 1844 goto bail;
1845 1845 PyList_SET_ITEM(ret, 0, obj);
1846 1846 goto done;
1847 1847 }
1848 1848
1849 1849 ret = find_gca_candidates(self, revs, revcount);
1850 1850 if (ret == NULL)
1851 1851 goto bail;
1852 1852
1853 1853 done:
1854 1854 PyMem_Free(revs);
1855 1855 return ret;
1856 1856
1857 1857 bail:
1858 1858 PyMem_Free(revs);
1859 1859 Py_XDECREF(ret);
1860 1860 return NULL;
1861 1861 }
1862 1862
1863 1863 /*
1864 1864 * Given a (possibly overlapping) set of revs, return the greatest
1865 1865 * common ancestors: those with the longest path to the root.
1866 1866 */
1867 1867 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1868 1868 {
1869 1869 PyObject *ret;
1870 1870 PyObject *gca = index_commonancestorsheads(self, args);
1871 1871 if (gca == NULL)
1872 1872 return NULL;
1873 1873
1874 1874 if (PyList_GET_SIZE(gca) <= 1) {
1875 1875 return gca;
1876 1876 }
1877 1877
1878 1878 ret = find_deepest(self, gca);
1879 1879 Py_DECREF(gca);
1880 1880 return ret;
1881 1881 }
1882 1882
1883 1883 /*
1884 1884 * Invalidate any trie entries introduced by added revs.
1885 1885 */
1886 1886 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
1887 1887 {
1888 1888 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1889 1889
1890 1890 for (i = start; i < len; i++) {
1891 1891 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1892 1892 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1893 1893
1894 1894 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
1895 1895 }
1896 1896
1897 1897 if (start == 0)
1898 1898 Py_CLEAR(self->added);
1899 1899 }
1900 1900
1901 1901 /*
1902 1902 * Delete a numeric range of revs, which must be at the end of the
1903 1903 * range, but exclude the sentinel nullid entry.
1904 1904 */
1905 1905 static int index_slice_del(indexObject *self, PyObject *item)
1906 1906 {
1907 1907 Py_ssize_t start, stop, step, slicelength;
1908 1908 Py_ssize_t length = index_length(self) + 1;
1909 1909 int ret = 0;
1910 1910
1911 1911 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1912 1912 #ifdef IS_PY3K
1913 1913 if (PySlice_GetIndicesEx(item, length,
1914 1914 &start, &stop, &step, &slicelength) < 0)
1915 1915 #else
1916 1916 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1917 1917 &start, &stop, &step, &slicelength) < 0)
1918 1918 #endif
1919 1919 return -1;
1920 1920
1921 1921 if (slicelength <= 0)
1922 1922 return 0;
1923 1923
1924 1924 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1925 1925 stop = start;
1926 1926
1927 1927 if (step < 0) {
1928 1928 stop = start + 1;
1929 1929 start = stop + step*(slicelength - 1) - 1;
1930 1930 step = -step;
1931 1931 }
1932 1932
1933 1933 if (step != 1) {
1934 1934 PyErr_SetString(PyExc_ValueError,
1935 1935 "revlog index delete requires step size of 1");
1936 1936 return -1;
1937 1937 }
1938 1938
1939 1939 if (stop != length - 1) {
1940 1940 PyErr_SetString(PyExc_IndexError,
1941 1941 "revlog index deletion indices are invalid");
1942 1942 return -1;
1943 1943 }
1944 1944
1945 1945 if (start < self->length) {
1946 1946 if (self->ntinitialized) {
1947 1947 Py_ssize_t i;
1948 1948
1949 1949 for (i = start + 1; i < self->length; i++) {
1950 1950 const char *node = index_node_existing(self, i);
1951 1951 if (node == NULL)
1952 1952 return -1;
1953 1953
1954 1954 nt_delete_node(&self->nt, node);
1955 1955 }
1956 1956 if (self->added)
1957 1957 index_invalidate_added(self, 0);
1958 1958 if (self->ntrev > start)
1959 1959 self->ntrev = (int)start;
1960 1960 }
1961 1961 self->length = start;
1962 1962 if (start < self->raw_length) {
1963 1963 if (self->cache) {
1964 1964 Py_ssize_t i;
1965 1965 for (i = start; i < self->raw_length; i++)
1966 1966 Py_CLEAR(self->cache[i]);
1967 1967 }
1968 1968 self->raw_length = start;
1969 1969 }
1970 1970 goto done;
1971 1971 }
1972 1972
1973 1973 if (self->ntinitialized) {
1974 1974 index_invalidate_added(self, start - self->length);
1975 1975 if (self->ntrev > start)
1976 1976 self->ntrev = (int)start;
1977 1977 }
1978 1978 if (self->added)
1979 1979 ret = PyList_SetSlice(self->added, start - self->length,
1980 1980 PyList_GET_SIZE(self->added), NULL);
1981 1981 done:
1982 1982 Py_CLEAR(self->headrevs);
1983 1983 return ret;
1984 1984 }
1985 1985
1986 1986 /*
1987 1987 * Supported ops:
1988 1988 *
1989 1989 * slice deletion
1990 1990 * string assignment (extend node->rev mapping)
1991 1991 * string deletion (shrink node->rev mapping)
1992 1992 */
1993 1993 static int index_assign_subscript(indexObject *self, PyObject *item,
1994 1994 PyObject *value)
1995 1995 {
1996 1996 char *node;
1997 1997 long rev;
1998 1998
1999 1999 if (PySlice_Check(item) && value == NULL)
2000 2000 return index_slice_del(self, item);
2001 2001
2002 2002 if (node_check(item, &node) == -1)
2003 2003 return -1;
2004 2004
2005 2005 if (value == NULL)
2006 2006 return self->ntinitialized ? nt_delete_node(&self->nt, node) : 0;
2007 2007 rev = PyInt_AsLong(value);
2008 2008 if (rev > INT_MAX || rev < 0) {
2009 2009 if (!PyErr_Occurred())
2010 2010 PyErr_SetString(PyExc_ValueError, "rev out of range");
2011 2011 return -1;
2012 2012 }
2013 2013
2014 2014 if (index_init_nt(self) == -1)
2015 2015 return -1;
2016 2016 return nt_insert(&self->nt, node, (int)rev);
2017 2017 }
2018 2018
2019 2019 /*
2020 2020 * Find all RevlogNG entries in an index that has inline data. Update
2021 2021 * the optional "offsets" table with those entries.
2022 2022 */
2023 2023 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2024 2024 {
2025 2025 const char *data = (const char *)self->buf.buf;
2026 2026 Py_ssize_t pos = 0;
2027 2027 Py_ssize_t end = self->buf.len;
2028 2028 long incr = v1_hdrsize;
2029 2029 Py_ssize_t len = 0;
2030 2030
2031 2031 while (pos + v1_hdrsize <= end && pos >= 0) {
2032 2032 uint32_t comp_len;
2033 2033 /* 3rd element of header is length of compressed inline data */
2034 2034 comp_len = getbe32(data + pos + 8);
2035 2035 incr = v1_hdrsize + comp_len;
2036 2036 if (offsets)
2037 2037 offsets[len] = data + pos;
2038 2038 len++;
2039 2039 pos += incr;
2040 2040 }
2041 2041
2042 2042 if (pos != end) {
2043 2043 if (!PyErr_Occurred())
2044 2044 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2045 2045 return -1;
2046 2046 }
2047 2047
2048 2048 return len;
2049 2049 }
2050 2050
2051 2051 static int index_init(indexObject *self, PyObject *args)
2052 2052 {
2053 2053 PyObject *data_obj, *inlined_obj;
2054 2054 Py_ssize_t size;
2055 2055
2056 2056 /* Initialize before argument-checking to avoid index_dealloc() crash. */
2057 2057 self->raw_length = 0;
2058 2058 self->added = NULL;
2059 2059 self->cache = NULL;
2060 2060 self->data = NULL;
2061 2061 memset(&self->buf, 0, sizeof(self->buf));
2062 2062 self->headrevs = NULL;
2063 2063 self->filteredrevs = Py_None;
2064 2064 Py_INCREF(Py_None);
2065 2065 self->ntinitialized = 0;
2066 2066 self->offsets = NULL;
2067 2067
2068 2068 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2069 2069 return -1;
2070 2070 if (!PyObject_CheckBuffer(data_obj)) {
2071 2071 PyErr_SetString(PyExc_TypeError,
2072 2072 "data does not support buffer interface");
2073 2073 return -1;
2074 2074 }
2075 2075
2076 2076 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2077 2077 return -1;
2078 2078 size = self->buf.len;
2079 2079
2080 2080 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2081 2081 self->data = data_obj;
2082 2082
2083 2083 self->ntlookups = self->ntmisses = 0;
2084 2084 self->ntrev = -1;
2085 2085 Py_INCREF(self->data);
2086 2086
2087 2087 if (self->inlined) {
2088 2088 Py_ssize_t len = inline_scan(self, NULL);
2089 2089 if (len == -1)
2090 2090 goto bail;
2091 2091 self->raw_length = len;
2092 2092 self->length = len;
2093 2093 } else {
2094 2094 if (size % v1_hdrsize) {
2095 2095 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2096 2096 goto bail;
2097 2097 }
2098 2098 self->raw_length = size / v1_hdrsize;
2099 2099 self->length = self->raw_length;
2100 2100 }
2101 2101
2102 2102 return 0;
2103 2103 bail:
2104 2104 return -1;
2105 2105 }
2106 2106
2107 2107 static PyObject *index_nodemap(indexObject *self)
2108 2108 {
2109 2109 Py_INCREF(self);
2110 2110 return (PyObject *)self;
2111 2111 }
2112 2112
2113 2113 static void _index_clearcaches(indexObject *self)
2114 2114 {
2115 2115 if (self->cache) {
2116 2116 Py_ssize_t i;
2117 2117
2118 2118 for (i = 0; i < self->raw_length; i++)
2119 2119 Py_CLEAR(self->cache[i]);
2120 2120 free(self->cache);
2121 2121 self->cache = NULL;
2122 2122 }
2123 2123 if (self->offsets) {
2124 2124 PyMem_Free((void *)self->offsets);
2125 2125 self->offsets = NULL;
2126 2126 }
2127 2127 if (self->ntinitialized) {
2128 2128 nt_dealloc(&self->nt);
2129 2129 }
2130 2130 self->ntinitialized = 0;
2131 2131 Py_CLEAR(self->headrevs);
2132 2132 }
2133 2133
2134 2134 static PyObject *index_clearcaches(indexObject *self)
2135 2135 {
2136 2136 _index_clearcaches(self);
2137 2137 self->ntrev = -1;
2138 2138 self->ntlookups = self->ntmisses = 0;
2139 2139 Py_RETURN_NONE;
2140 2140 }
2141 2141
2142 2142 static void index_dealloc(indexObject *self)
2143 2143 {
2144 2144 _index_clearcaches(self);
2145 2145 Py_XDECREF(self->filteredrevs);
2146 2146 if (self->buf.buf) {
2147 2147 PyBuffer_Release(&self->buf);
2148 2148 memset(&self->buf, 0, sizeof(self->buf));
2149 2149 }
2150 2150 Py_XDECREF(self->data);
2151 2151 Py_XDECREF(self->added);
2152 2152 PyObject_Del(self);
2153 2153 }
2154 2154
/*
 * Sequence protocol slots for the index type: only length, integer item
 * access and containment are provided; every other slot is left empty.
 */
2155 2155 static PySequenceMethods index_sequence_methods = {
2156 2156 (lenfunc)index_length, /* sq_length */
2157 2157 0, /* sq_concat */
2158 2158 0, /* sq_repeat */
2159 2159 (ssizeargfunc)index_get, /* sq_item */
2160 2160 0, /* sq_slice */
2161 2161 0, /* sq_ass_item */
2162 2162 0, /* sq_ass_slice */
2163 2163 (objobjproc)index_contains, /* sq_contains */
2164 2164 };
2165 2165
/*
 * Mapping protocol slots for the index type. The length slot is shared
 * with the sequence protocol (same index_length function).
 */
2166 2166 static PyMappingMethods index_mapping_methods = {
2167 2167 (lenfunc)index_length, /* mp_length */
2168 2168 (binaryfunc)index_getitem, /* mp_subscript */
2169 2169 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2170 2170 };
2171 2171
/*
 * Python-visible methods of the index type.
 *
 * Note that "headrevs" and "headrevsfiltered" are two names for the same
 * C implementation (index_headrevs).
 */
2172 2172 static PyMethodDef index_methods[] = {
2173 2173 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2174 2174 "return the gca set of the given revs"},
2175 2175 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2176 2176 METH_VARARGS,
2177 2177 "return the heads of the common ancestors of the given revs"},
2178 2178 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2179 2179 "clear the index caches"},
2180 2180 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2181 2181 "get an index entry"},
2182 2182 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2183 2183 METH_VARARGS, "compute phases"},
2184 2184 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2185 2185 "reachableroots"},
2186 2186 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2187 2187 "get head revisions"}, /* Can do filtering since 3.2 */
2188 2188 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2189 2189 "get filtered head revisions"}, /* Can always do filtering */
2190 2190 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2191 2191 "determine revisions with deltas to reconstruct fulltext"},
2192 2192 {"append", (PyCFunction)index_append, METH_O,
2193 2193 "append an index entry"},
2194 2194 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2195 2195 "match a potentially ambiguous node ID"},
2196 2196 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2197 2197 "find length of shortest hex nodeid of a binary ID"},
2198 2198 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2199 2199 "stats for the index"},
2200 2200 {NULL} /* Sentinel */
2201 2201 };
2202 2202
/*
 * Computed attributes of the index type. "nodemap" is read-only (no
 * setter) and is served by index_nodemap.
 */
2203 2203 static PyGetSetDef index_getset[] = {
2204 2204 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2205 2205 {NULL} /* Sentinel */
2206 2206 };
2207 2207
/*
 * Type object for "parsers.index".
 *
 * Slots are given positionally, so their order must match the
 * PyTypeObject layout. tp_new is not set here; revlog_module_init()
 * assigns PyType_GenericNew before calling PyType_Ready().
 */
2208 2208 static PyTypeObject indexType = {
2209 2209 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2210 2210 "parsers.index", /* tp_name */
2211 2211 sizeof(indexObject), /* tp_basicsize */
2212 2212 0, /* tp_itemsize */
2213 2213 (destructor)index_dealloc, /* tp_dealloc */
2214 2214 0, /* tp_print */
2215 2215 0, /* tp_getattr */
2216 2216 0, /* tp_setattr */
2217 2217 0, /* tp_compare */
2218 2218 0, /* tp_repr */
2219 2219 0, /* tp_as_number */
2220 2220 &index_sequence_methods, /* tp_as_sequence */
2221 2221 &index_mapping_methods, /* tp_as_mapping */
2222 2222 0, /* tp_hash */
2223 2223 0, /* tp_call */
2224 2224 0, /* tp_str */
2225 2225 0, /* tp_getattro */
2226 2226 0, /* tp_setattro */
2227 2227 0, /* tp_as_buffer */
2228 2228 Py_TPFLAGS_DEFAULT, /* tp_flags */
2229 2229 "revlog index", /* tp_doc */
2230 2230 0, /* tp_traverse */
2231 2231 0, /* tp_clear */
2232 2232 0, /* tp_richcompare */
2233 2233 0, /* tp_weaklistoffset */
2234 2234 0, /* tp_iter */
2235 2235 0, /* tp_iternext */
2236 2236 index_methods, /* tp_methods */
2237 2237 0, /* tp_members */
2238 2238 index_getset, /* tp_getset */
2239 2239 0, /* tp_base */
2240 2240 0, /* tp_dict */
2241 2241 0, /* tp_descr_get */
2242 2242 0, /* tp_descr_set */
2243 2243 0, /* tp_dictoffset */
2244 2244 (initproc)index_init, /* tp_init */
2245 2245 0, /* tp_alloc */
2246 2246 };
2247 2247
2248 2248 /*
2249 2249 * returns a tuple of the form (index, index, cache) with elements as
2250 2250 * follows:
2251 2251 *
2252 2252 * index: an index object that lazily parses RevlogNG records
2253 2253 * cache: if data is inlined, a tuple (0, index_file_content), else None
2254 2254 * index_file_content could be a string, or a buffer
2255 2255 *
2256 2256 * added complications are for backwards compatibility
2257 2257 */
2258 2258 PyObject *parse_index2(PyObject *self, PyObject *args)
2259 2259 {
2260 2260 PyObject *tuple = NULL, *cache = NULL;
2261 2261 indexObject *idx;
2262 2262 int ret;
2263 2263
2264 2264 idx = PyObject_New(indexObject, &indexType);
2265 2265 if (idx == NULL)
2266 2266 goto bail;
2267 2267
2268 2268 ret = index_init(idx, args);
2269 2269 if (ret == -1)
2270 2270 goto bail;
2271 2271
2272 2272 if (idx->inlined) {
2273 2273 cache = Py_BuildValue("iO", 0, idx->data);
2274 2274 if (cache == NULL)
2275 2275 goto bail;
2276 2276 } else {
2277 2277 cache = Py_None;
2278 2278 Py_INCREF(cache);
2279 2279 }
2280 2280
2281 2281 tuple = Py_BuildValue("NN", idx, cache);
2282 2282 if (!tuple)
2283 2283 goto bail;
2284 2284 return tuple;
2285 2285
2286 2286 bail:
2287 2287 Py_XDECREF(idx);
2288 2288 Py_XDECREF(cache);
2289 2289 Py_XDECREF(tuple);
2290 2290 return NULL;
2291 2291 }
2292 2292
2293 #ifdef WITH_RUST
2294
2295 /* rustlazyancestors: iteration over ancestors implemented in Rust
2296 *
2297 * This class holds a reference to an index and to the Rust iterator.
2298 */
2299 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2300
2301 struct rustlazyancestorsObjectStruct {
2302 PyObject_HEAD
2303 /* Type-specific fields go here. */
2304 indexObject *index; /* Ref kept to avoid GC'ing the index */
/* Opaque handle: set from rustlazyancestors_init() in rustla_init() and
 * released with rustlazyancestors_drop() in rustla_dealloc(); stays NULL
 * if initialization failed. */
2305 void *iter; /* Rust iterator */
2306 };
2307
2308 /* FFI exposed from Rust code */
/* NOTE(review): these prototypes are spelled with rustlazyancestorsObject *,
 * yet the callers in this file store the result of rustlazyancestors_init()
 * in the void *iter field and pass that same field to _drop()/_next().
 * The pointer looks like an opaque Rust iterator handle rather than the
 * Python object -- confirm against the Rust signatures. */
2309 rustlazyancestorsObject *rustlazyancestors_init(
2310 indexObject *index,
2311 /* to pass index_get_parents() */
2312 int (*)(indexObject *, Py_ssize_t, int*, int),
2313 /* initrevs vector */
2314 int initrevslen, long *initrevs,
2315 long stoprev,
2316 int inclusive);
2317 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2318 int rustlazyancestors_next(rustlazyancestorsObject *self);
2319
2320 /* CPython instance methods */
2321 static int rustla_init(rustlazyancestorsObject *self,
2322 PyObject *args) {
2323 PyObject *initrevsarg = NULL;
2324 PyObject *inclusivearg = NULL;
2325 long stoprev = 0;
2326 long *initrevs = NULL;
2327 int inclusive = 0;
2328 Py_ssize_t i;
2329
2330 indexObject *index;
2331 if (!PyArg_ParseTuple(args, "O!O!lO!",
2332 &indexType, &index,
2333 &PyList_Type, &initrevsarg,
2334 &stoprev,
2335 &PyBool_Type, &inclusivearg))
2336 return -1;
2337
2338 Py_INCREF(index);
2339 self->index = index;
2340
2341 if (inclusivearg == Py_True)
2342 inclusive = 1;
2343
2344 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2345
2346 initrevs = (long*)calloc(linit, sizeof(long));
2347
2348 if (initrevs == NULL) {
2349 PyErr_NoMemory();
2350 goto bail;
2351 }
2352
2353 for (i=0; i<linit; i++) {
2354 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2355 }
2356 if (PyErr_Occurred())
2357 goto bail;
2358
2359 self->iter = rustlazyancestors_init(index,
2360 index_get_parents,
2361 linit, initrevs,
2362 stoprev, inclusive);
2363 if (self->iter == NULL) {
2364 /* if this is because of GraphError::ParentOutOfRange
2365 * index_get_parents() has already set the proper ValueError */
2366 goto bail;
2367 }
2368
2369 free(initrevs);
2370 return 0;
2371
2372 bail:
2373 free(initrevs);
2374 return -1;
2375 };
2376
2377 static void rustla_dealloc(rustlazyancestorsObject *self)
2378 {
2379 Py_XDECREF(self->index);
2380 if (self->iter != NULL) { /* can happen if rustla_init failed */
2381 rustlazyancestors_drop(self->iter);
2382 }
2383 PyObject_Del(self);
2384 }
2385
2386 static PyObject *rustla_next(rustlazyancestorsObject *self) {
2387 int res = rustlazyancestors_next(self->iter);
2388 if (res == -1) {
2389 /* Setting an explicit exception seems unnecessary
2390 * as examples from Python source code (Objects/rangeobjets.c and
2391 * Modules/_io/stringio.c) seem to demonstrate.
2392 */
2393 return NULL;
2394 }
2395 return PyInt_FromLong(res);
2396 }
2397
2398 static PyTypeObject rustlazyancestorsType = {
2399 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2400 "parsers.rustlazyancestors", /* tp_name */
2401 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2402 0, /* tp_itemsize */
2403 (destructor)rustla_dealloc, /* tp_dealloc */
2404 0, /* tp_print */
2405 0, /* tp_getattr */
2406 0, /* tp_setattr */
2407 0, /* tp_compare */
2408 0, /* tp_repr */
2409 0, /* tp_as_number */
2410 0, /* tp_as_sequence */
2411 0, /* tp_as_mapping */
2412 0, /* tp_hash */
2413 0, /* tp_call */
2414 0, /* tp_str */
2415 0, /* tp_getattro */
2416 0, /* tp_setattro */
2417 0, /* tp_as_buffer */
2418 Py_TPFLAGS_DEFAULT, /* tp_flags */
2419 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2420 0, /* tp_traverse */
2421 0, /* tp_clear */
2422 0, /* tp_richcompare */
2423 0, /* tp_weaklistoffset */
2424 0, /* tp_iter */
2425 (iternextfunc)rustla_next, /* tp_iternext */
2426 0, /* tp_methods */
2427 0, /* tp_members */
2428 0, /* tp_getset */
2429 0, /* tp_base */
2430 0, /* tp_dict */
2431 0, /* tp_descr_get */
2432 0, /* tp_descr_set */
2433 0, /* tp_dictoffset */
2434 (initproc)rustla_init, /* tp_init */
2435 0, /* tp_alloc */
2436 };
2437 #endif /* WITH_RUST */
2438
2293 2439 void revlog_module_init(PyObject *mod)
2294 2440 {
2295 2441 indexType.tp_new = PyType_GenericNew;
2296 2442 if (PyType_Ready(&indexType) < 0)
2297 2443 return;
2298 2444 Py_INCREF(&indexType);
2299 2445 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2300 2446
2301 2447 nodetreeType.tp_new = PyType_GenericNew;
2302 2448 if (PyType_Ready(&nodetreeType) < 0)
2303 2449 return;
2304 2450 Py_INCREF(&nodetreeType);
2305 2451 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2306 2452
2307 2453 if (!nullentry) {
2308 2454 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2309 2455 -1, -1, -1, -1, nullid, 20);
2310 2456 }
2311 2457 if (nullentry)
2312 2458 PyObject_GC_UnTrack(nullentry);
2459
2460 #ifdef WITH_RUST
2461 rustlazyancestorsType.tp_new = PyType_GenericNew;
2462 if (PyType_Ready(&rustlazyancestorsType) < 0)
2463 return;
2464 Py_INCREF(&rustlazyancestorsType);
2465 PyModule_AddObject(mod, "rustlazyancestors",
2466 (PyObject *)&rustlazyancestorsType);
2467 #endif
2468
2313 2469 }
@@ -1,1103 +1,1160
1 1 #
2 2 # This is the mercurial setup script.
3 3 #
4 4 # 'python setup.py install', or
5 5 # 'python setup.py --help' for more options
6 6
7 7 import os
8 8
9 9 supportedpy = '~= 2.7'
10 10 if os.environ.get('HGALLOWPYTHON3', ''):
11 11 # Mercurial will never work on Python 3 before 3.5 due to a lack
12 12 # of % formatting on bytestrings, and can't work on 3.6.0 or 3.6.1
13 13 # due to a bug in % formatting in bytestrings.
14 14 # We cannot support Python 3.5.0, 3.5.1, 3.5.2 because of bug in
15 15 # codecs.escape_encode() where it raises SystemError on empty bytestring
16 16 # bug link: https://bugs.python.org/issue25270
17 17 #
18 18 # TODO: when we actually work on Python 3, use this string as the
19 19 # actual supportedpy string.
20 20 supportedpy = ','.join([
21 21 '>=2.7',
22 22 '!=3.0.*',
23 23 '!=3.1.*',
24 24 '!=3.2.*',
25 25 '!=3.3.*',
26 26 '!=3.4.*',
27 27 '!=3.5.0',
28 28 '!=3.5.1',
29 29 '!=3.5.2',
30 30 '!=3.6.0',
31 31 '!=3.6.1',
32 32 ])
33 33
34 34 import sys, platform
35 35 if sys.version_info[0] >= 3:
36 36 printf = eval('print')
37 37 libdir_escape = 'unicode_escape'
38 38 def sysstr(s):
39 39 return s.decode('latin-1')
40 40 else:
41 41 libdir_escape = 'string_escape'
42 42 def printf(*args, **kwargs):
43 43 f = kwargs.get('file', sys.stdout)
44 44 end = kwargs.get('end', '\n')
45 45 f.write(b' '.join(args) + end)
46 46 def sysstr(s):
47 47 return s
48 48
49 49 # Attempt to guide users to a modern pip - this means that 2.6 users
50 50 # should have a chance of getting a 4.2 release, and when we ratchet
51 51 # the version requirement forward again hopefully everyone will get
52 52 # something that works for them.
53 53 if sys.version_info < (2, 7, 0, 'final'):
54 54 pip_message = ('This may be due to an out of date pip. '
55 55 'Make sure you have pip >= 9.0.1.')
56 56 try:
57 57 import pip
58 58 pip_version = tuple([int(x) for x in pip.__version__.split('.')[:3]])
59 59 if pip_version < (9, 0, 1) :
60 60 pip_message = (
61 61 'Your pip version is out of date, please install '
62 62 'pip >= 9.0.1. pip {} detected.'.format(pip.__version__))
63 63 else:
64 64 # pip is new enough - it must be something else
65 65 pip_message = ''
66 66 except Exception:
67 67 pass
68 68 error = """
69 69 Mercurial does not support Python older than 2.7.
70 70 Python {py} detected.
71 71 {pip}
72 72 """.format(py=sys.version_info, pip=pip_message)
73 73 printf(error, file=sys.stderr)
74 74 sys.exit(1)
75 75
76 76 # We don't yet officially support Python 3. But we want to allow developers to
77 77 # hack on. Detect and disallow running on Python 3 by default. But provide a
78 78 # backdoor to enable working on Python 3.
79 79 if sys.version_info[0] != 2:
80 80 badpython = True
81 81
82 82 # Allow Python 3 from source checkouts.
83 83 if os.path.isdir('.hg') or 'HGPYTHON3' in os.environ:
84 84 badpython = False
85 85
86 86 if badpython:
87 87 error = """
88 88 Mercurial only supports Python 2.7.
89 89 Python {py} detected.
90 90 Please re-run with Python 2.7.
91 91 """.format(py=sys.version_info)
92 92
93 93 printf(error, file=sys.stderr)
94 94 sys.exit(1)
95 95
96 96 # Solaris Python packaging brain damage
97 97 try:
98 98 import hashlib
99 99 sha = hashlib.sha1()
100 100 except ImportError:
101 101 try:
102 102 import sha
103 103 sha.sha # silence unused import warning
104 104 except ImportError:
105 105 raise SystemExit(
106 106 "Couldn't import standard hashlib (incomplete Python install).")
107 107
108 108 try:
109 109 import zlib
110 110 zlib.compressobj # silence unused import warning
111 111 except ImportError:
112 112 raise SystemExit(
113 113 "Couldn't import standard zlib (incomplete Python install).")
114 114
115 115 # The base IronPython distribution (as of 2.7.1) doesn't support bz2
116 116 isironpython = False
117 117 try:
118 118 isironpython = (platform.python_implementation()
119 119 .lower().find("ironpython") != -1)
120 120 except AttributeError:
121 121 pass
122 122
123 123 if isironpython:
124 124 sys.stderr.write("warning: IronPython detected (no bz2 support)\n")
125 125 else:
126 126 try:
127 127 import bz2
128 128 bz2.BZ2Compressor # silence unused import warning
129 129 except ImportError:
130 130 raise SystemExit(
131 131 "Couldn't import standard bz2 (incomplete Python install).")
132 132
133 133 ispypy = "PyPy" in sys.version
134 134
135 iswithrustextensions = 'HGWITHRUSTEXT' in os.environ
136
135 137 import ctypes
136 138 import stat, subprocess, time
137 139 import re
138 140 import shutil
139 141 import tempfile
140 142 from distutils import log
141 143 # We have issues with setuptools on some platforms and builders. Until
142 144 # those are resolved, setuptools is opt-in except for platforms where
143 145 # we don't have issues.
144 146 issetuptools = (os.name == 'nt' or 'FORCE_SETUPTOOLS' in os.environ)
145 147 if issetuptools:
146 148 from setuptools import setup
147 149 else:
148 150 from distutils.core import setup
149 151 from distutils.ccompiler import new_compiler
150 152 from distutils.core import Command, Extension
151 153 from distutils.dist import Distribution
152 154 from distutils.command.build import build
153 155 from distutils.command.build_ext import build_ext
154 156 from distutils.command.build_py import build_py
155 157 from distutils.command.build_scripts import build_scripts
156 158 from distutils.command.install import install
157 159 from distutils.command.install_lib import install_lib
158 160 from distutils.command.install_scripts import install_scripts
159 161 from distutils.spawn import spawn, find_executable
160 162 from distutils import file_util
161 163 from distutils.errors import (
162 164 CCompilerError,
163 165 DistutilsError,
164 166 DistutilsExecError,
165 167 )
166 168 from distutils.sysconfig import get_python_inc, get_config_var
167 169 from distutils.version import StrictVersion
168 170
def write_if_changed(path, content):
    """Write ``content`` (bytes) to ``path`` only if it differs.

    A missing file counts as empty. When the file already holds exactly
    ``content`` it is left untouched, so its modification time is preserved.
    """
    if os.path.exists(path):
        with open(path, 'rb') as fh:
            current = fh.read()
    else:
        current = b''

    if current != content:
        with open(path, 'wb') as fh:
            fh.write(content)
180 182
181 183 scripts = ['hg']
182 184 if os.name == 'nt':
183 185 # We remove hg.bat if we are able to build hg.exe.
184 186 scripts.append('contrib/win32/hg.bat')
185 187
def cancompile(cc, code):
    """Return True if compiler ``cc`` can compile and link ``code``.

    ``cc`` must provide ``compile()`` and ``link_executable()`` in the style
    of a distutils compiler object. The source is written to a temporary
    directory which is removed afterwards, and stderr is silenced for the
    duration of the attempt. Any exception counts as "cannot compile".
    """
    tmpdir = tempfile.mkdtemp(prefix='hg-install-')
    devnull = oldstderr = None
    try:
        fname = os.path.join(tmpdir, 'testcomp.c')
        with open(fname, 'w') as f:
            f.write(code)
        # Redirect stderr to the null device to hide any error messages
        # from the compiler.
        devnull = open(os.devnull, 'w')
        oldstderr = os.dup(sys.stderr.fileno())
        os.dup2(devnull.fileno(), sys.stderr.fileno())
        objects = cc.compile([fname], output_dir=tmpdir)
        cc.link_executable(objects, os.path.join(tmpdir, "a.out"))
        return True
    except Exception:
        return False
    finally:
        if oldstderr is not None:
            os.dup2(oldstderr, sys.stderr.fileno())
            # the duplicate is only needed for the restore above; close it
            # so that every probe does not leak a file descriptor
            os.close(oldstderr)
        if devnull is not None:
            devnull.close()
        shutil.rmtree(tmpdir)
212 214
# Lightweight stand-in for distutils.ccompiler.CCompiler.has_function
# which, unlike the original, cleans up its temporary files.
def hasfunction(cc, funcname):
    """Return True if a program calling ``funcname`` compiles and links."""
    return cancompile(cc, 'int main(void) { %s(); }\n' % funcname)
218 220
def hasheader(cc, headername):
    """Return True if a program including ``<headername>`` compiles."""
    return cancompile(
        cc, '#include <%s>\nint main(void) { return 0; }\n' % headername)
222 224
223 225 # py2exe needs to be installed to work
224 226 try:
225 227 import py2exe
226 228 py2exe.Distribution # silence unused import warning
227 229 py2exeloaded = True
228 230 # import py2exe's patched Distribution class
229 231 from distutils.core import Distribution
230 232 except ImportError:
231 233 py2exeloaded = False
232 234
def runcmd(cmd, env):
    """Run ``cmd`` with environment ``env`` and wait for it to finish.

    Returns a ``(returncode, stdout, stderr)`` tuple; both streams are
    captured in full as bytes.
    """
    proc = subprocess.Popen(cmd, env=env,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return proc.returncode, stdout, stderr
238 240
class hgcommand(object):
    """An hg command-line prefix bundled with the environment to run it in."""

    def __init__(self, cmd, env):
        self.cmd = cmd
        self.env = env

    def run(self, args):
        """Run hg with ``args`` appended and return its stdout.

        If the command exits non-zero or leaves anything on stderr after
        filtering, the filtered stderr is reported and '' is returned.
        """
        fullcmd = self.cmd + args
        status, stdout, stderr = runcmd(fullcmd, self.env)
        stderr = filterhgerr(stderr)
        if status == 0 and not stderr:
            return stdout
        printf("stderr from '%s':" % (' '.join(fullcmd)), file=sys.stderr)
        printf(stderr, file=sys.stderr)
        return ''
253 255
def filterhgerr(err):
    """Strip known-harmless lines from hg's stderr output (bytes).

    The surviving lines are returned joined by newlines, each prefixed with
    a space; the result is empty when nothing of interest remains.
    """
    # If root is executing setup.py, but the repository is owned by
    # another user (as in "sudo python setup.py install") we will get
    # trust warnings since the .hg/hgrc file is untrusted. That is
    # fine, we don't want to load it anyway. Python may warn about
    # a missing __init__.py in mercurial/locale, we also ignore that.
    ignoredprefixes = (
        b'not trusting file',
        b'warning: Not importing',
        b'obsolete feature not enabled',
        b'*** failed to import extension',
        b'devel-warn:',
    )
    kept = []
    for line in err.splitlines():
        if line.startswith(ignoredprefixes):
            continue
        if (line.startswith(b'(third party extension')
            and line.endswith(b'or newer of Mercurial; disabling)')):
            continue
        kept.append(b' ' + line)
    return b'\n'.join(kept)
269 271
def findhg():
    """Work out how hg should be invoked to inspect the local repository.

    Returns an hgcommand object, or raises SystemExit when no candidate
    works."""
    # A simple "hg log" is enough to tell whether a candidate hg can
    # successfully interact with this repository.
    check_cmd = ['log', '-r.', '-Ttest']

    def works(hgcmd, hgenv):
        try:
            retcode, out, err = runcmd(hgcmd + check_cmd, hgenv)
        except EnvironmentError:
            return False
        return retcode == 0 and not filterhgerr(err)

    # By default, prefer the "hg" command in the user's path. This was
    # presumably the hg command that the user used to create this repository.
    #
    # This repository may require extensions or other settings that would not
    # be enabled by running the hg script directly from this local repository.
    hgenv = os.environ.copy()
    # Use HGPLAIN to disable hgrc settings that would change output formatting,
    # and disable localization for the same reasons.
    hgenv['HGPLAIN'] = '1'
    hgenv['LANGUAGE'] = 'C'
    hgcmd = ['hg']
    if works(hgcmd, hgenv):
        return hgcommand(hgcmd, hgenv)

    # Fall back to trying the local hg installation.
    hgenv = localhgenv()
    hgcmd = [sys.executable, 'hg']
    if works(hgcmd, hgenv):
        return hgcommand(hgcmd, hgenv)

    raise SystemExit('Unable to find a working hg binary to extract the '
                     'version from the repository tags')
308 310
def localhgenv():
    """Build the environment dict used to invoke or import mercurial
    from the local repository."""
    # Run hg out of this directory with a custom environment that uses no
    # hgrc files and does no localization.
    env = {
        'HGMODULEPOLICY': 'py',
        'HGRCPATH': '',
        'LANGUAGE': 'C',
        'PATH': '',  # make pypi modules that use os.environ['PATH'] happy
    }
    # LD_LIBRARY_PATH is passed through when set. SystemRoot is required by
    # Windows to load various DLLs. See:
    # https://bugs.python.org/issue13524#msg148850
    for passthrough in ('LD_LIBRARY_PATH', 'SystemRoot'):
        if passthrough in os.environ:
            env[passthrough] = os.environ[passthrough]
    return env
325 327
326 328 version = ''
327 329
328 330 if os.path.isdir('.hg'):
329 331 hg = findhg()
330 332 cmd = ['log', '-r', '.', '--template', '{tags}\n']
331 333 numerictags = [t for t in sysstr(hg.run(cmd)).split() if t[0:1].isdigit()]
332 334 hgid = sysstr(hg.run(['id', '-i'])).strip()
333 335 if not hgid:
334 336 # Bail out if hg is having problems interacting with this repository,
335 337 # rather than falling through and producing a bogus version number.
336 338 # Continuing with an invalid version number will break extensions
337 339 # that define minimumhgversion.
338 340 raise SystemExit('Unable to determine hg version from local repository')
339 341 if numerictags: # tag(s) found
340 342 version = numerictags[-1]
341 343 if hgid.endswith('+'): # propagate the dirty status to the tag
342 344 version += '+'
343 345 else: # no tag found
344 346 ltagcmd = ['parents', '--template', '{latesttag}']
345 347 ltag = sysstr(hg.run(ltagcmd))
346 348 changessincecmd = ['log', '-T', 'x\n', '-r', "only(.,'%s')" % ltag]
347 349 changessince = len(hg.run(changessincecmd).splitlines())
348 350 version = '%s+%s-%s' % (ltag, changessince, hgid)
349 351 if version.endswith('+'):
350 352 version += time.strftime('%Y%m%d')
351 353 elif os.path.exists('.hg_archival.txt'):
352 354 kw = dict([[t.strip() for t in l.split(':', 1)]
353 355 for l in open('.hg_archival.txt')])
354 356 if 'tag' in kw:
355 357 version = kw['tag']
356 358 elif 'latesttag' in kw:
357 359 if 'changessincelatesttag' in kw:
358 360 version = '%(latesttag)s+%(changessincelatesttag)s-%(node).12s' % kw
359 361 else:
360 362 version = '%(latesttag)s+%(latesttagdistance)s-%(node).12s' % kw
361 363 else:
362 364 version = kw.get('node', '')[:12]
363 365
364 366 if version:
365 367 versionb = version
366 368 if not isinstance(versionb, bytes):
367 369 versionb = versionb.encode('ascii')
368 370
369 371 write_if_changed('mercurial/__version__.py', b''.join([
370 372 b'# this file is autogenerated by setup.py\n'
371 373 b'version = b"%s"\n' % versionb,
372 374 ]))
373 375
374 376 try:
375 377 oldpolicy = os.environ.get('HGMODULEPOLICY', None)
376 378 os.environ['HGMODULEPOLICY'] = 'py'
377 379 from mercurial import __version__
378 380 version = __version__.version
379 381 except ImportError:
380 382 version = b'unknown'
381 383 finally:
382 384 if oldpolicy is None:
383 385 del os.environ['HGMODULEPOLICY']
384 386 else:
385 387 os.environ['HGMODULEPOLICY'] = oldpolicy
386 388
class hgbuild(build):
    """The stock build command with the translation step run first."""
    # build_mo has to come first so that the files it puts in
    # mercurial/locale/ exist by the time build_py runs.
    sub_commands = [('build_mo', None)]
    sub_commands += build.sub_commands
391 393
class hgbuildmo(build):
    """Compile each i18n/*.po catalog into mercurial/locale/.../hg.mo."""

    description = "build translations (.mo files)"

    def run(self):
        if not find_executable('msgfmt'):
            self.warn("could not find msgfmt executable, no translations "
                      "will be built")
            return

        podir = 'i18n'
        if not os.path.isdir(podir):
            self.warn("could not find %s/ directory" % podir)
            return

        # msgfmt on Solaris does not know about -c
        extraflags = [] if sys.platform == 'sunos5' else ['-c']

        catalogs = [name for name in os.listdir(podir)
                    if name.endswith('.po')]
        for po in catalogs:
            pofile = os.path.join(podir, po)
            modir = os.path.join('locale', po[:-3], 'LC_MESSAGES')
            mofile = os.path.join(modir, 'hg.mo')
            mobuildfile = os.path.join('mercurial', mofile)
            cmd = ['msgfmt', '-v', '-o', mobuildfile, pofile] + extraflags
            self.mkpath(os.path.join('mercurial', modir))
            self.make_file([pofile], mobuildfile, spawn, (cmd,))
421 423
422 424
class hgdist(Distribution):
    """Distribution that understands the global ``--pure`` option."""
    pure = False
    cffi = ispypy

    global_options = Distribution.global_options + [
        ('pure', None,
         "use pure (slow) Python code instead of C extensions"),
    ]

    def has_ext_modules(self):
        # self.ext_modules is only emptied in hgbuildpy.finalize_options,
        # which is too late for some cases, so check ``pure`` here as well.
        if self.pure:
            return False
        return Distribution.has_ext_modules(self)
436 438
# Built outside the class body because it is ugly as a one-liner.
buildextnegops = dict(getattr(build_ext, 'negative_options', {}))
buildextnegops['no-zstd'] = 'zstd'

class hgbuildext(build_ext):
    """build_ext with a --zstd/--no-zstd switch, Rust extension support and
    tolerance for optional extensions that fail to compile."""

    user_options = build_ext.user_options + [
        ('zstd', None, 'compile zstd bindings [default]'),
        ('no-zstd', None, 'do not compile zstd bindings'),
    ]

    boolean_options = build_ext.boolean_options + ['zstd']
    negative_opt = buildextnegops

    def initialize_options(self):
        self.zstd = True
        return build_ext.initialize_options(self)

    def build_extensions(self):
        if not self.zstd:
            # zstd was disabled on the command line: drop its extension
            self.extensions = [ext for ext in self.extensions
                               if ext.name != 'mercurial.zstd']

        return build_ext.build_extensions(self)

    def build_extension(self, ext):
        # Rust extensions run their own build step before the regular one
        if isinstance(ext, RustExtension):
            ext.rustbuild()
        try:
            build_ext.build_extension(self, ext)
        except CCompilerError:
            if getattr(ext, 'optional', False):
                log.warn("Failed to build optional extension '%s' (skipping)",
                         ext.name)
            else:
                raise
470 474
class hgbuildscripts(build_scripts):
    """build_scripts that tries to ship hg.exe instead of hg.bat on Windows."""

    def run(self):
        if os.name != 'nt' or self.distribution.pure:
            return build_scripts.run(self)

        try:
            self.run_command('build_hgexe')
        except (DistutilsError, CCompilerError):
            log.warn('failed to build optional hg.exe')
        else:
            # Copying hg.exe to the scripts build directory ensures it is
            # installed by the install_scripts command.
            hgexecommand = self.get_finalized_command('build_hgexe')
            dest = os.path.join(self.build_dir, 'hg.exe')
            self.mkpath(self.build_dir)
            self.copy_file(hgexecommand.hgexepath, dest)

            # Remove hg.bat because it is redundant with hg.exe.
            self.scripts.remove('contrib/win32/hg.bat')

        return build_scripts.run(self)
495 499
class hgbuildpy(build_py):
    """build_py that selects the extension set and records the module
    policy in mercurial/__modulepolicy__.py."""

    def finalize_options(self):
        build_py.finalize_options(self)

        dist = self.distribution
        if dist.pure:
            dist.ext_modules = []
            return

        if dist.cffi:
            from mercurial.cffi import (
                bdiffbuild,
                mpatchbuild,
            )
            exts = [mpatchbuild.ffi.distutils_extension(),
                    bdiffbuild.ffi.distutils_extension()]
            # cffi modules go here
            if sys.platform == 'darwin':
                from mercurial.cffi import osutilbuild
                exts.append(osutilbuild.ffi.distutils_extension())
            dist.ext_modules = exts
            return

        # C extensions are going to be compiled: Python.h is required
        h = os.path.join(get_python_inc(), 'Python.h')
        if not os.path.exists(h):
            raise SystemExit("Python headers are required to build "
                             "Mercurial but weren't found in %s" % h)

    def run(self):
        basepath = os.path.join(self.build_lib, 'mercurial')
        self.mkpath(basepath)

        # in-place build should run without rebuilding C extensions
        modulepolicy = 'allow' if self.build_lib == '.' else 'c'
        if self.distribution.pure:
            modulepolicy = 'py'

        header = b'# this file is autogenerated by setup.py\n'
        setting = b'modulepolicy = b"%s"\n' % modulepolicy.encode('ascii')
        write_if_changed(os.path.join(basepath, '__modulepolicy__.py'),
                         header + setting)

        build_py.run(self)
540 544
class buildhgextindex(Command):
    """Generate hgext/__index__.py, a prebuilt map of the bundled
    extensions as reported by ``extensions.disabled()``."""

    description = 'generate prebuilt index of hgext (for frozen package)'
    user_options = []
    _indexfilename = 'hgext/__index__.py'

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # Blank out an existing index before querying the extensions.
        if os.path.exists(self._indexfilename):
            with open(self._indexfilename, 'w') as f:
                f.write('# empty\n')

        # here no extension enabled, disabled() lists up everything
        code = ('import pprint; from mercurial import extensions; '
                'pprint.pprint(extensions.disabled())')
        returncode, out, err = runcmd([sys.executable, '-c', code],
                                      localhgenv())
        if err or returncode != 0:
            raise DistutilsExecError(err)

        with open(self._indexfilename, 'w') as f:
            f.write('# this file is autogenerated by setup.py\n')
            f.write('docs = ')
            # runcmd() returns bytes but the file is opened in text mode;
            # sysstr() is the identity on Python 2 and decodes on Python 3
            f.write(sysstr(out))
569 573
class buildhgexe(build_ext):
    """Compile and link ``hg.exe`` from ``mercurial/exewrapper.c``.

    The command does nothing unless ``os.name == 'nt'``. Besides building
    the executable it generates ``mercurial/hgpythonlib.h``, which defines
    HGPYTHONLIB as the base name of the Python DLL in use.
    """
    description = 'compile hg.exe from mercurial/exewrapper.c'
    user_options = build_ext.user_options + [
        ('long-paths-support', None, 'enable support for long paths on '
                                     'Windows (off by default and '
                                     'experimental)'),
    ]

    LONG_PATHS_MANIFEST = """
    <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
    <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
        <application>
            <windowsSettings
            xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
                <ws2:longPathAware>true</ws2:longPathAware>
            </windowsSettings>
        </application>
    </assembly>"""

    def initialize_options(self):
        build_ext.initialize_options(self)
        self.long_paths_support = False

    def build_extensions(self):
        """Build hg.exe; optionally embed the long-paths manifest."""
        if os.name != 'nt':
            return
        if isinstance(self.compiler, HackedMingw32CCompiler):
            self.compiler.compiler_so = self.compiler.compiler # no -mdll
            self.compiler.dll_libraries = [] # no -lmsrvc90

        def displayname(name):
            # bytes would be rendered as b'...' in messages on Python 3
            if isinstance(name, str):
                return name
            return name.decode('ascii', 'replace')

        # Different Python installs can have different Python library
        # names. e.g. the official CPython distribution uses pythonXY.dll
        # and MinGW uses libpythonX.Y.dll.
        _kernel32 = ctypes.windll.kernel32
        _kernel32.GetModuleFileNameA.argtypes = [ctypes.c_void_p,
                                                 ctypes.c_void_p,
                                                 ctypes.c_ulong]
        _kernel32.GetModuleFileNameA.restype = ctypes.c_ulong
        size = 1000
        buf = ctypes.create_string_buffer(size + 1)
        filelen = _kernel32.GetModuleFileNameA(sys.dllhandle, ctypes.byref(buf),
                                               size)

        # pythonlib is kept as bytes in both branches so that it can be
        # interpolated into the bytes written to hgpythonlib.h below.
        if filelen > 0 and filelen != size:
            dllbasename = os.path.basename(buf.value)
            if not dllbasename.lower().endswith(b'.dll'):
                raise SystemExit('Python DLL does not end with .dll: %s' %
                                 displayname(dllbasename))
            pythonlib = dllbasename[:-4]
        else:
            log.warn('could not determine Python DLL filename; '
                     'assuming pythonXY')

            hv = sys.hexversion
            pythonlib = ('python%d%d'
                         % (hv >> 24, (hv >> 16) & 0xff)).encode('ascii')

        log.info('using %s as Python library name' % displayname(pythonlib))
        with open('mercurial/hgpythonlib.h', 'wb') as f:
            f.write(b'/* this file is autogenerated by setup.py */\n')
            f.write(b'#define HGPYTHONLIB "%s"\n' % pythonlib)
        objects = self.compiler.compile(['mercurial/exewrapper.c'],
                                        output_dir=self.build_temp)
        dir = os.path.dirname(self.get_ext_fullpath('dummy'))
        self.hgtarget = os.path.join(dir, 'hg')
        self.compiler.link_executable(objects, self.hgtarget,
                                      libraries=[],
                                      output_dir=self.build_temp)
        if self.long_paths_support:
            self.addlongpathsmanifest()

    def addlongpathsmanifest(self):
        r"""Add manifest pieces so that hg.exe understands long paths

        This is an EXPERIMENTAL feature, use with care.
        To enable long paths support, one needs to do two things:
        - build Mercurial with --long-paths-support option
        - change HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\
                 LongPathsEnabled to have value 1.

        Please ignore 'warning 81010002: Unrecognized Element "longPathAware"';
        it happens because Mercurial uses mt.exe circa 2008, which is not
        yet aware of long paths support in the manifest (I think so at least).
        This does not stop mt.exe from embedding/merging the XML properly.

        Why resource #1 should be used for .exe manifests? I don't know and
        wasn't able to find an explanation for mortals. But it seems to work.
        """
        exefname = self.compiler.executable_filename(self.hgtarget)
        fdauto, manfname = tempfile.mkstemp(suffix='.hg.exe.manifest')
        os.close(fdauto)
        try:
            with open(manfname, 'w') as f:
                f.write(self.LONG_PATHS_MANIFEST)
            log.info("long paths manifest is written to '%s'" % manfname)
            inputresource = '-inputresource:%s;#1' % exefname
            outputresource = '-outputresource:%s;#1' % exefname
            log.info("running mt.exe to update hg.exe's manifest in-place")
            # supplying both -manifest and -inputresource to mt.exe makes
            # it merge the embedded and supplied manifests in the
            # -outputresource
            self.spawn(['mt.exe', '-nologo', '-manifest', manfname,
                        inputresource, outputresource])
            log.info("done updating hg.exe's manifest")
        finally:
            # do not leave the temporary manifest behind if mt.exe fails
            os.remove(manfname)

    @property
    def hgexepath(self):
        """Path of the linked hg.exe below the build_temp directory."""
        dir = os.path.dirname(self.get_ext_fullpath('dummy'))
        return os.path.join(self.build_temp, dir, 'hg.exe')
677 681
class hginstall(install):
    """``install`` command that accepts setuptools-only options as no-ops
    and never runs the ``bdist_egg`` sub-command."""

    user_options = install.user_options + [
        ('old-and-unmanageable', None,
         'noop, present for eggless setuptools compat'),
        ('single-version-externally-managed', None,
         'noop, present for eggless setuptools compat'),
    ]

    # Also helps setuptools not be sad while we refuse to create eggs.
    single_version_externally_managed = True

    def get_sub_commands(self):
        """Return the sub-command names of ``install`` minus ``bdist_egg``.

        A real list is returned (as the base class does) instead of a
        one-shot ``filter`` iterator, so the result can be traversed more
        than once on Python 3.
        """
        # Screen out egg related commands to prevent egg generation.  But allow
        # mercurial.egg-info generation, since that is part of modern
        # packaging.
        excl = set(['bdist_egg'])
        return [cmd for cmd in install.get_sub_commands(self)
                if cmd not in excl]
696 700
class hginstalllib(install_lib):
    '''
    This is a specialization of install_lib that replaces the copy_file used
    there so that it supports setting the mode of files after copying them,
    instead of just preserving the mode that the files originally had.  If your
    system has a umask of something like 027, preserving the permissions when
    copying will lead to a broken install.

    Note that just passing keep_permissions=False to copy_file would be
    insufficient, as it might still be applying a umask.
    '''

    def run(self):
        """Run install_lib with file_util.copy_file temporarily replaced.

        The original copy_file is restored even if the install fails.
        """
        realcopyfile = file_util.copy_file
        def copyfileandsetmode(*args, **kwargs):
            src, dst = args[0], args[1]
            dst, copied = realcopyfile(*args, **kwargs)
            if copied:
                st = os.stat(src)
                # Persist executable bit (apply it to group and other if user
                # has it)
                if st[stat.ST_MODE] & stat.S_IXUSR:
                    setmode = int('0755', 8)
                else:
                    setmode = int('0644', 8)
                m = stat.S_IMODE(st[stat.ST_MODE])
                m = (m & ~int('0777', 8)) | setmode
                os.chmod(dst, m)
            # Keep copy_file's (dest_name, copied) return contract so that
            # callers which unpack the result keep working while patched.
            return dst, copied
        file_util.copy_file = copyfileandsetmode
        try:
            install_lib.run(self)
        finally:
            file_util.copy_file = realcopyfile
730 734
class hginstallscripts(install_scripts):
    '''
    Specialized install_scripts that substitutes the @LIBDIR@ placeholder in
    installed scripts with the directory the modules are installed into,
    expressed relative to the scripts directory when both live on the same
    drive.
    '''

    def initialize_options(self):
        install_scripts.initialize_options(self)

        self.install_lib = None

    def finalize_options(self):
        install_scripts.finalize_options(self)
        self.set_undefined_options('install',
                                   ('install_lib', 'install_lib'))

    def run(self):
        install_scripts.run(self)

        # Replacing @LIBDIR@ with the install path only makes sense when
        # that path is known. For wheels, the computation below yields
        # "../.." because a wheel archive is laid out as:
        #
        #   mercurial-3.6.1.data/scripts/hg
        #   mercurial/__init__.py
        #
        # On wheel installation the subdirectories of "<pkg>.data" are
        # mapped to system local paths and their files copied there, while
        # mercurial/* goes to site-packages. The site-packages location is
        # only known at wheel install time, and wheels do not appear to
        # offer a hook for running custom code during installation. Hence
        # libdir cannot be set reliably in wheels and the default behavior
        # of looking in sys.path must do.

        scriptsdrive = os.path.splitdrive(self.install_dir)[0]
        libdrive = os.path.splitdrive(self.install_lib)[0]
        if scriptsdrive == libdrive:
            shared = os.path.commonprefix((self.install_dir, self.install_lib))
            tail = self.install_dir[len(shared):]
            uplevel = len([part for part in os.path.split(tail) if part])

            libdir = uplevel * ('..' + os.sep) + self.install_lib[len(shared):]
        else:
            # a relative path cannot cross drives; fall back to the
            # absolute one
            libdir = self.install_lib

        for outfile in self.outfiles:
            with open(outfile, 'rb') as fp:
                data = fp.read()

            # binary files are left alone
            if b'\0' in data:
                continue

            # Local installs get their shebang rewritten to the final
            # install path; wheel packaging leaves this special value.
            if data.startswith(b'#!python'):
                log.info('not rewriting @LIBDIR@ in %s because install path '
                         'not known' % outfile)
                continue

            data = data.replace(b'@LIBDIR@', libdir.encode(libdir_escape))
            with open(outfile, 'wb') as fp:
                fp.write(data)
801 805
# Maps distutils command names to the customized command classes; handed to
# setup() through its ``cmdclass`` keyword.
cmdclass = {'build': hgbuild,
            'build_mo': hgbuildmo,
            'build_ext': hgbuildext,
            'build_py': hgbuildpy,
            'build_scripts': hgbuildscripts,
            'build_hgextindex': buildhgextindex,
            'install': hginstall,
            'install_lib': hginstalllib,
            'install_scripts': hginstallscripts,
            'build_hgexe': buildhgexe,
            }
813 817
# Python packages to install; handed to setup() through ``packages``.
packages = ['mercurial',
            'mercurial.cext',
            'mercurial.cffi',
            'mercurial.hgweb',
            'mercurial.pure',
            'mercurial.thirdparty',
            'mercurial.thirdparty.attr',
            'mercurial.thirdparty.cbor',
            'mercurial.thirdparty.cbor.cbor2',
            'mercurial.thirdparty.zope',
            'mercurial.thirdparty.zope.interface',
            'mercurial.utils',
            'mercurial.revlogutils',
            'mercurial.testing',
            'hgext', 'hgext.convert', 'hgext.fsmonitor',
            'hgext.fastannotate',
            'hgext.fsmonitor.pywatchman',
            'hgext.infinitepush',
            'hgext.highlight',
            'hgext.largefiles', 'hgext.lfs', 'hgext.narrow',
            'hgext.zeroconf', 'hgext3rd',
            'hgdemandimport']
# The bundled concurrent.futures packages are only installed on Python 2.
if sys.version_info[0] == 2:
    packages.extend(['mercurial.thirdparty.concurrent',
                     'mercurial.thirdparty.concurrent.futures'])
839 843
# Headers and include directories shared by the C extensions below.
common_depends = ['mercurial/bitmanipulation.h',
                  'mercurial/compat.h',
                  'mercurial/cext/util.h']
common_include_dirs = ['mercurial']

# Extra compiler/linker flags for the osutil extension, filled in by the
# platform probes below.
osutil_cflags = []
osutil_ldflags = []

# platform specific macros
# (-DHAVE_<FUNC> is added when the platform name matches and hasfunction()
# reports the function as available)
for plat, func in [('bsd', 'setproctitle')]:
    if re.search(plat, sys.platform) and hasfunction(new_compiler(), func):
        osutil_cflags.append('-DHAVE_%s' % func.upper())

# -DHAVE_<MACRO> is added when the platform name matches and the probe
# program is accepted by cancompile().
for plat, macro, code in [
    ('bsd|darwin', 'BSD_STATFS', '''
     #include <sys/param.h>
     #include <sys/mount.h>
     int main() { struct statfs s; return sizeof(s.f_fstypename); }
     '''),
    ('linux', 'LINUX_STATFS', '''
     #include <linux/magic.h>
     #include <sys/vfs.h>
     int main() { struct statfs s; return sizeof(s.f_type); }
     '''),
]:
    if re.search(plat, sys.platform) and cancompile(new_compiler(), code):
        osutil_cflags.append('-DHAVE_%s' % macro)

if sys.platform == 'darwin':
    osutil_ldflags += ['-framework', 'ApplicationServices']
870 874
# Sources of the bundled xdiff library; compiled into the bdiff extension.
xdiff_srcs = [
    'mercurial/thirdparty/xdiff/xdiffi.c',
    'mercurial/thirdparty/xdiff/xprepare.c',
    'mercurial/thirdparty/xdiff/xutils.c',
]

# Headers of the bundled xdiff library; listed as rebuild dependencies of
# the bdiff extension.
xdiff_headers = [
    'mercurial/thirdparty/xdiff/xdiff.h',
    'mercurial/thirdparty/xdiff/xdiffi.h',
    'mercurial/thirdparty/xdiff/xinclude.h',
    'mercurial/thirdparty/xdiff/xmacros.h',
    'mercurial/thirdparty/xdiff/xprepare.h',
    'mercurial/thirdparty/xdiff/xtypes.h',
    'mercurial/thirdparty/xdiff/xutils.h',
]
886 890
class RustExtension(Extension):
    """A C Extension, conditionally enhanced with Rust code.

    If iswithrustextensions is False, does nothing else than plain Extension.

    mpath, sources and **kw are passed on to Extension; rustlibname is the
    library the extension is linked against and subcrate is the crate
    directory below ``rust/`` that ``cargo build`` is run in.
    """

    rusttargetdir = os.path.join('rust', 'target', 'release')

    def __init__(self, mpath, sources, rustlibname, subcrate, **kw):
        Extension.__init__(self, mpath, sources, **kw)
        if not iswithrustextensions:
            return
        srcdir = self.rustsrcdir = os.path.join('rust', subcrate)
        self.libraries.append(rustlibname)
        self.extra_compile_args.append('-DWITH_RUST')

        # adding Rust source and control files to depends so that the extension
        # gets rebuilt if they've changed
        self.depends.append(os.path.join(srcdir, 'Cargo.toml'))
        cargo_lock = os.path.join(srcdir, 'Cargo.lock')
        if os.path.exists(cargo_lock):
            self.depends.append(cargo_lock)
        for dirpath, _subdirs, fnames in os.walk(os.path.join(srcdir, 'src')):
            self.depends.extend(os.path.join(dirpath, fname)
                                for fname in fnames
                                if os.path.splitext(fname)[1] == '.rs')

    def rustbuild(self):
        """Run ``cargo build --release`` in the crate directory.

        Raises subprocess.CalledProcessError if cargo exits with a non-zero
        status. Does nothing when iswithrustextensions is False.
        """
        if not iswithrustextensions:
            return
        env = os.environ.copy()
        if 'HGTEST_RESTOREENV' in env:
            # Mercurial tests change HOME to a temporary directory,
            # but, if installed with rustup, the Rust toolchain needs
            # HOME to be correct (otherwise the 'no default toolchain'
            # error message is issued and the build fails).
            # This happens currently with test-hghave.t, which does
            # invoke this build.

            # Unix only fix (os.path.expanduser not really reliable if
            # HOME is shadowed like this). The pwd module does not exist
            # on other platforms; HOME is then left untouched instead of
            # aborting the build with ImportError.
            try:
                import pwd
            except ImportError:
                pwd = None
            if pwd is not None:
                env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir

        subprocess.check_call(['cargo', 'build', '-vv', '--release'],
                              env=env, cwd=self.rustsrcdir)
        # avoid piling up duplicates when the extension is built repeatedly
        if self.rusttargetdir not in self.library_dirs:
            self.library_dirs.append(self.rusttargetdir)
938
# C extension modules to build; handed to setup() through ``ext_modules``.
# NOTE(review): the explicit 'mercurial/rust/src/*.rs' depends of the parsers
# extension do not match the 'rust/<subcrate>' layout that RustExtension
# uses for its own dependency scan -- confirm these paths exist.
extmodules = [
    Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c',
                                       'mercurial/cext/bdiff.c'] + xdiff_srcs,
              include_dirs=common_include_dirs,
              depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers),
    Extension('mercurial.cext.mpatch', ['mercurial/mpatch.c',
                                        'mercurial/cext/mpatch.c'],
              include_dirs=common_include_dirs,
              depends=common_depends),
    RustExtension('mercurial.cext.parsers', ['mercurial/cext/charencode.c',
                                             'mercurial/cext/dirs.c',
                                             'mercurial/cext/manifest.c',
                                             'mercurial/cext/parsers.c',
                                             'mercurial/cext/pathencode.c',
                                             'mercurial/cext/revlog.c'],
                  'hgdirectffi',
                  'hg-direct-ffi',
                  include_dirs=common_include_dirs,
                  depends=common_depends + ['mercurial/cext/charencode.h',
                                            'mercurial/rust/src/lib.rs',
                                            'mercurial/rust/src/ancestors.rs',
                                            'mercurial/rust/src/cpython.rs']),
    Extension('mercurial.cext.osutil', ['mercurial/cext/osutil.c'],
              include_dirs=common_include_dirs,
              extra_compile_args=osutil_cflags,
              extra_link_args=osutil_ldflags,
              depends=common_depends),
    Extension(
        'mercurial.thirdparty.zope.interface._zope_interface_coptimizations', [
        'mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c',
        ]),
    Extension('hgext.fsmonitor.pywatchman.bser',
              ['hgext/fsmonitor/pywatchman/bser.c']),
    ]
919 976
# setup_zstd is imported from contrib/python-zstandard, which is therefore
# put first on sys.path; it supplies the 'mercurial.zstd' extension.
sys.path.insert(0, 'contrib/python-zstandard')
import setup_zstd
extmodules.append(setup_zstd.get_c_extension(
    name='mercurial.zstd',
    root=os.path.abspath(os.path.dirname(__file__))))
925 982
try:
    from distutils import cygwinccompiler

    # the -mno-cygwin option has been deprecated for years
    mingw32compilerclass = cygwinccompiler.Mingw32CCompiler

    class HackedMingw32CCompiler(cygwinccompiler.Mingw32CCompiler):
        """Mingw32CCompiler with '-mno-cygwin' stripped from its commands."""

        def __init__(self, *args, **kwargs):
            mingw32compilerclass.__init__(self, *args, **kwargs)
            for attrname in ('compiler', 'compiler_so',
                             'linker_exe', 'linker_so'):
                flags = getattr(self, attrname)
                if '-mno-cygwin' in flags:
                    flags.remove('-mno-cygwin')

    cygwinccompiler.Mingw32CCompiler = HackedMingw32CCompiler
except ImportError:
    # some Python distributions (e.g. the optware builds for Synology
    # DiskStation boxes) ship without the cygwinccompiler package
    class HackedMingw32CCompiler(object):
        pass
948 1005
if os.name == 'nt':
    # Allow compiler/linker flags to be added to Visual Studio builds.  Passing
    # extra_link_args to distutils.extensions.Extension() doesn't have any
    # effect.
    from distutils import msvccompiler

    # the unpatched class is kept so that the subclass can call its
    # initialize() after the module attribute has been replaced
    msvccompilerclass = msvccompiler.MSVCCompiler

    class HackedMSVCCompiler(msvccompiler.MSVCCompiler):
        def initialize(self):
            msvccompilerclass.initialize(self)
            # "warning LNK4197: export 'func' specified multiple times"
            self.ldflags_shared.append('/ignore:4197')
            self.ldflags_shared_debug.append('/ignore:4197')

    msvccompiler.MSVCCompiler = HackedMSVCCompiler
965 1022
# Non-Python files shipped inside the 'mercurial' package (glob patterns);
# handed to setup() through ``package_data``.
packagedata = {'mercurial': ['locale/*/LC_MESSAGES/hg.mo',
                             'help/*.txt',
                             'help/internals/*.txt',
                             'default.d/*.rc',
                             'dummycert.pem']}
971 1028
def ordinarypath(p):
    """Tell whether ``p`` is a regular name: non-empty, not starting with
    '.' and not ending with '~'.

    An empty ``p`` is returned unchanged (falsy); otherwise a bool results.
    """
    return p and not (p[0] == '.' or p[-1] == '~')
974 1031
# Add every ordinary file below mercurial/templates to the package data,
# with paths relative to the 'mercurial' directory.
for root in ('templates',):
    for curdir, dirs, files in os.walk(os.path.join('mercurial', root)):
        # drop the leading 'mercurial' path component
        curdir = curdir.split(os.sep, 1)[1]
        # in-place assignment prunes the directories os.walk descends into
        dirs[:] = filter(ordinarypath, dirs)
        for f in filter(ordinarypath, files):
            f = os.path.join(curdir, f)
            packagedata['mercurial'].append(f)
982 1039
datafiles = []

# distutils expects version to be str/unicode. Converting it to
# unicode on Python 2 still works because it won't contain any
# non-ascii bytes and will be implicitly converted back to bytes
# when operated on.
assert isinstance(version, bytes)
setupversion = version.decode('ascii')

# additional keyword arguments for setup(), depending on the tooling in use
extra = {}

if issetuptools:
    extra['python_requires'] = supportedpy
if py2exeloaded:
    extra['console'] = [
        {'script':'hg',
         'copyright':'Copyright (C) 2005-2018 Matt Mackall and others',
         'product_version':version}]
    # sub command of 'build' because 'py2exe' does not handle sub_commands
    build.sub_commands.insert(0, ('build_hgextindex', None))
    # put dlls in sub directory so that they won't pollute PATH
    extra['zipfile'] = 'lib/library.zip'

if os.name == 'nt':
    # Windows binary file versions for exe/dll files must have the
    # form W.X.Y.Z, where W,X,Y,Z are numbers in the range 0..65535
    # Split the already decoded value: splitting the bytes ``version``
    # again would hand bytes to setup() and undo the decode above.
    setupversion = setupversion.split('+', 1)[0]
1010 1067
# Xcode specific workarounds. The xcodebuild output is kept in its own name
# so that the module-level Mercurial ``version`` is not overwritten.
if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
    xcodeversion = runcmd(['/usr/bin/xcodebuild', '-version'],
                          {})[1].splitlines()
    if xcodeversion:
        xcodeversion = xcodeversion[0]
        if sys.version_info[0] == 3:
            xcodeversion = xcodeversion.decode('utf-8')
        xcode4 = (xcodeversion.startswith('Xcode') and
                  StrictVersion(xcodeversion.split()[1]) >=
                  StrictVersion('4.0'))
        xcode51 = re.match(r'^Xcode\s+5\.1', xcodeversion) is not None
    else:
        # xcodebuild returns empty on OS X Lion with XCode 4.3 not
        # installed, but instead with only command-line tools. Assume
        # that only happens on >= Lion, thus no PPC support.
        xcode4 = True
        xcode51 = False

    # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
    # distutils.sysconfig
    if xcode4:
        os.environ['ARCHFLAGS'] = ''

    # XCode 5.1 changes clang such that it now fails to compile if the
    # -mno-fused-madd flag is passed, but the version of Python shipped with
    # OS X 10.9 Mavericks includes this flag. This causes problems in all
    # C extension modules, and a bug has been filed upstream at
    # http://bugs.python.org/issue21244. We also need to patch this here
    # so Mercurial can continue to compile in the meantime.
    if xcode51:
        cflags = get_config_var('CFLAGS')
        if cflags and re.search(r'-mno-fused-madd\b', cflags) is not None:
            os.environ['CFLAGS'] = (
                os.environ.get('CFLAGS', '') + ' -Qunused-arguments')
1043 1100
# Entry point: hand the metadata, package lists, extension modules and
# command classes assembled above to setup(). ``extra`` carries the
# setuptools/py2exe specific keyword arguments.
setup(name='mercurial',
      version=setupversion,
      author='Matt Mackall and many others',
      author_email='mercurial@mercurial-scm.org',
      url='https://mercurial-scm.org/',
      download_url='https://mercurial-scm.org/release/',
      description=('Fast scalable distributed SCM (revision control, version '
                   'control) system'),
      long_description=('Mercurial is a distributed SCM tool written in Python.'
                        ' It is used by a number of large projects that require'
                        ' fast, reliable distributed revision control, such as '
                        'Mozilla.'),
      license='GNU GPLv2 or any later version',
      classifiers=[
          'Development Status :: 6 - Mature',
          'Environment :: Console',
          'Intended Audience :: Developers',
          'Intended Audience :: System Administrators',
          'License :: OSI Approved :: GNU General Public License (GPL)',
          'Natural Language :: Danish',
          'Natural Language :: English',
          'Natural Language :: German',
          'Natural Language :: Italian',
          'Natural Language :: Japanese',
          'Natural Language :: Portuguese (Brazilian)',
          'Operating System :: Microsoft :: Windows',
          'Operating System :: OS Independent',
          'Operating System :: POSIX',
          'Programming Language :: C',
          'Programming Language :: Python',
          'Topic :: Software Development :: Version Control',
      ],
      scripts=scripts,
      packages=packages,
      ext_modules=extmodules,
      data_files=datafiles,
      package_data=packagedata,
      cmdclass=cmdclass,
      distclass=hgdist,
      options={
          'py2exe': {
              'packages': [
                  'hgdemandimport',
                  'hgext',
                  'email',
                  # implicitly imported per module policy
                  # (cffi wouldn't be used as a frozen exe)
                  'mercurial.cext',
                  #'mercurial.cffi',
                  'mercurial.pure',
              ],
          },
          'bdist_mpkg': {
              'zipdist': False,
              'license': 'COPYING',
              'readme': 'contrib/packaging/macosx/Readme.html',
              'welcome': 'contrib/packaging/macosx/Welcome.html',
          },
      },
      **extra)
General Comments 0
You need to be logged in to leave comments. Login now