##// END OF EJS Templates
cext: split character encoding functions to new compilation unit...
Yuya Nishihara -
r33752:e9996bd7 default
parent child Browse files
Show More
This diff has been collapsed as it changes many lines, (796 lines changed) Show them Hide them
@@ -1,5 +1,5
1 1 /*
2 parsers.c - efficient content parsing
2 charencode.c - miscellaneous character encoding
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
@@ -8,26 +8,8
8 8 */
9 9
10 10 #include <Python.h>
11 #include <ctype.h>
12 #include <stddef.h>
13 #include <string.h>
14 11
15 12 #include "util.h"
16 #include "bitmanipulation.h"
17
18 #ifdef IS_PY3K
19 /* The mapping of Python types is meant to be temporary to get Python
20 * 3 to compile. We should remove this once Python 3 is fully
21 * supported and proper types are used in the extensions themselves. */
22 #define PyInt_Type PyLong_Type
23 #define PyInt_Check PyLong_Check
24 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_AS_LONG PyLong_AS_LONG
27 #define PyInt_AsLong PyLong_AsLong
28 #endif
29
30 static const char *const versionerrortext = "Python minor version mismatch";
31 13
32 14 static const char lowertable[128] = {
33 15 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
@@ -139,7 +121,7 quit:
139 121 return ret;
140 122 }
141 123
142 static PyObject *asciilower(PyObject *self, PyObject *args)
124 PyObject *asciilower(PyObject *self, PyObject *args)
143 125 {
144 126 PyObject *str_obj;
145 127 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
@@ -147,7 +129,7 static PyObject *asciilower(PyObject *se
147 129 return _asciitransform(str_obj, lowertable, NULL);
148 130 }
149 131
150 static PyObject *asciiupper(PyObject *self, PyObject *args)
132 PyObject *asciiupper(PyObject *self, PyObject *args)
151 133 {
152 134 PyObject *str_obj;
153 135 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
@@ -155,17 +137,7 static PyObject *asciiupper(PyObject *se
155 137 return _asciitransform(str_obj, uppertable, NULL);
156 138 }
157 139
158 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
159 {
160 Py_ssize_t expected_size;
161
162 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
163 return NULL;
164
165 return _dict_new_presized(expected_size);
166 }
167
168 static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
140 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
169 141 {
170 142 PyObject *dmap, *spec_obj, *normcase_fallback;
171 143 PyObject *file_foldmap = NULL;
@@ -235,763 +207,3 quit:
235 207 Py_XDECREF(file_foldmap);
236 208 return NULL;
237 209 }
238
239 /*
240 * This code assumes that a manifest is stitched together with newline
241 * ('\n') characters.
242 */
243 static PyObject *parse_manifest(PyObject *self, PyObject *args)
244 {
245 PyObject *mfdict, *fdict;
246 char *str, *start, *end;
247 int len;
248
249 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
250 &PyDict_Type, &mfdict,
251 &PyDict_Type, &fdict,
252 &str, &len))
253 goto quit;
254
255 start = str;
256 end = str + len;
257 while (start < end) {
258 PyObject *file = NULL, *node = NULL;
259 PyObject *flags = NULL;
260 char *zero = NULL, *newline = NULL;
261 ptrdiff_t nlen;
262
263 zero = memchr(start, '\0', end - start);
264 if (!zero) {
265 PyErr_SetString(PyExc_ValueError,
266 "manifest entry has no separator");
267 goto quit;
268 }
269
270 newline = memchr(zero + 1, '\n', end - (zero + 1));
271 if (!newline) {
272 PyErr_SetString(PyExc_ValueError,
273 "manifest contains trailing garbage");
274 goto quit;
275 }
276
277 file = PyBytes_FromStringAndSize(start, zero - start);
278
279 if (!file)
280 goto bail;
281
282 nlen = newline - zero - 1;
283
284 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
285 if (!node)
286 goto bail;
287
288 if (nlen > 40) {
289 flags = PyBytes_FromStringAndSize(zero + 41,
290 nlen - 40);
291 if (!flags)
292 goto bail;
293
294 if (PyDict_SetItem(fdict, file, flags) == -1)
295 goto bail;
296 }
297
298 if (PyDict_SetItem(mfdict, file, node) == -1)
299 goto bail;
300
301 start = newline + 1;
302
303 Py_XDECREF(flags);
304 Py_XDECREF(node);
305 Py_XDECREF(file);
306 continue;
307 bail:
308 Py_XDECREF(flags);
309 Py_XDECREF(node);
310 Py_XDECREF(file);
311 goto quit;
312 }
313
314 Py_INCREF(Py_None);
315 return Py_None;
316 quit:
317 return NULL;
318 }
319
320 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
321 int size, int mtime)
322 {
323 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
324 &dirstateTupleType);
325 if (!t)
326 return NULL;
327 t->state = state;
328 t->mode = mode;
329 t->size = size;
330 t->mtime = mtime;
331 return t;
332 }
333
334 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
335 PyObject *kwds)
336 {
337 /* We do all the initialization here and not in a tp_init function
338 * because dirstate_tuple is immutable. */
339 dirstateTupleObject *t;
340 char state;
341 int size, mode, mtime;
342 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
343 return NULL;
344
345 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
346 if (!t)
347 return NULL;
348 t->state = state;
349 t->mode = mode;
350 t->size = size;
351 t->mtime = mtime;
352
353 return (PyObject *)t;
354 }
355
356 static void dirstate_tuple_dealloc(PyObject *o)
357 {
358 PyObject_Del(o);
359 }
360
361 static Py_ssize_t dirstate_tuple_length(PyObject *o)
362 {
363 return 4;
364 }
365
366 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
367 {
368 dirstateTupleObject *t = (dirstateTupleObject *)o;
369 switch (i) {
370 case 0:
371 return PyBytes_FromStringAndSize(&t->state, 1);
372 case 1:
373 return PyInt_FromLong(t->mode);
374 case 2:
375 return PyInt_FromLong(t->size);
376 case 3:
377 return PyInt_FromLong(t->mtime);
378 default:
379 PyErr_SetString(PyExc_IndexError, "index out of range");
380 return NULL;
381 }
382 }
383
384 static PySequenceMethods dirstate_tuple_sq = {
385 dirstate_tuple_length, /* sq_length */
386 0, /* sq_concat */
387 0, /* sq_repeat */
388 dirstate_tuple_item, /* sq_item */
389 0, /* sq_ass_item */
390 0, /* sq_contains */
391 0, /* sq_inplace_concat */
392 0 /* sq_inplace_repeat */
393 };
394
395 PyTypeObject dirstateTupleType = {
396 PyVarObject_HEAD_INIT(NULL, 0)
397 "dirstate_tuple", /* tp_name */
398 sizeof(dirstateTupleObject),/* tp_basicsize */
399 0, /* tp_itemsize */
400 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
401 0, /* tp_print */
402 0, /* tp_getattr */
403 0, /* tp_setattr */
404 0, /* tp_compare */
405 0, /* tp_repr */
406 0, /* tp_as_number */
407 &dirstate_tuple_sq, /* tp_as_sequence */
408 0, /* tp_as_mapping */
409 0, /* tp_hash */
410 0, /* tp_call */
411 0, /* tp_str */
412 0, /* tp_getattro */
413 0, /* tp_setattro */
414 0, /* tp_as_buffer */
415 Py_TPFLAGS_DEFAULT, /* tp_flags */
416 "dirstate tuple", /* tp_doc */
417 0, /* tp_traverse */
418 0, /* tp_clear */
419 0, /* tp_richcompare */
420 0, /* tp_weaklistoffset */
421 0, /* tp_iter */
422 0, /* tp_iternext */
423 0, /* tp_methods */
424 0, /* tp_members */
425 0, /* tp_getset */
426 0, /* tp_base */
427 0, /* tp_dict */
428 0, /* tp_descr_get */
429 0, /* tp_descr_set */
430 0, /* tp_dictoffset */
431 0, /* tp_init */
432 0, /* tp_alloc */
433 dirstate_tuple_new, /* tp_new */
434 };
435
436 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
437 {
438 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
439 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
440 char state, *cur, *str, *cpos;
441 int mode, size, mtime;
442 unsigned int flen, len, pos = 40;
443 int readlen;
444
445 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
446 &PyDict_Type, &dmap,
447 &PyDict_Type, &cmap,
448 &str, &readlen))
449 goto quit;
450
451 len = readlen;
452
453 /* read parents */
454 if (len < 40) {
455 PyErr_SetString(
456 PyExc_ValueError, "too little data for parents");
457 goto quit;
458 }
459
460 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
461 if (!parents)
462 goto quit;
463
464 /* read filenames */
465 while (pos >= 40 && pos < len) {
466 if (pos + 17 > len) {
467 PyErr_SetString(PyExc_ValueError,
468 "overflow in dirstate");
469 goto quit;
470 }
471 cur = str + pos;
472 /* unpack header */
473 state = *cur;
474 mode = getbe32(cur + 1);
475 size = getbe32(cur + 5);
476 mtime = getbe32(cur + 9);
477 flen = getbe32(cur + 13);
478 pos += 17;
479 cur += 17;
480 if (flen > len - pos) {
481 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
482 goto quit;
483 }
484
485 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
486 mtime);
487 cpos = memchr(cur, 0, flen);
488 if (cpos) {
489 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
490 cname = PyBytes_FromStringAndSize(cpos + 1,
491 flen - (cpos - cur) - 1);
492 if (!fname || !cname ||
493 PyDict_SetItem(cmap, fname, cname) == -1 ||
494 PyDict_SetItem(dmap, fname, entry) == -1)
495 goto quit;
496 Py_DECREF(cname);
497 } else {
498 fname = PyBytes_FromStringAndSize(cur, flen);
499 if (!fname ||
500 PyDict_SetItem(dmap, fname, entry) == -1)
501 goto quit;
502 }
503 Py_DECREF(fname);
504 Py_DECREF(entry);
505 fname = cname = entry = NULL;
506 pos += flen;
507 }
508
509 ret = parents;
510 Py_INCREF(ret);
511 quit:
512 Py_XDECREF(fname);
513 Py_XDECREF(cname);
514 Py_XDECREF(entry);
515 Py_XDECREF(parents);
516 return ret;
517 }
518
519 /*
520 * Build a set of non-normal and other parent entries from the dirstate dmap
521 */
522 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
523 PyObject *dmap, *fname, *v;
524 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
525 Py_ssize_t pos;
526
527 if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
528 &PyDict_Type, &dmap))
529 goto bail;
530
531 nonnset = PySet_New(NULL);
532 if (nonnset == NULL)
533 goto bail;
534
535 otherpset = PySet_New(NULL);
536 if (otherpset == NULL)
537 goto bail;
538
539 pos = 0;
540 while (PyDict_Next(dmap, &pos, &fname, &v)) {
541 dirstateTupleObject *t;
542 if (!dirstate_tuple_check(v)) {
543 PyErr_SetString(PyExc_TypeError,
544 "expected a dirstate tuple");
545 goto bail;
546 }
547 t = (dirstateTupleObject *)v;
548
549 if (t->state == 'n' && t->size == -2) {
550 if (PySet_Add(otherpset, fname) == -1) {
551 goto bail;
552 }
553 }
554
555 if (t->state == 'n' && t->mtime != -1)
556 continue;
557 if (PySet_Add(nonnset, fname) == -1)
558 goto bail;
559 }
560
561 result = Py_BuildValue("(OO)", nonnset, otherpset);
562 if (result == NULL)
563 goto bail;
564 Py_DECREF(nonnset);
565 Py_DECREF(otherpset);
566 return result;
567 bail:
568 Py_XDECREF(nonnset);
569 Py_XDECREF(otherpset);
570 Py_XDECREF(result);
571 return NULL;
572 }
573
574 /*
575 * Efficiently pack a dirstate object into its on-disk format.
576 */
577 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
578 {
579 PyObject *packobj = NULL;
580 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
581 Py_ssize_t nbytes, pos, l;
582 PyObject *k, *v = NULL, *pn;
583 char *p, *s;
584 int now;
585
586 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate",
587 &PyDict_Type, &map, &PyDict_Type, &copymap,
588 &pl, &now))
589 return NULL;
590
591 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
592 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
593 return NULL;
594 }
595
596 /* Figure out how much we need to allocate. */
597 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
598 PyObject *c;
599 if (!PyBytes_Check(k)) {
600 PyErr_SetString(PyExc_TypeError, "expected string key");
601 goto bail;
602 }
603 nbytes += PyBytes_GET_SIZE(k) + 17;
604 c = PyDict_GetItem(copymap, k);
605 if (c) {
606 if (!PyBytes_Check(c)) {
607 PyErr_SetString(PyExc_TypeError,
608 "expected string key");
609 goto bail;
610 }
611 nbytes += PyBytes_GET_SIZE(c) + 1;
612 }
613 }
614
615 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
616 if (packobj == NULL)
617 goto bail;
618
619 p = PyBytes_AS_STRING(packobj);
620
621 pn = PySequence_ITEM(pl, 0);
622 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
623 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
624 goto bail;
625 }
626 memcpy(p, s, l);
627 p += 20;
628 pn = PySequence_ITEM(pl, 1);
629 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
630 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
631 goto bail;
632 }
633 memcpy(p, s, l);
634 p += 20;
635
636 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
637 dirstateTupleObject *tuple;
638 char state;
639 int mode, size, mtime;
640 Py_ssize_t len, l;
641 PyObject *o;
642 char *t;
643
644 if (!dirstate_tuple_check(v)) {
645 PyErr_SetString(PyExc_TypeError,
646 "expected a dirstate tuple");
647 goto bail;
648 }
649 tuple = (dirstateTupleObject *)v;
650
651 state = tuple->state;
652 mode = tuple->mode;
653 size = tuple->size;
654 mtime = tuple->mtime;
655 if (state == 'n' && mtime == now) {
656 /* See pure/parsers.py:pack_dirstate for why we do
657 * this. */
658 mtime = -1;
659 mtime_unset = (PyObject *)make_dirstate_tuple(
660 state, mode, size, mtime);
661 if (!mtime_unset)
662 goto bail;
663 if (PyDict_SetItem(map, k, mtime_unset) == -1)
664 goto bail;
665 Py_DECREF(mtime_unset);
666 mtime_unset = NULL;
667 }
668 *p++ = state;
669 putbe32((uint32_t)mode, p);
670 putbe32((uint32_t)size, p + 4);
671 putbe32((uint32_t)mtime, p + 8);
672 t = p + 12;
673 p += 16;
674 len = PyBytes_GET_SIZE(k);
675 memcpy(p, PyBytes_AS_STRING(k), len);
676 p += len;
677 o = PyDict_GetItem(copymap, k);
678 if (o) {
679 *p++ = '\0';
680 l = PyBytes_GET_SIZE(o);
681 memcpy(p, PyBytes_AS_STRING(o), l);
682 p += l;
683 len += l + 1;
684 }
685 putbe32((uint32_t)len, t);
686 }
687
688 pos = p - PyBytes_AS_STRING(packobj);
689 if (pos != nbytes) {
690 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
691 (long)pos, (long)nbytes);
692 goto bail;
693 }
694
695 return packobj;
696 bail:
697 Py_XDECREF(mtime_unset);
698 Py_XDECREF(packobj);
699 Py_XDECREF(v);
700 return NULL;
701 }
702
703 #define BUMPED_FIX 1
704 #define USING_SHA_256 2
705 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
706
707 static PyObject *readshas(
708 const char *source, unsigned char num, Py_ssize_t hashwidth)
709 {
710 int i;
711 PyObject *list = PyTuple_New(num);
712 if (list == NULL) {
713 return NULL;
714 }
715 for (i = 0; i < num; i++) {
716 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
717 if (hash == NULL) {
718 Py_DECREF(list);
719 return NULL;
720 }
721 PyTuple_SET_ITEM(list, i, hash);
722 source += hashwidth;
723 }
724 return list;
725 }
726
727 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
728 uint32_t *msize)
729 {
730 const char *data = databegin;
731 const char *meta;
732
733 double mtime;
734 int16_t tz;
735 uint16_t flags;
736 unsigned char nsuccs, nparents, nmetadata;
737 Py_ssize_t hashwidth = 20;
738
739 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
740 PyObject *metadata = NULL, *ret = NULL;
741 int i;
742
743 if (data + FM1_HEADER_SIZE > dataend) {
744 goto overflow;
745 }
746
747 *msize = getbe32(data);
748 data += 4;
749 mtime = getbefloat64(data);
750 data += 8;
751 tz = getbeint16(data);
752 data += 2;
753 flags = getbeuint16(data);
754 data += 2;
755
756 if (flags & USING_SHA_256) {
757 hashwidth = 32;
758 }
759
760 nsuccs = (unsigned char)(*data++);
761 nparents = (unsigned char)(*data++);
762 nmetadata = (unsigned char)(*data++);
763
764 if (databegin + *msize > dataend) {
765 goto overflow;
766 }
767 dataend = databegin + *msize; /* narrow down to marker size */
768
769 if (data + hashwidth > dataend) {
770 goto overflow;
771 }
772 prec = PyBytes_FromStringAndSize(data, hashwidth);
773 data += hashwidth;
774 if (prec == NULL) {
775 goto bail;
776 }
777
778 if (data + nsuccs * hashwidth > dataend) {
779 goto overflow;
780 }
781 succs = readshas(data, nsuccs, hashwidth);
782 if (succs == NULL) {
783 goto bail;
784 }
785 data += nsuccs * hashwidth;
786
787 if (nparents == 1 || nparents == 2) {
788 if (data + nparents * hashwidth > dataend) {
789 goto overflow;
790 }
791 parents = readshas(data, nparents, hashwidth);
792 if (parents == NULL) {
793 goto bail;
794 }
795 data += nparents * hashwidth;
796 } else {
797 parents = Py_None;
798 Py_INCREF(parents);
799 }
800
801 if (data + 2 * nmetadata > dataend) {
802 goto overflow;
803 }
804 meta = data + (2 * nmetadata);
805 metadata = PyTuple_New(nmetadata);
806 if (metadata == NULL) {
807 goto bail;
808 }
809 for (i = 0; i < nmetadata; i++) {
810 PyObject *tmp, *left = NULL, *right = NULL;
811 Py_ssize_t leftsize = (unsigned char)(*data++);
812 Py_ssize_t rightsize = (unsigned char)(*data++);
813 if (meta + leftsize + rightsize > dataend) {
814 goto overflow;
815 }
816 left = PyBytes_FromStringAndSize(meta, leftsize);
817 meta += leftsize;
818 right = PyBytes_FromStringAndSize(meta, rightsize);
819 meta += rightsize;
820 tmp = PyTuple_New(2);
821 if (!left || !right || !tmp) {
822 Py_XDECREF(left);
823 Py_XDECREF(right);
824 Py_XDECREF(tmp);
825 goto bail;
826 }
827 PyTuple_SET_ITEM(tmp, 0, left);
828 PyTuple_SET_ITEM(tmp, 1, right);
829 PyTuple_SET_ITEM(metadata, i, tmp);
830 }
831 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
832 metadata, mtime, (int)tz * 60, parents);
833 goto bail; /* return successfully */
834
835 overflow:
836 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
837 bail:
838 Py_XDECREF(prec);
839 Py_XDECREF(succs);
840 Py_XDECREF(metadata);
841 Py_XDECREF(parents);
842 return ret;
843 }
844
845
846 static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
847 const char *data, *dataend;
848 int datalen;
849 Py_ssize_t offset, stop;
850 PyObject *markers = NULL;
851
852 if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
853 return NULL;
854 }
855 dataend = data + datalen;
856 data += offset;
857 markers = PyList_New(0);
858 if (!markers) {
859 return NULL;
860 }
861 while (offset < stop) {
862 uint32_t msize;
863 int error;
864 PyObject *record = fm1readmarker(data, dataend, &msize);
865 if (!record) {
866 goto bail;
867 }
868 error = PyList_Append(markers, record);
869 Py_DECREF(record);
870 if (error) {
871 goto bail;
872 }
873 data += msize;
874 offset += msize;
875 }
876 return markers;
877 bail:
878 Py_DECREF(markers);
879 return NULL;
880 }
881
882 static char parsers_doc[] = "Efficient content parsing.";
883
884 PyObject *encodedir(PyObject *self, PyObject *args);
885 PyObject *pathencode(PyObject *self, PyObject *args);
886 PyObject *lowerencode(PyObject *self, PyObject *args);
887 PyObject *parse_index2(PyObject *self, PyObject *args);
888
889 static PyMethodDef methods[] = {
890 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
891 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
892 "create a set containing non-normal and other parent entries of given "
893 "dirstate\n"},
894 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
895 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
896 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
897 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
898 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
899 {"dict_new_presized", dict_new_presized, METH_VARARGS,
900 "construct a dict with an expected size\n"},
901 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
902 "make file foldmap\n"},
903 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
904 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
905 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
906 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
907 "parse v1 obsolete markers\n"},
908 {NULL, NULL}
909 };
910
911 void dirs_module_init(PyObject *mod);
912 void manifest_module_init(PyObject *mod);
913 void revlog_module_init(PyObject *mod);
914
915 static const int version = 1;
916
917 static void module_init(PyObject *mod)
918 {
919 PyModule_AddIntConstant(mod, "version", version);
920
921 /* This module constant has two purposes. First, it lets us unit test
922 * the ImportError raised without hard-coding any error text. This
923 * means we can change the text in the future without breaking tests,
924 * even across changesets without a recompile. Second, its presence
925 * can be used to determine whether the version-checking logic is
926 * present, which also helps in testing across changesets without a
927 * recompile. Note that this means the pure-Python version of parsers
928 * should not have this module constant. */
929 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
930
931 dirs_module_init(mod);
932 manifest_module_init(mod);
933 revlog_module_init(mod);
934
935 if (PyType_Ready(&dirstateTupleType) < 0)
936 return;
937 Py_INCREF(&dirstateTupleType);
938 PyModule_AddObject(mod, "dirstatetuple",
939 (PyObject *)&dirstateTupleType);
940 }
941
942 static int check_python_version(void)
943 {
944 PyObject *sys = PyImport_ImportModule("sys"), *ver;
945 long hexversion;
946 if (!sys)
947 return -1;
948 ver = PyObject_GetAttrString(sys, "hexversion");
949 Py_DECREF(sys);
950 if (!ver)
951 return -1;
952 hexversion = PyInt_AsLong(ver);
953 Py_DECREF(ver);
954 /* sys.hexversion is a 32-bit number by default, so the -1 case
955 * should only occur in unusual circumstances (e.g. if sys.hexversion
956 * is manually set to an invalid value). */
957 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
958 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
959 "modules were compiled with Python " PY_VERSION ", but "
960 "Mercurial is currently using Python with sys.hexversion=%ld: "
961 "Python %s\n at: %s", versionerrortext, hexversion,
962 Py_GetVersion(), Py_GetProgramFullPath());
963 return -1;
964 }
965 return 0;
966 }
967
968 #ifdef IS_PY3K
969 static struct PyModuleDef parsers_module = {
970 PyModuleDef_HEAD_INIT,
971 "parsers",
972 parsers_doc,
973 -1,
974 methods
975 };
976
977 PyMODINIT_FUNC PyInit_parsers(void)
978 {
979 PyObject *mod;
980
981 if (check_python_version() == -1)
982 return NULL;
983 mod = PyModule_Create(&parsers_module);
984 module_init(mod);
985 return mod;
986 }
987 #else
988 PyMODINIT_FUNC initparsers(void)
989 {
990 PyObject *mod;
991
992 if (check_python_version() == -1)
993 return;
994 mod = Py_InitModule3("parsers", methods, parsers_doc);
995 module_init(mod);
996 }
997 #endif
@@ -38,7 +38,7 typedef struct {
38 38 #define MANIFEST_NOT_SORTED -2
39 39 #define MANIFEST_MALFORMED -3
40 40
41 /* defined in parsers.c */
41 /* defined in charencode.c */
42 42 PyObject *unhexlify(const char *str, int len);
43 43
44 44 /* get the length of the path for a line */
@@ -29,131 +29,11
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 static const char lowertable[128] = {
33 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
34 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
35 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
36 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
37 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
38 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
39 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
40 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
41 '\x40',
42 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
45 '\x78', '\x79', '\x7a', /* X-Z */
46 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
47 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
48 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
49 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
50 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
51 };
52
53 static const char uppertable[128] = {
54 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
55 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
56 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
57 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
58 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
59 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
60 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
61 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
62 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
65 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
66 '\x60',
67 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
68 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
69 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
70 '\x58', '\x59', '\x5a', /* x-z */
71 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
72 };
73
74 /*
75 * Turn a hex-encoded string into binary.
76 */
77 PyObject *unhexlify(const char *str, int len)
78 {
79 PyObject *ret;
80 char *d;
81 int i;
82
83 ret = PyBytes_FromStringAndSize(NULL, len / 2);
84
85 if (!ret)
86 return NULL;
87
88 d = PyBytes_AsString(ret);
89
90 for (i = 0; i < len;) {
91 int hi = hexdigit(str, i++);
92 int lo = hexdigit(str, i++);
93 *d++ = (hi << 4) | lo;
94 }
95
96 return ret;
97 }
98
99 static inline PyObject *_asciitransform(PyObject *str_obj,
100 const char table[128],
101 PyObject *fallback_fn)
102 {
103 char *str, *newstr;
104 Py_ssize_t i, len;
105 PyObject *newobj = NULL;
106 PyObject *ret = NULL;
107
108 str = PyBytes_AS_STRING(str_obj);
109 len = PyBytes_GET_SIZE(str_obj);
110
111 newobj = PyBytes_FromStringAndSize(NULL, len);
112 if (!newobj)
113 goto quit;
114
115 newstr = PyBytes_AS_STRING(newobj);
116
117 for (i = 0; i < len; i++) {
118 char c = str[i];
119 if (c & 0x80) {
120 if (fallback_fn != NULL) {
121 ret = PyObject_CallFunctionObjArgs(fallback_fn,
122 str_obj, NULL);
123 } else {
124 PyObject *err = PyUnicodeDecodeError_Create(
125 "ascii", str, len, i, (i + 1),
126 "unexpected code byte");
127 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
128 Py_XDECREF(err);
129 }
130 goto quit;
131 }
132 newstr[i] = table[(unsigned char)c];
133 }
134
135 ret = newobj;
136 Py_INCREF(ret);
137 quit:
138 Py_XDECREF(newobj);
139 return ret;
140 }
141
142 static PyObject *asciilower(PyObject *self, PyObject *args)
143 {
144 PyObject *str_obj;
145 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
146 return NULL;
147 return _asciitransform(str_obj, lowertable, NULL);
148 }
149
150 static PyObject *asciiupper(PyObject *self, PyObject *args)
151 {
152 PyObject *str_obj;
153 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
154 return NULL;
155 return _asciitransform(str_obj, uppertable, NULL);
156 }
32 /* defined in charencode.c */
33 PyObject *unhexlify(const char *str, int len);
34 PyObject *asciilower(PyObject *self, PyObject *args);
35 PyObject *asciiupper(PyObject *self, PyObject *args);
36 PyObject *make_file_foldmap(PyObject *self, PyObject *args);
157 37
158 38 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
159 39 {
@@ -165,77 +45,6 static PyObject *dict_new_presized(PyObj
165 45 return _dict_new_presized(expected_size);
166 46 }
167 47
168 static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
169 {
170 PyObject *dmap, *spec_obj, *normcase_fallback;
171 PyObject *file_foldmap = NULL;
172 enum normcase_spec spec;
173 PyObject *k, *v;
174 dirstateTupleObject *tuple;
175 Py_ssize_t pos = 0;
176 const char *table;
177
178 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
179 &PyDict_Type, &dmap,
180 &PyInt_Type, &spec_obj,
181 &PyFunction_Type, &normcase_fallback))
182 goto quit;
183
184 spec = (int)PyInt_AS_LONG(spec_obj);
185 switch (spec) {
186 case NORMCASE_LOWER:
187 table = lowertable;
188 break;
189 case NORMCASE_UPPER:
190 table = uppertable;
191 break;
192 case NORMCASE_OTHER:
193 table = NULL;
194 break;
195 default:
196 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
197 goto quit;
198 }
199
200 /* Add some more entries to deal with additions outside this
201 function. */
202 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
203 if (file_foldmap == NULL)
204 goto quit;
205
206 while (PyDict_Next(dmap, &pos, &k, &v)) {
207 if (!dirstate_tuple_check(v)) {
208 PyErr_SetString(PyExc_TypeError,
209 "expected a dirstate tuple");
210 goto quit;
211 }
212
213 tuple = (dirstateTupleObject *)v;
214 if (tuple->state != 'r') {
215 PyObject *normed;
216 if (table != NULL) {
217 normed = _asciitransform(k, table,
218 normcase_fallback);
219 } else {
220 normed = PyObject_CallFunctionObjArgs(
221 normcase_fallback, k, NULL);
222 }
223
224 if (normed == NULL)
225 goto quit;
226 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
227 Py_DECREF(normed);
228 goto quit;
229 }
230 Py_DECREF(normed);
231 }
232 }
233 return file_foldmap;
234 quit:
235 Py_XDECREF(file_foldmap);
236 return NULL;
237 }
238
239 48 /*
240 49 * This code assumes that a manifest is stitched together with newline
241 50 * ('\n') characters.
@@ -760,7 +760,8 extmodules = [
760 760 'mercurial/cext/mpatch.c'],
761 761 include_dirs=common_include_dirs,
762 762 depends=common_depends),
763 Extension('mercurial.cext.parsers', ['mercurial/cext/dirs.c',
763 Extension('mercurial.cext.parsers', ['mercurial/cext/charencode.c',
764 'mercurial/cext/dirs.c',
764 765 'mercurial/cext/manifest.c',
765 766 'mercurial/cext/parsers.c',
766 767 'mercurial/cext/pathencode.c',
General Comments 0
You need to be logged in to leave comments. Login now