upstream/mercurial-mirror Commit - r16437:d126a0d1

util.h: replace ntohl/htonl with get/putbe32

Matt Mackall -

r16437:d126a0d1 default

parent child

mercurial/bdiff.c

0 +3 -5

              /*
               bdiff.c - efficient binary diff extension for Mercurial
               Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
               This software may be used and distributed according to the terms of
               the GNU General Public License, incorporated herein by reference.
               Based roughly on Python difflib
              */
              #include <Python.h>
              #include <stdlib.h>
              #include <string.h>
              #include <limits.h>
              #include "util.h"
              /* One line of input; l points into the caller's buffer (no copy is made). */
              struct line {
              	/* hash: content hash computed by splitlines();
              	   len: length in bytes, counting the terminating newline if any;
              	   n: next index in the match chain, INT_MAX when there is none;
              	   e: hash bucket that identifies the line's equivalence class */
              	int hash, len, n, e;
              	const char *l;
              };
              /* Index/length pair. equatelines() uses it as a hash bucket (head index
                 of the class in b, number of members); longest_match() uses it as
                 per-line state (last matching index in a, length of the run). */
              struct pos {
              	int pos, len;
              };
              /* Matching block: lines a1..a2-1 of a correspond to lines b1..b2-1 of b.
                 Blocks are kept in a singly linked list through next. */
              struct hunk;
              struct hunk {
              	int a1, a2, b1, b2;
              	struct hunk *next;
              };
              /*
               * Split the buffer a[0..len) into lines and hash each one.
               *
               * A line ends at '\n' or at the last byte of the buffer.  The array is
               * allocated with malloc(), stored in *lr, and ends with a zero-length
               * sentinel entry that points at a + len.
               *
               * Returns the number of real lines (sentinel excluded), or -1 when the
               * allocation fails, in which case *lr is NULL.
               */
              static int splitlines(const char *a, int len, struct line **lr)
              {
              	const char * const stop = a + len;
              	const char * const final = a + len - 1;
              	const char *cur, *start;
              	struct line *out;
              	unsigned h;
              	int total;

              	/* one slot per line plus one for the sentinel */
              	total = 1;
              	for (cur = a; cur < stop; cur++) {
              		if (*cur == '\n' || cur == final)
              			total++;
              	}

              	out = (struct line *)malloc(sizeof(struct line) * total);
              	*lr = out;
              	if (out == NULL)
              		return -1;

              	/* fill in one entry per line, hashing as we go */
              	h = 0;
              	start = a;
              	for (cur = a; cur < stop; cur++) {
              		/* Leonid Yuriev's hash */
              		h = (h * 1664525) + (unsigned char)*cur + 1013904223;
              		if (*cur != '\n' && cur != final)
              			continue;

              		out->hash = h;
              		out->len = cur - start + 1;
              		out->l = start;
              		out->n = INT_MAX;
              		out++;

              		/* start over for the next line */
              		h = 0;
              		start = cur + 1;
              	}

              	/* sentinel entry */
              	out->hash = 0;
              	out->len = 0;
              	out->l = stop;

              	return total - 1;
              }
              /*
               * Compare two lines.  Returns 0 when they have the same hash, the same
               * length and the same bytes, 1 otherwise.  The cheap checks run first so
               * memcmp() is only reached for likely matches.
               */
              static inline int cmp(struct line *a, struct line *b)
              {
              	if (a->hash != b->hash)
              		return 1;
              	if (a->len != b->len)
              		return 1;
              	return memcmp(a->l, b->l, a->len) != 0;
              }
              /*
               * Group the lines of b into equivalence classes using an open-addressing
               * hash table, then map every line of a onto its class.
               *
               * On return, b[i].n is the next index in b holding the same content
               * (INT_MAX ends the chain), a[i].n is the first index in b with the same
               * content, or INT_MAX when there is none or the class is too popular,
               * and the e fields hold the bucket number of the class so two lines can
               * be compared by comparing e.
               *
               * Returns 1 on success, 0 when no hash table could be allocated.
               */
              static int equatelines(struct line *a, int an, struct line *b, int bn)
              {
              	int i, j, buckets = 1, t, scale;
              	struct pos *h = NULL;
              	/* build a hash table of the next highest power of 2 */
              	while (buckets < bn + 1)
              		buckets *= 2;
              	/* try to allocate a large hash table to avoid collisions */
              	/* falls back from 4x to 2x to 1x the bucket count on failure */
              	for (scale = 4; scale; scale /= 2) {
              		h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
              		if (h)
              			break;
              	}
              	if (!h)
              		return 0;
              	/* from here on buckets is the index mask (table size - 1) */
              	buckets = buckets * scale - 1;
              	/* clear the hash table */
              	for (i = 0; i <= buckets; i++) {
              		h[i].pos = INT_MAX;
              		h[i].len = 0;
              	}
              	/* add lines to the hash table chains */
              	/* walking b backwards leaves each chain in ascending index order */
              	for (i = bn - 1; i >= 0; i--) {
              		/* find the equivalence class */
              		for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
              		     j = (j + 1) & buckets)
              			if (!cmp(b + i, b + h[j].pos))
              				break;
              		/* add to the head of the equivalence class */
              		b[i].n = h[j].pos;
              		b[i].e = j;
              		h[j].pos = i;
              		h[j].len++; /* keep track of popularity */
              	}
              	/* compute popularity threshold */
              	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
              	/* match items in a to their equivalence class in b */
              	for (i = 0; i < an; i++) {
              		/* find the equivalence class */
              		/* stops on an empty bucket when a[i] does not occur in b */
              		for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
              		     j = (j + 1) & buckets)
              			if (!cmp(a + i, b + h[j].pos))
              				break;
              		a[i].e = j; /* use equivalence class for quick compare */
              		if (h[j].len <= t)
              			a[i].n = h[j].pos; /* point to head of match list */
              		else
              			a[i].n = INT_MAX; /* too popular */
              	}
              	/* discard hash tables */
              	free(h);
              	return 1;
              }
              /*
               * Find the longest run of matching lines between a[a1..a2) and
               * b[b1..b2), following the match chains set up by equatelines().
               *
               * pos is scratch space indexed by line number in b: pos[j].pos is the
               * index in a most recently matched against b[j] and pos[j].len is the
               * length of the run ending there.
               *
               * The start of the match is stored in *omi (index in a) and *omj (index
               * in b).  Returns the length of the match, 0 when nothing matches.
               */
              static int longest_match(struct line *a, struct line *b, struct pos *pos,
              			 int a1, int a2, int b1, int b2, int *omi, int *omj)
              {
              	/* mi/mj: end (later start) of the best run; mk: its length;
              	   mb: lines added in front of it by the expansion step below */
              	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
              	for (i = a1; i < a2; i++) {
              		/* skip things before the current block */
              		for (j = a[i].n; j < b1; j = b[j].n)
              			;
              		/* loop through all lines match a[i] in b */
              		for (; j < b2; j = b[j].n) {
              			/* does this extend an earlier match? */
              			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
              				k = pos[j - 1].len + 1;
              			else
              				k = 1;
              			pos[j].pos = i;
              			pos[j].len = k;
              			/* best match so far? */
              			/* strict comparison: the first run of a given length wins */
              			if (k > mk) {
              				mi = i;
              				mj = j;
              				mk = k;
              			}
              		}
              	}
              	/* mi/mj marked the last line of the run; move them to its first */
              	if (mk) {
              		mi = mi - mk + 1;
              		mj = mj - mk + 1;
              	}
              	/* expand match to include neighboring popular lines */
              	/* popular lines have n == INT_MAX, so only their class e can match */
              	while (mi - mb > a1 && mj - mb > b1 &&
              	       a[mi - mb - 1].e == b[mj - mb - 1].e)
              		mb++;
              	while (mi + mk < a2 && mj + mk < b2 &&
              	       a[mi + mk].e == b[mj + mk].e)
              		mk++;
              	*omi = mi - mb;
              	*omj = mj - mb;
              	return mk + mb;
              }
              /*
               * Append the matching blocks of a[a1..a2) versus b[b1..b2) to the hunk
               * list whose current tail is l, in increasing order.
               *
               * The region before each longest match is handled by recursion, the
               * region after it by looping with a1/b1 advanced past the match.
               *
               * Returns the new tail of the list, or NULL when a hunk could not be
               * allocated.  Hunks linked before the failure stay on the list.
               */
              static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
              			    int a1, int a2, int b1, int b2, struct hunk *l)
              {
              	int i, j, k;
              	while (1) {
              		/* find the longest match in this chunk */
              		k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
              		if (!k)
              			return l;
              		/* and recurse on the remaining chunks on either side */
              		l = recurse(a, b, pos, a1, i, b1, j, l);
              		if (!l)
              			return NULL;
              		/* record the match itself after everything that precedes it */
              		l->next = (struct hunk *)malloc(sizeof(struct hunk));
              		if (!l->next)
              			return NULL;
              		l = l->next;
              		l->a1 = i;
              		l->a2 = i + k;
              		l->b1 = j;
              		l->b2 = j + k;
              		l->next = NULL;
              		/* tail-recursion didn't happen, so do equivalent iteration */
              		a1 = i + k;
              		b1 = j + k;
              	}
              }
              /*
               * Compute the list of matching blocks between a (an lines) and b (bn
               * lines) and hang it off base->next.  The list ends with a zero-length
               * sentinel hunk at (an, bn).
               *
               * Returns the number of hunks on the list, or -1 when memory runs out.
               * On failure any hunks already linked to base are left for the caller
               * to release with freehunks(base->next).
               */
              static int diff(struct line *a, int an, struct line *b, int bn,
              		 struct hunk *base)
              {
              	struct hunk *curr;
              	struct pos *pos;
              	int t, count = 0;

              	/* allocate and fill arrays */
              	t = equatelines(a, an, b, bn);
              	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
              	if (!pos || !t) {
              		/* both can only fail for lack of memory; an empty hunk list
              		   would be read by callers as "inputs are identical" */
              		free(pos);
              		return -1;
              	}

              	/* generate the matching block list */
              	curr = recurse(a, b, pos, 0, an, 0, bn, base);
              	/* pos is scratch space for the matcher only; release it before
              	   any early return so it cannot leak */
              	free(pos);
              	if (!curr)
              		return -1;

              	/* sentinel end hunk */
              	curr->next = (struct hunk *)malloc(sizeof(struct hunk));
              	if (!curr->next)
              		return -1;
              	curr = curr->next;
              	curr->a1 = curr->a2 = an;
              	curr->b1 = curr->b2 = bn;
              	curr->next = NULL;

              	/* normalize the hunk list, try to push each hunk towards the end */
              	for (curr = base->next; curr; curr = curr->next) {
              		struct hunk *next = curr->next;
              		int shift = 0;

              		if (!next)
              			break;

              		/* a hunk can slide forward while the lines just past its end
              		   still match on both sides */
              		if (curr->a2 == next->a1)
              			while (curr->a2 + shift < an && curr->b2 + shift < bn
              			       && !cmp(a + curr->a2 + shift,
              				       b + curr->b2 + shift))
              				shift++;
              		else if (curr->b2 == next->b1)
              			while (curr->b2 + shift < bn && curr->a2 + shift < an
              			       && !cmp(b + curr->b2 + shift,
              				       a + curr->a2 + shift))
              				shift++;
              		if (!shift)
              			continue;
              		curr->b2 += shift;
              		next->b1 += shift;
              		curr->a2 += shift;
              		next->a1 += shift;
              	}

              	for (curr = base->next; curr; curr = curr->next)
              		count++;
              	return count;
              }
              /* Release every hunk on the list starting at l; l may be NULL. */
              static void freehunks(struct hunk *l)
              {
              	while (l != NULL) {
              		/* read the link before the node is released */
              		struct hunk *following = l->next;
              		free(l);
              		l = following;
              	}
              }
              /*
               * Python entry point: blocks(a, b) -> list of (a1, a2, b1, b2) tuples.
               *
               * Both arguments must be bytes objects.  Each tuple describes a run of
               * lines a1..a2-1 of a that matches lines b1..b2-1 of b; the last tuple
               * is the zero-length sentinel produced by diff().
               *
               * Returns NULL with MemoryError set when an allocation fails.
               */
              static PyObject *blocks(PyObject *self, PyObject *args)
              {
              	PyObject *sa, *sb, *rl = NULL, *m;
              	struct line *a, *b;
              	struct hunk l, *h;
              	int an, bn, count, pos = 0;

              	/* the nomem path frees l.next, so it must be valid before the
              	   first jump there */
              	l.next = NULL;

              	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
              		return NULL;

              	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
              	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
              	if (!a || !b)
              		goto nomem;

              	count = diff(a, an, b, bn, &l);
              	if (count < 0)
              		goto nomem;

              	rl = PyList_New(count);
              	if (!rl)
              		goto nomem;

              	for (h = l.next; h; h = h->next) {
              		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
              		if (!m) {
              			/* never hand back a list with empty slots */
              			Py_DECREF(rl);
              			rl = NULL;
              			goto nomem;
              		}
              		/* PyList_SetItem takes over the reference to m */
              		PyList_SetItem(rl, pos, m);
              		pos++;
              	}

              nomem:
              	free(a);
              	free(b);
              	freehunks(l.next);
              	return rl ? rl : PyErr_NoMemory();
              }
              /*
               * Python entry point: bdiff(a, b) -> binary delta turning a into b.
               *
               * The delta is a sequence of records, one per differing region.  Each
               * record is a 12-byte header of three 32-bit big-endian values (start
               * offset in a, end offset in a, length of the replacement data)
               * followed by the replacement bytes taken from b.
               *
               * Returns NULL with MemoryError set when an allocation fails.
               */
              static PyObject *bdiff(PyObject *self, PyObject *args)
              {
              	char *sa, *sb, *rb;
              	PyObject *result = NULL;
              	struct line *al, *bl;
              	struct hunk l, *h;
-             	uint32_t encode[3];
              	int an, bn, len = 0, la, lb, count;
              	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
              		return NULL;
              	an = splitlines(sa, la, &al);
              	bn = splitlines(sb, lb, &bl);
              	/* NOTE(review): l.next is only assigned below, so this jump makes
              	   freehunks(l.next) at nomem read an uninitialized pointer */
              	if (!al || !bl)
              		goto nomem;
              	l.next = NULL;
              	count = diff(al, an, bl, bn, &l);
              	if (count < 0)
              		goto nomem;
              	/* calculate length of output */
              	/* la/lb now track the line just past the previous matching block */
              	la = lb = 0;
              	for (h = l.next; h; h = h->next) {
              		/* a gap before this block on either side needs one record */
              		if (h->a1 != la || h->b1 != lb)
              			len += 12 + bl[h->b1].l - bl[lb].l;
              		la = h->a2;
              		lb = h->b2;
              	}
              	result = PyBytes_FromStringAndSize(NULL, len);
              	if (!result)
              		goto nomem;
              	/* build binary patch */
              	rb = PyBytes_AsString(result);
              	la = lb = 0;
              	for (h = l.next; h; h = h->next) {
              		if (h->a1 != la || h->b1 != lb) {
              			/* byte length of the lines of b between the two blocks */
              			len = bl[h->b1].l - bl[lb].l;
-             			encode[0] = htonl(al[la].l - al->l);
-             			encode[1] = htonl(al[h->a1].l - al->l);
-             			encode[2] = htonl(len);
-             			memcpy(rb, encode, 12);
+             			putbe32(al[la].l - al->l, rb);
+             			putbe32(al[h->a1].l - al->l, rb + 4);
+             			putbe32(len, rb + 8);
              			memcpy(rb + 12, bl[lb].l, len);
              			rb += 12 + len;
              		}
              		la = h->a2;
              		lb = h->b2;
              	}
              nomem:
              	free(al);
              	free(bl);
              	freehunks(l.next);
              	return result ? result : PyErr_NoMemory();
              }
              /*
               * Python entry point: fixws(s, allws) -> bytes.
               *
               * With allws != 0 every ' ', \t and \r is dropped.  Otherwise each run
               * of those characters collapses to a single space, and a space that
               * directly precedes a newline is replaced by that newline.
               */
              static PyObject *fixws(PyObject *self, PyObject *args)
              {
              	PyObject *s, *result = NULL;
              	const char *src;
              	char *dst;
              	char allws, ch;
              	int idx, srclen, n = 0;

              	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
              		return NULL;

              	src = PyBytes_AsString(s);
              	srclen = PyBytes_Size(s);

              	/* the output is never longer than the input */
              	dst = (char *)malloc(srclen ? srclen : 1);
              	if (!dst)
              		goto nomem;

              	for (idx = 0; idx < srclen; idx++) {
              		ch = src[idx];
              		switch (ch) {
              		case ' ':
              		case '\t':
              		case '\r':
              			/* emit at most one space per whitespace run */
              			if (!allws && (n == 0 || dst[n - 1] != ' '))
              				dst[n++] = ' ';
              			break;
              		case '\n':
              			if (!allws && n > 0 && dst[n - 1] == ' ') {
              				/* overwrite the trailing space */
              				dst[n - 1] = '\n';
              				break;
              			}
              			/* fall through */
              		default:
              			dst[n++] = ch;
              			break;
              		}
              	}

              	result = PyBytes_FromStringAndSize(dst, n);

              nomem:
              	free(dst);
              	return result ? result : PyErr_NoMemory();
              }
              /* module docstring, used by both init paths below */
              static char mdiff_doc[] = "Efficient binary diff.";
              /* functions exported to Python; the {NULL, NULL} entry ends the table */
              static PyMethodDef methods[] = {
              	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
              	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
              	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
              	{NULL, NULL}
              };
              #ifdef IS_PY3K
              /* Python 3: module definition plus PyInit_<name> entry point */
              static struct PyModuleDef bdiff_module = {
              	PyModuleDef_HEAD_INIT,
              	"bdiff",
              	mdiff_doc,
              	-1,
              	methods
              };
              PyMODINIT_FUNC PyInit_bdiff(void)
              {
              	return PyModule_Create(&bdiff_module);
              }
              #else
              /* Python 2: init<name> entry point */
              PyMODINIT_FUNC initbdiff(void)
              {
              	Py_InitModule3("bdiff", methods, mdiff_doc);
              }
              #endif

mercurial/mpatch.c

0 +6 -10

              /*
               mpatch.c - efficient binary patching for Mercurial
               This implements a patch algorithm that's O(m + nlog n) where m is the
               size of the output and n is the number of patches.
               Given a list of binary patches, it unpacks each into a hunk list,
               then combines the hunk lists with a treewise recursion to form a
               single hunk list. This hunk list is then applied to the original
               text.
               The text (or binary) fragments are copied directly from their source
               Python objects into a preallocated output string to avoid the
               allocation of intermediate Python objects. Working memory is about 2x
               the total number of hunks.
               Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
               This software may be used and distributed according to the terms
               of the GNU General Public License, incorporated herein by reference.
              */
              #include <Python.h>
              #include <stdlib.h>
              #include <string.h>
              #include "util.h"
              static char mpatch_doc[] = "Efficient binary patching.";
              static PyObject *mpatch_Error;
              /* One patch hunk: bytes start..end of the text being patched are
                 replaced by len bytes found at data.  data is not owned by the
                 fragment; decode() points it into the patch buffer. */
              struct frag {
              	int start, end, len;
              	const char *data;
              };
              /* Array of fragments: base is the malloc()ed storage, head..tail is
                 the range currently in use (tail points one past the last one). */
              struct flist {
              	struct frag *base, *head, *tail;
              };
              /*
               * Allocate an empty fragment list with room for size fragments (at
               * least one).  Returns NULL with a Python exception set on failure.
               */
              static struct flist *lalloc(int size)
              {
              	struct flist *list;

              	if (size < 1)
              		size = 1;

              	list = (struct flist *)malloc(sizeof(struct flist));
              	if (!list)
              		goto fail;

              	list->base = (struct frag *)malloc(sizeof(struct frag) * size);
              	if (!list->base) {
              		free(list);
              		goto fail;
              	}

              	/* empty list: head and tail both sit at the start of the storage */
              	list->head = list->base;
              	list->tail = list->base;
              	return list;

              fail:
              	if (!PyErr_Occurred())
              		PyErr_NoMemory();
              	return NULL;
              }
              /* Release a fragment list and its storage; a may be NULL. */
              static void lfree(struct flist *a)
              {
              	if (!a)
              		return;
              	free(a->base);
              	free(a);
              }
              /* Number of fragments currently held between head and tail. */
              static int lsize(struct flist *a)
              {
              	return (int)(a->tail - a->head);
              }
              /* move hunks in source that lie before cut to dest, compensating
                 for changes in offset. the last hunk may be split if necessary.
                 offset is the running size change (len - (end - start)) of the
                 hunks consumed so far; the updated value is returned.
                 dest->tail and src->head are advanced past the moved hunks.
              */
              static int gather(struct flist *dest, struct flist *src, int cut, int offset)
              {
              	struct frag *d = dest->tail, *s = src->head;
              	int postend, c, l;
              	while (s != src->tail) {
              		if (s->start + offset >= cut)
              			break; /* we've gone far enough */
              		/* where this hunk's data ends once offset is applied */
              		postend = offset + s->start + s->len;
              		if (postend <= cut) {
              			/* save this hunk */
              			offset += s->start + s->len - s->end;
              			*d++ = *s++;
              		}
              		else {
              			/* break up this hunk */
              			/* c: split point in the replaced range, capped at its end */
              			c = cut - offset;
              			if (s->end < c)
              				c = s->end;
              			/* l: data bytes that go with the first half, capped at len */
              			l = cut - offset - s->start;
              			if (s->len < l)
              				l = s->len;
              			offset += s->start + l - c;
              			d->start = s->start;
              			d->end = c;
              			d->len = l;
              			d->data = s->data;
              			d++;
              			/* what is left of the hunk stays at the head of src */
              			s->start = c;
              			s->len = s->len - l;
              			s->data = s->data + l;
              			break;
              		}
              	}
              	dest->tail = d;
              	src->head = s;
              	return offset;
              }
              /* like gather, but with no output list: hunks of src that lie before
                 cut are dropped, a hunk straddling cut is trimmed in place, and the
                 updated offset is returned */
              static int discard(struct flist *src, int cut, int offset)
              {
              	struct frag *s = src->head;
              	int postend, c, l;
              	while (s != src->tail) {
              		if (s->start + offset >= cut)
              			break;
              		postend = offset + s->start + s->len;
              		if (postend <= cut) {
              			/* hunk lies entirely before cut: skip it */
              			offset += s->start + s->len - s->end;
              			s++;
              		}
              		else {
              			/* hunk straddles cut: keep only the part past it */
              			c = cut - offset;
              			if (s->end < c)
              				c = s->end;
              			l = cut - offset - s->start;
              			if (s->len < l)
              				l = s->len;
              			offset += s->start + l - c;
              			s->start = c;
              			s->len = s->len - l;
              			s->data = s->data + l;
              			break;
              		}
              	}
              	src->head = s;
              	return offset;
              }
              /* combine hunk lists a and b, while adjusting b for offset changes in a.
                 this deletes a and b and returns the resultant list.
                 returns NULL if a or b is NULL or if the result cannot be allocated;
                 a and b are released in that case too. */
              static struct flist *combine(struct flist *a, struct flist *b)
              {
              	struct flist *c = NULL;
              	struct frag *bh, *ct;
              	int offset = 0, post;
              	/* room for every hunk of a and b, doubled */
              	if (a && b)
              		c = lalloc((lsize(a) + lsize(b)) * 2);
              	if (c) {
              		for (bh = b->head; bh != b->tail; bh++) {
              			/* save old hunks */
              			offset = gather(c, a, bh->start, offset);
              			/* discard replaced hunks */
              			post = discard(a, bh->end, offset);
              			/* insert new hunk */
              			/* start uses the offset before the discarded hunks,
              			   end the offset after them */
              			ct = c->tail;
              			ct->start = bh->start - offset;
              			ct->end = bh->end - post;
              			ct->len = bh->len;
              			ct->data = bh->data;
              			c->tail++;
              			offset = post;
              		}
              		/* hold on to tail from a */
              		memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
              		c->tail += lsize(a);
              	}
              	lfree(a);
              	lfree(b);
              	return c;
              }
              /* decode a binary patch into a hunk list.
                 bin holds len bytes made of records: a 12-byte header (start, end and
                 data length as 32-bit big-endian values) followed by the data.
                 the fragments point into bin, which must outlive the list.
                 returns NULL with an exception set if the list cannot be allocated or
                 the records do not end exactly at bin + len. */
              static struct flist *decode(const char *bin, int len)
              {
              	struct flist *l;
              	struct frag *lt;
              	/* bin: start of the current header; data: the bytes right after it */
              	const char *data = bin + 12, *end = bin + len;
-             	uint32_t decode[3]; /* for dealing with alignment issues */
              	/* assume worst case size, we won't have many of these lists */
              	/* every record takes at least 12 bytes, so len / 12 bounds the count */
              	l = lalloc(len / 12);
              	if (!l)
              		return NULL;
              	lt = l->tail;
              	/* loop only while a complete header fits in the buffer */
              	while (data <= end) {
-             		memcpy(decode, bin, 12);
-             		lt->start = ntohl(decode[0]);
-             		lt->end = ntohl(decode[1]);
-             		lt->len = ntohl(decode[2]);
+             		lt->start = getbe32(bin);
+             		lt->end = getbe32(bin + 4);
+             		lt->len = getbe32(bin + 8);
              		if (lt->start > lt->end)
              			break; /* sanity check */
              		bin = data + lt->len;
              		if (bin < data)
              			break; /* big data + big (bogus) len can wrap around */
              		lt->data = data;
              		data = bin + 12;
              		lt++;
              	}
              	/* a well-formed patch leaves bin exactly at the end of the buffer */
              	if (bin != end) {
              		if (!PyErr_Occurred())
              			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
              		lfree(l);
              		return NULL;
              	}
              	l->tail = lt;
              	return l;
              }
              /* Work out how long a text of len bytes becomes once the hunks in l
                 are applied.  Returns -1 with mpatch_Error set when a hunk starts
                 before the previous one ended or reaches past len. */
              static int calcsize(int len, struct flist *l)
              {
              	struct frag *cur;
              	int total = 0, prev_end = 0;

              	for (cur = l->head; cur != l->tail; cur++) {
              		if (cur->start < prev_end || cur->end > len) {
              			if (!PyErr_Occurred())
              				PyErr_SetString(mpatch_Error,
              				                "invalid patch");
              			return -1;
              		}
              		/* untouched bytes before the hunk ... */
              		total += cur->start - prev_end;
              		/* ... then the hunk's replacement data */
              		total += cur->len;
              		prev_end = cur->end;
              	}

              	/* untouched bytes after the last hunk */
              	total += len - prev_end;
              	return total;
              }
              /*
               * Write the patched text into buf: unchanged stretches are copied from
               * orig (len bytes long) and replaced stretches from the hunks in l.
               * buf must be large enough for the result, see calcsize().
               *
               * Returns 1 on success, 0 with mpatch_Error set on an invalid hunk.
               */
              static int apply(char *buf, const char *orig, int len, struct flist *l)
              {
              	struct frag *cur;
              	char *dst = buf;
              	int consumed = 0;

              	for (cur = l->head; cur != l->tail; cur++) {
              		int gap;

              		if (cur->start < consumed || cur->end > len) {
              			if (!PyErr_Occurred())
              				PyErr_SetString(mpatch_Error,
              				                "invalid patch");
              			return 0;
              		}

              		/* unchanged bytes between the previous hunk and this one */
              		gap = cur->start - consumed;
              		memcpy(dst, orig + consumed, gap);
              		dst += gap;

              		/* replacement data for this hunk */
              		memcpy(dst, cur->data, cur->len);
              		dst += cur->len;

              		consumed = cur->end;
              	}

              	/* whatever follows the last hunk */
              	memcpy(dst, orig + consumed, len - consumed);
              	return 1;
              }
              /* Build one hunk list out of the patches bins[start..end) by splitting
                 the range in half, folding each half and combining the results.
                 Returns NULL with an exception set on failure. */
              static struct flist *fold(PyObject *bins, int start, int end)
              {
              	const char *buffer;
              	Py_ssize_t blen;
              	PyObject *item;
              	int half;

              	if (start + 1 != end) {
              		/* divide and conquer, memory management is elsewhere */
              		half = (end - start) / 2;
              		return combine(fold(bins, start, start + half),
              			       fold(bins, start + half, end));
              	}

              	/* a single patch: decode it straight from the object's buffer */
              	item = PyList_GetItem(bins, start);
              	if (item == NULL)
              		return NULL;
              	if (PyObject_AsCharBuffer(item, &buffer, &blen) != 0)
              		return NULL;
              	return decode(buffer, blen);
              }
              /*
               * Python entry point: patches(text, bins) -> patched text.
               *
               * bins is a list of binary patches which are folded into one hunk list
               * and applied to text in a single pass.  An empty list returns text
               * itself.  Returns NULL with an exception set on failure.
               */
              static PyObject *
              patches(PyObject *self, PyObject *args)
              {
              	PyObject *text, *bins, *result = NULL;
              	struct flist *patch;
              	const char *in;
              	char *out;
              	int len, outlen;
              	Py_ssize_t inlen;

              	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
              		return NULL;

              	len = PyList_Size(bins);
              	if (len < 0) {
              		/* bins is not a list.  A negative count must never reach
              		   fold(), which would recurse without bound on it. */
              		if (!PyErr_Occurred())
              			PyErr_SetString(PyExc_TypeError,
              			                "bins must be a list");
              		return NULL;
              	}
              	if (!len) {
              		/* nothing to do */
              		Py_INCREF(text);
              		return text;
              	}

              	if (PyObject_AsCharBuffer(text, &in, &inlen))
              		return NULL;

              	patch = fold(bins, 0, len);
              	if (!patch)
              		return NULL;

              	outlen = calcsize(inlen, patch);
              	if (outlen < 0)
              		goto cleanup;

              	result = PyBytes_FromStringAndSize(NULL, outlen);
              	if (!result)
              		goto cleanup;

              	out = PyBytes_AsString(result);
              	if (!apply(out, in, inlen, patch)) {
              		Py_DECREF(result);
              		result = NULL;
              	}

              cleanup:
              	lfree(patch);
              	return result;
              }
              /* calculate size of a patched file directly.
                 Python entry point: patchedsize(orig, patch) walks the record headers
                 of one binary patch and returns the length a text of orig bytes would
                 have after patching, without building the text.
                 raises mpatch_Error if the records do not end exactly at the end of
                 the patch. */
              static PyObject *
              patchedsize(PyObject *self, PyObject *args)
              {
              	long orig, start, end, len, outlen = 0, last = 0;
              	int patchlen;
              	char *bin, *binend, *data;
-             	uint32_t decode[3]; /* for dealing with alignment issues */
              	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
              		return NULL;
              	binend = bin + patchlen;
              	/* bin: start of the current header; data: the bytes right after it */
              	data = bin + 12;
              	while (data <= binend) {
-             		memcpy(decode, bin, 12);
-             		start = ntohl(decode[0]);
-             		end = ntohl(decode[1]);
-             		len = ntohl(decode[2]);
+             		start = getbe32(bin);
+             		end = getbe32(bin + 4);
+             		len = getbe32(bin + 8);
              		if (start > end)
              			break; /* sanity check */
              		bin = data + len;
              		if (bin < data)
              			break; /* big data + big (bogus) len can wrap around */
              		data = bin + 12;
              		/* unchanged bytes before the hunk, then its replacement data */
              		outlen += start - last;
              		last = end;
              		outlen += len;
              	}
              	if (bin != binend) {
              		if (!PyErr_Occurred())
              			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
              		return NULL;
              	}
              	/* unchanged bytes after the last hunk */
              	outlen += orig - last;
              	return Py_BuildValue("l", outlen);
              }
              /* functions exported to Python; the {NULL, NULL} entry ends the table */
              static PyMethodDef methods[] = {
              	{"patches", patches, METH_VARARGS, "apply a series of patches\n"},
              	{"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
              	{NULL, NULL}
              };
              #ifdef IS_PY3K
              /* Python 3 module definition */
              static struct PyModuleDef mpatch_module = {
              	PyModuleDef_HEAD_INIT,
              	"mpatch",
              	mpatch_doc,
              	-1,
              	methods
              };
              /*
               * Python 3 entry point: create the module and publish the mpatchError
               * exception on it.  Returns NULL with an exception set on failure.
               */
              PyMODINIT_FUNC PyInit_mpatch(void)
              {
              	PyObject *m;

              	m = PyModule_Create(&mpatch_module);
              	if (m == NULL)
              		return NULL;

              	mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
              	if (mpatch_Error == NULL) {
              		/* Py_INCREF on NULL would crash; give up cleanly instead */
              		Py_DECREF(m);
              		return NULL;
              	}

              	/* one reference stays in the global, one goes to the module */
              	Py_INCREF(mpatch_Error);
              	if (PyModule_AddObject(m, "mpatchError", mpatch_Error) < 0) {
              		/* the reference is only taken over on success */
              		Py_DECREF(mpatch_Error);
              		Py_DECREF(m);
              		return NULL;
              	}
              	return m;
              }
              #else
              /* Python 2 entry point: register the module and create the exception */
              PyMODINIT_FUNC
              initmpatch(void)
              {
              	Py_InitModule3("mpatch", methods, mpatch_doc);
              	mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
              }
              #endif

mercurial/parsers.c

0 +14 -19

              /*
               parsers.c - efficient content parsing
               Copyright 2008 Matt Mackall <mpm@selenic.com> and others
               This software may be used and distributed according to the terms of
               the GNU General Public License, incorporated herein by reference.
              */
              #include <Python.h>
              #include <ctype.h>
              #include <string.h>
              #include "util.h"
              /*
               * Value of one hexadecimal digit, upper or lower case.  For any other
               * character a ValueError is set and 0 is returned.
               */
              static int hexdigit(char c)
              {
              	int value = -1;

              	if (c >= '0' && c <= '9')
              		value = c - '0';
              	else if (c >= 'a' && c <= 'f')
              		value = c - 'a' + 10;
              	else if (c >= 'A' && c <= 'F')
              		value = c - 'A' + 10;

              	if (value < 0) {
              		PyErr_SetString(PyExc_ValueError,
              				"input contains non-hex character");
              		return 0;
              	}
              	return value;
              }
              /*
               * Turn a hex-encoded string into binary.
               *
               * str holds len hex digits; the result is a bytes object of len / 2
               * bytes.  Only complete digit pairs are converted, so an odd len never
               * reads past the input or writes past the result.
               *
               * Returns NULL with ValueError set if a non-hex character is found, or
               * NULL with the allocator's exception if the result cannot be created.
               */
              static PyObject *unhexlify(const char *str, int len)
              {
              	PyObject *ret;
              	char *d;
              	int i, npairs = len / 2;

              	ret = PyBytes_FromStringAndSize(NULL, npairs);
              	if (!ret)
              		return NULL;

              	d = PyBytes_AsString(ret);
              	for (i = 0; i < npairs; i++) {
              		int hi = hexdigit(str[2 * i]);
              		int lo = hexdigit(str[2 * i + 1]);
              		d[i] = (hi << 4) | lo;
              	}

              	/* hexdigit() reports bad input only through the error indicator */
              	if (PyErr_Occurred()) {
              		Py_DECREF(ret);
              		return NULL;
              	}
              	return ret;
              }
              /*
               * This code assumes that a manifest is stitched together with newline
               * ('\n') characters.
               *
               * Python entry point: parse_manifest(mfdict, fdict, text).  Each entry
               * of text is a file name, a NUL byte, a hex node and optional flags,
               * ended by a newline.  The first 40 characters after the NUL are
               * unhexlified and stored in mfdict under the file name; anything
               * beyond them is stored in fdict under the same name.
               *
               * Returns None, or NULL with ValueError set when an entry has no NUL
               * separator or the text does not end with a newline.
               */
              static PyObject *parse_manifest(PyObject *self, PyObject *args)
              {
              	PyObject *mfdict, *fdict;
              	/* start: first byte of the current entry; zero: its NUL separator */
              	char *str, *cur, *start, *zero;
              	int len;
              	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
              			      &PyDict_Type, &mfdict,
              			      &PyDict_Type, &fdict,
              			      &str, &len))
              		goto quit;
              	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
              		PyObject *file = NULL, *node = NULL;
              		PyObject *flags = NULL;
              		int nlen;
              		/* scan ahead, remembering the most recent NUL, until a newline */
              		if (!*cur) {
              			zero = cur;
              			continue;
              		}
              		else if (*cur != '\n')
              			continue;
              		if (!zero) {
              			PyErr_SetString(PyExc_ValueError,
              					"manifest entry has no separator");
              			goto quit;
              		}
              		file = PyBytes_FromStringAndSize(start, zero - start);
              		if (!file)
              			goto bail;
              		/* number of characters between the NUL and the newline */
              		nlen = cur - zero - 1;
              		node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
              		if (!node)
              			goto bail;
              		/* anything past the 40 hex digits is the flags string */
              		if (nlen > 40) {
              			flags = PyBytes_FromStringAndSize(zero + 41,
              							   nlen - 40);
              			if (!flags)
              				goto bail;
              			if (PyDict_SetItem(fdict, file, flags) == -1)
              				goto bail;
              		}
              		if (PyDict_SetItem(mfdict, file, node) == -1)
              			goto bail;
              		start = cur + 1;
              		zero = NULL;
              		/* the dicts hold their own references now */
              		Py_XDECREF(flags);
              		Py_XDECREF(node);
              		Py_XDECREF(file);
              		continue;
              	bail:
              		Py_XDECREF(flags);
              		Py_XDECREF(node);
              		Py_XDECREF(file);
              		goto quit;
              	}
              	if (len > 0 && *(cur - 1) != '\n') {
              		PyErr_SetString(PyExc_ValueError,
              				"manifest contains trailing garbage");
              		goto quit;
              	}
              	Py_INCREF(Py_None);
              	return Py_None;
              quit:
              	return NULL;
              }
              /*
               * Parse a dirstate blob.
               *
               * Arguments (Python): dmap, cmap, data.  The first 40 bytes of data are
               * the two 20-byte parents.  Each following record is a 1-byte state,
               * four big-endian 32-bit fields (mode, size, mtime, name length) and the
               * name itself.  A NUL inside the name separates the file name from a
               * second name, which is stored in cmap under the file name.  The
               * (state, mode, size, mtime) tuple is stored in dmap.
               *
               * Returns the tuple of the two parents, or NULL with an exception set.
               */
              static PyObject *parse_dirstate(PyObject *self, PyObject *args)
              {
              	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
              	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
              	char *str, *cur, *end, *cpos;
              	int state, mode, size, mtime;
              	unsigned int flen;
              	int len;
              	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
              			      &PyDict_Type, &dmap,
              			      &PyDict_Type, &cmap,
              			      &str, &len))
              		goto quit;
              	/* read parents */
              	if (len < 40) {
              		/* never return NULL without an exception set */
              		PyErr_SetString(PyExc_ValueError,
              				"too little data for parents");
              		goto quit;
              	}
              	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
              	if (!parents)
              		goto quit;
              	/* read filenames */
              	cur = str + 40;
              	end = str + len;
              	while (cur < end - 17) {
              		/* unpack header */
              		state = *cur;
              		mode = getbe32(cur + 1);
              		size = getbe32(cur + 5);
              		mtime = getbe32(cur + 9);
              		flen = getbe32(cur + 13);
              		cur += 17;
              		/* cur < end here, so the subtraction cannot be negative */
              		if (flen > (size_t)(end - cur)) {
              			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
              			goto quit;
              		}
              		entry = Py_BuildValue("ciii", state, mode, size, mtime);
              		if (!entry)
              			goto quit;
              		PyObject_GC_UnTrack(entry); /* don't waste time with this */
              		cpos = memchr(cur, 0, flen);
              		if (cpos) {
              			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
              			cname = PyBytes_FromStringAndSize(cpos + 1,
              							   flen - (cpos - cur) - 1);
              			if (!fname || !cname ||
              			    PyDict_SetItem(cmap, fname, cname) == -1 ||
              			    PyDict_SetItem(dmap, fname, entry) == -1)
              				goto quit;
              			Py_DECREF(cname);
              		} else {
              			fname = PyBytes_FromStringAndSize(cur, flen);
              			if (!fname ||
              			    PyDict_SetItem(dmap, fname, entry) == -1)
              				goto quit;
              		}
              		cur += flen;
              		Py_DECREF(fname);
              		Py_DECREF(entry);
              		/* cleared so the shared exit path does not release them again */
              		fname = cname = entry = NULL;
              	}
              	ret = parents;
              	Py_INCREF(ret);
              quit:
              	Py_XDECREF(fname);
              	Py_XDECREF(cname);
              	Py_XDECREF(entry);
              	Py_XDECREF(parents);
              	return ret;
              }
              /*
               * A base-16 trie for fast node->rev mapping.
               *
               * Positive value is index of the next node in the trie
               * Negative value is a leaf: -(rev + 1)
               * Zero is empty
               */
              typedef struct {
              	/* one slot per nibble value, indexed by the result of nt_level() */
              	int children[16];
              } nodetree;
              /*
               * This class has two behaviours.
               *
               * When used in a list-like way (with integer keys), we decode an
               * entry in a RevlogNG index file on demand. Our last entry is a
               * sentinel, always a nullid.  We have limited support for
               * integer-keyed insert and delete, only at elements right before the
               * sentinel.
               *
               * With string keys, we lazily perform a reverse mapping from node to
               * rev, using a base-16 trie.
               */
              typedef struct {
              	PyObject_HEAD
              	/* Type-specific fields go here. */
              	PyObject *data;        /* raw bytes of index */
              	PyObject **cache;      /* cached tuples */
              	const char **offsets;  /* populated on demand */
              	Py_ssize_t raw_length; /* original number of elements */
              	Py_ssize_t length;     /* current number of elements */
              	PyObject *added;       /* populated on demand */
              	nodetree *nt;          /* base-16 trie */
              	int ntlength;          /* # nodes in use */
              	int ntcapacity;        /* # nodes allocated */
              	int ntdepth;           /* maximum depth of tree */
              	int ntsplits;          /* # splits performed */
              	int ntrev;             /* last rev scanned */
              	int ntlookups;         /* # lookups */
              	int ntmisses;          /* # lookups that miss the cache */
              	int inlined;           /* nonzero: records are located via offsets[] */
              } indexObject;
              /*
               * Number of entries currently visible: the base length plus any
               * entries appended to the "added" list.
               */
              static Py_ssize_t index_length(const indexObject *self)
              {
              	Py_ssize_t total = self->length;
              	if (self->added != NULL)
              		total += PyList_GET_SIZE(self->added);
              	return total;
              }
              /* Entry handed out by index_get() for the last (sentinel) position. */
              static PyObject *nullentry;
              /* Node id of the sentinel; static storage, so all 20 bytes are zero. */
              static const char nullid[20];
              static long inline_scan(indexObject *self, const char **offsets);
              /*
               * Py_BuildValue format for an index entry.  The first item is the
               * 64-bit offset/flags word: "K" (unsigned long long) is used when long
               * is 32 bits wide, "k" (unsigned long) otherwise.
               */
              #if LONG_MAX == 0x7fffffffL
              static char *tuple_format = "Kiiiiiis#";
              #else
              static char *tuple_format = "kiiiiiis#";
              #endif
              /*
               * Return a pointer to the beginning of a RevlogNG record.
               *
               * Without inline data (or for record 0) records are 64 bytes apart.
               * Otherwise the offsets table is built on first use and consulted.
               * Returns NULL with MemoryError set if that table cannot be allocated.
               */
              static const char *index_deref(indexObject *self, Py_ssize_t pos)
              {
              	if (!self->inlined || pos <= 0)
              		return PyString_AS_STRING(self->data) + pos * 64;
              	if (self->offsets == NULL) {
              		const char **table;
              		table = malloc(self->raw_length * sizeof(*self->offsets));
              		self->offsets = table;
              		if (table == NULL)
              			return (const char *)PyErr_NoMemory();
              		inline_scan(self, table);
              	}
              	return self->offsets[pos];
              }
              /*
               * RevlogNG format (all in big endian, data may be inlined):
               *    6 bytes: offset
               *    2 bytes: flags
               *    4 bytes: compressed length
               *    4 bytes: uncompressed length
               *    4 bytes: base revision
               *    4 bytes: link revision
               *    4 bytes: parent 1 revision
               *    4 bytes: parent 2 revision
               *   32 bytes: nodeid (only 20 bytes used)
               *
               * Return a new reference to the entry at pos (negative values count
               * from the end).  The last position yields the shared null entry,
               * positions in the "added" list yield the stored object, and on-disk
               * positions are decoded into a tuple that is cached for later calls.
               * Returns NULL with an exception set on failure.
               */
              static PyObject *index_get(indexObject *self, Py_ssize_t pos)
              {
              	uint64_t offset_flags;
              	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
              	const char *c_node_id;
              	const char *data;
              	Py_ssize_t length = index_length(self);
              	PyObject *entry;
              	if (pos < 0)
              		pos += length;
              	if (pos < 0 || pos >= length) {
              		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
              		return NULL;
              	}
              	if (pos == length - 1) {
              		Py_INCREF(nullentry);
              		return nullentry;
              	}
              	if (pos >= self->length - 1) {
              		PyObject *obj;
              		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
              		Py_INCREF(obj);
              		return obj;
              	}
              	if (self->cache) {
              		if (self->cache[pos]) {
              			Py_INCREF(self->cache[pos]);
              			return self->cache[pos];
              		}
              	} else {
              		self->cache = calloc(self->raw_length, sizeof(PyObject *));
              		if (self->cache == NULL)
              			return PyErr_NoMemory();
              	}
              	data = index_deref(self, pos);
              	if (data == NULL)
              		return NULL;
              	offset_flags = getbe32(data + 4);
              	if (pos == 0) /* mask out version number for the first entry */
              		offset_flags &= 0xFFFF;
              	else {
              		uint32_t offset_high = getbe32(data);
              		offset_flags |= ((uint64_t)offset_high) << 32;
              	}
              	comp_len = getbe32(data + 8);
              	uncomp_len = getbe32(data + 12);
              	base_rev = getbe32(data + 16);
              	link_rev = getbe32(data + 20);
              	parent_1 = getbe32(data + 24);
              	parent_2 = getbe32(data + 28);
              	c_node_id = data + 32;
              	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
              			      uncomp_len, base_rev, link_rev,
              			      parent_1, parent_2, c_node_id, 20);
              	/* Py_INCREF must not see NULL; leave the cache slot empty instead */
              	if (entry == NULL)
              		return NULL;
              	PyObject_GC_UnTrack(entry);
              	self->cache[pos] = entry; /* the cache keeps the original reference */
              	Py_INCREF(entry);         /* and the caller receives its own */
              	return entry;
              }
              /*
               * Return the 20-byte SHA of the node corresponding to the given rev.
               *
               * The sentinel position yields nullid.  NULL is returned for positions
               * past the end or when the record/tuple item cannot be obtained.
               */
              static const char *index_node(indexObject *self, Py_ssize_t pos)
              {
              	const Py_ssize_t total = index_length(self);
              	const Py_ssize_t ondisk = self->length - 1;
              	const char *record;
              	if (pos == total - 1)
              		return nullid;
              	if (pos >= total)
              		return NULL;
              	if (pos >= ondisk) {
              		/* entry lives in the "added" list; its 8th item is the node */
              		PyObject *entry = PyList_GET_ITEM(self->added, pos - ondisk);
              		PyObject *hash = PyTuple_GetItem(entry, 7);
              		if (hash == NULL)
              			return NULL;
              		return PyString_AS_STRING(hash);
              	}
              	record = index_deref(self, pos);
              	if (record == NULL)
              		return NULL;
              	return record + 32;
              }
              /* Forward declaration: index_insert() uses nt_insert() before its definition. */
              static int nt_insert(indexObject *self, const char *node, int rev);
              /*
               * Extract the buffer and length of a string object and require the
               * length to be exactly 20 bytes.  Returns 0 on success, -1 with an
               * exception set otherwise.
               */
              static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
              {
              	if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
              		return -1;
              	if (*nodelen != 20) {
              		PyErr_SetString(PyExc_ValueError, "20-byte hash required");
              		return -1;
              	}
              	return 0;
              }
              /*
               * insert(offset, entry): append an 8-tuple entry just before the
               * sentinel.  Only the position of the sentinel (index -1) is accepted.
               * When the trie exists, the entry's node is recorded in it as well.
               * Returns None, or NULL with an exception set.
               */
              static PyObject *index_insert(indexObject *self, PyObject *args)
              {
              	PyObject *entry;
              	char *node;
              	long where;
              	Py_ssize_t total, nodelen;
              	if (!PyArg_ParseTuple(args, "lO", &where, &entry))
              		return NULL;
              	if (!PyTuple_Check(entry) || PyTuple_GET_SIZE(entry) != 8) {
              		PyErr_SetString(PyExc_TypeError, "8-tuple required");
              		return NULL;
              	}
              	if (node_check(PyTuple_GET_ITEM(entry, 7), &node, &nodelen) == -1)
              		return NULL;
              	total = index_length(self);
              	if (where < 0)
              		where += total;
              	if (where != total - 1) {
              		PyErr_SetString(PyExc_IndexError,
              				"insert only supported at index -1");
              		return NULL;
              	}
              	if (where > INT_MAX) {
              		PyErr_SetString(PyExc_ValueError,
              				"currently only 2**31 revs supported");
              		return NULL;
              	}
              	/* the list of appended entries is created on first use */
              	if (self->added == NULL) {
              		PyObject *fresh = PyList_New(0);
              		self->added = fresh;
              		if (fresh == NULL)
              			return NULL;
              	}
              	if (PyList_Append(self->added, entry) == -1)
              		return NULL;
              	if (self->nt != NULL)
              		nt_insert(self, node, (int)where);
              	Py_INCREF(Py_None);
              	return Py_None;
              }
              /*
               * Release the tuple cache (dropping every cached reference), the
               * offsets table and the trie, leaving all three pointers NULL.
               */
              static void _index_clearcaches(indexObject *self)
              {
              	if (self->cache != NULL) {
              		Py_ssize_t slot = 0;
              		while (slot < self->raw_length) {
              			Py_XDECREF(self->cache[slot]);
              			self->cache[slot] = NULL;
              			slot++;
              		}
              	}
              	/* free(NULL) is a no-op, so no guards are needed below */
              	free(self->cache);
              	self->cache = NULL;
              	free(self->offsets);
              	self->offsets = NULL;
              	free(self->nt);
              	self->nt = NULL;
              }
              /*
               * clearcaches(): drop all caches and reset the trie bookkeeping
               * counters to their initial values.  Returns None.
               */
              static PyObject *index_clearcaches(indexObject *self)
              {
              	_index_clearcaches(self);
              	self->ntlength = 0;
              	self->ntcapacity = 0;
              	self->ntdepth = 0;
              	self->ntsplits = 0;
              	self->ntlookups = 0;
              	self->ntmisses = 0;
              	self->ntrev = -1;
              	Py_INCREF(Py_None);
              	return Py_None;
              }
              /*
               * Return a new dict of index and trie statistics keyed by description.
               *
               * PyDict_SetItemString() does not take over the reference of the
               * value, so every integer created here is released after it has been
               * stored.  Returns NULL with an exception set on failure.
               */
              static PyObject *index_stats(indexObject *self)
              {
              	PyObject *obj = PyDict_New();
              	PyObject *t = NULL;
              	if (obj == NULL)
              		return NULL;
              #define istat(field, desc) \
              	do { \
              		t = PyInt_FromLong(self->field); \
              		if (t == NULL) \
              			goto bail; \
              		if (PyDict_SetItemString(obj, desc, t) == -1) \
              			goto bail; \
              		Py_DECREF(t); \
              		t = NULL; \
              	} while (0)
              	if (self->added) {
              		Py_ssize_t len = PyList_GET_SIZE(self->added);
              		t = PyInt_FromLong(len);
              		if (t == NULL)
              			goto bail;
              		if (PyDict_SetItemString(obj, "index entries added", t) == -1)
              			goto bail;
              		Py_DECREF(t);
              		t = NULL;
              	}
              	if (self->raw_length != self->length - 1)
              		istat(raw_length, "revs on disk");
              	istat(length, "revs in memory");
              	istat(ntcapacity, "node trie capacity");
              	istat(ntdepth, "node trie depth");
              	istat(ntlength, "node trie count");
              	istat(ntlookups, "node trie lookups");
              	istat(ntmisses, "node trie misses");
              	istat(ntrev, "node trie last rev scanned");
              	istat(ntsplits, "node trie splits");
              #undef istat
              	return obj;
              bail:
              	Py_XDECREF(obj);
              	Py_XDECREF(t);
              	return NULL;
              }
              /*
               * Return the nibble of node selected by level: even levels take the
               * high four bits of byte level / 2, odd levels the low four bits.
               */
              static inline int nt_level(const char *node, int level)
              {
              	const int shift = (level & 1) ? 0 : 4;
              	int byte = node[level >> 1];
              	return (byte >> shift) & 0xf;
              }
              /*
               * Look a node (or node prefix) up in the trie.
               *
               * Returns -1 for the 20-byte null id, the rev stored in the matching
               * leaf when its node compares equal over the first min(nodelen, 20)
               * bytes, and -2 when there is no trie or no match.
               */
              static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
              {
              	int level;
              	int off = 0;
              	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
              		return -1;
              	if (self->nt == NULL)
              		return -2;
              	for (level = 0; level < nodelen; level++) {
              		int child = self->nt[off].children[nt_level(node, level)];
              		if (child == 0)
              			return -2;
              		if (child < 0) {
              			/* leaf: decode the rev and confirm the full node */
              			int rev = -child - 1;
              			const char *found = index_node(self, rev);
              			if (found == NULL)
              				return -2;
              			if (memcmp(node, found, nodelen > 20 ? 20 : nodelen))
              				return -2;
              			return rev;
              		}
              		off = child;
              	}
              	return -2;
              }
              /*
               * Reserve the next free trie node and return its index.
               *
               * When the array is full its capacity is doubled and the new part is
               * zeroed.  On failure -1 is returned with MemoryError set; the
               * existing trie and its recorded capacity are left untouched, so the
               * old block is neither leaked nor lost.
               */
              static int nt_new(indexObject *self)
              {
              	if (self->ntlength == self->ntcapacity) {
              		nodetree *grown;
              		int newcapacity;
              		/* refuse a doubling that would overflow the int capacity */
              		if (self->ntcapacity > INT_MAX / 2 / (int)sizeof(nodetree)) {
              			PyErr_SetString(PyExc_MemoryError, "out of memory");
              			return -1;
              		}
              		newcapacity = self->ntcapacity * 2;
              		grown = realloc(self->nt, newcapacity * sizeof(nodetree));
              		if (grown == NULL) {
              			PyErr_SetString(PyExc_MemoryError, "out of memory");
              			return -1;
              		}
              		self->nt = grown;
              		self->ntcapacity = newcapacity;
              		memset(&self->nt[self->ntlength], 0,
              		       sizeof(nodetree) * (self->ntcapacity - self->ntlength));
              	}
              	return self->ntlength++;
              }
              /*
               * Insert or update the trie slot for a node.
               *
               * The trie is walked one nibble per level.  An empty slot receives the
               * leaf value -rev - 1.  A slot already holding a leaf is overwritten
               * when that leaf's node cannot be fetched or equals the node being
               * inserted; otherwise a new interior node is allocated, the old leaf is
               * pushed one level down, and the walk continues from the new node.
               *
               * Passing rev == -1 stores 0, which marks the slot as empty.
               *
               * Returns 0 on success.  Returns -1 when nt_new() fails or when the
               * walk reaches level 20 without placing the node.
               *
               * NOTE(review): nt_level() consumes one nibble per level, so the bound
               * of 20 covers only the first 10 bytes of a 20-byte node -- confirm
               * this limit is intended.
               */
              static int nt_insert(indexObject *self, const char *node, int rev)
              {
              	int level = 0;
              	int off = 0;
              	while (level < 20) {
              		int k = nt_level(node, level);
              		nodetree *n;
              		int v;
              		n = &self->nt[off];
              		v = n->children[k];
              		/* empty slot: store the leaf here */
              		if (v == 0) {
              			n->children[k] = -rev - 1;
              			return 0;
              		}
              		/* existing leaf: replace it or split */
              		if (v < 0) {
              			const char *oldnode = index_node(self, -v - 1);
              			int noff;
              			if (!oldnode || !memcmp(oldnode, node, 20)) {
              				n->children[k] = -rev - 1;
              				return 0;
              			}
              			noff = nt_new(self);
              			if (noff == -1)
              				return -1;
              			/* self->nt may have been changed by realloc */
              			self->nt[off].children[k] = noff;
              			off = noff;
              			n = &self->nt[off];
              			/* re-hang the old leaf under the new interior node */
              			n->children[nt_level(oldnode, ++level)] = v;
              			if (level > self->ntdepth)
              				self->ntdepth = level;
              			self->ntsplits += 1;
              		} else {
              			/* interior node: descend */
              			level += 1;
              			off = v;
              		}
              	}
              	return -1;
              }
              /*
               * Map a node to its rev, consulting the trie first and falling back to
               * a backwards scan of the index that starts below self->ntrev.
               *
               * Return values:
               *
               *   -3: error (exception set)
               *   -2: not found (no exception set)
               * rest: valid rev
               *
               * -2 is also returned when index_node() yields NULL during the scan.
               */
              static int index_find_node(indexObject *self,
              			   const char *node, Py_ssize_t nodelen)
              {
              	int rev;
              	self->ntlookups++;
              	rev = nt_find(self, node, nodelen);
              	if (rev >= -1)
              		return rev;
              	/* first miss: create the trie with a single (root) node in use */
              	if (self->nt == NULL) {
              		self->ntcapacity = self->raw_length < 4
              			? 4 : self->raw_length / 2;
              		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
              		if (self->nt == NULL) {
              			PyErr_SetString(PyExc_MemoryError, "out of memory");
              			return -3;
              		}
              		self->ntlength = 1;
              		self->ntrev = (int)index_length(self) - 1;
              		self->ntlookups = 1;
              		self->ntmisses = 0;
              	}
              	/*
              	 * For the first handful of lookups, we scan the entire index,
              	 * and cache only the matching nodes. This optimizes for cases
              	 * like "hg tip", where only a few nodes are accessed.
              	 *
              	 * After that, we cache every node we visit, using a single
              	 * scan amortized over multiple lookups.  This gives the best
              	 * bulk performance, e.g. for "hg log".
              	 */
              	if (self->ntmisses++ < 4) {
              		/* ntrev is left unchanged here, so later scans start over */
              		for (rev = self->ntrev - 1; rev >= 0; rev--) {
              			const char *n = index_node(self, rev);
              			if (n == NULL)
              				return -2;
              			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
              				if (nt_insert(self, n, rev) == -1)
              					return -3;
              				break;
              			}
              		}
              	} else {
              		for (rev = self->ntrev - 1; rev >= 0; rev--) {
              			const char *n = index_node(self, rev);
              			if (n == NULL)
              				return -2;
              			if (nt_insert(self, n, rev) == -1)
              				return -3;
              			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
              				break;
              			}
              		}
              		/* remember how far the trie has been populated */
              		self->ntrev = rev;
              	}
              	if (rev >= 0)
              		return rev;
              	return -2;
              }
              /*
               * Set mercurial.error.RevlogError as the pending exception.
               *
               * The exception class is looked up on first use and kept in a static
               * variable.  Returns a new reference to the exception instance (the
               * caller owns it), or NULL with a different exception set when the
               * class cannot be found or instantiated.
               */
              static PyObject *raise_revlog_error(void)
              {
              	static PyObject *errclass;
              	PyObject *mod = NULL, *errobj;
              	if (errclass == NULL) {
              		PyObject *dict;
              		mod = PyImport_ImportModule("mercurial.error");
              		if (mod == NULL)
              			goto classfail;
              		dict = PyModule_GetDict(mod);
              		if (dict == NULL)
              			goto classfail;
              		errclass = PyDict_GetItemString(dict, "RevlogError");
              		if (errclass == NULL) {
              			PyErr_SetString(PyExc_SystemError,
              					"could not find RevlogError");
              			goto classfail;
              		}
              		Py_INCREF(errclass);
              		/* the class is held on its own; release the module reference */
              		Py_DECREF(mod);
              		mod = NULL;
              	}
              	errobj = PyObject_CallFunction(errclass, NULL);
              	if (errobj == NULL)
              		return NULL;
              	PyErr_SetObject(errclass, errobj);
              	return errobj;
              classfail:
              	Py_XDECREF(mod);
              	return NULL;
              }
              /*
               * index[key]: an integer key returns the entry at that position, a
               * string key returns the rev of that node.  An unknown node raises
               * RevlogError.  Returns NULL with an exception set on failure.
               */
              static PyObject *index_getitem(indexObject *self, PyObject *value)
              {
              	char *node;
              	Py_ssize_t nodelen;
              	int rev;
              	if (PyInt_Check(value))
              		return index_get(self, PyInt_AS_LONG(value));
              	if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
              		return NULL;
              	rev = index_find_node(self, node, nodelen);
              	if (rev >= -1)
              		return PyInt_FromLong(rev);
              	if (rev == -2) {
              		/*
              		 * raise_revlog_error() hands back an owned reference to the
              		 * exception instance; only the pending exception is needed.
              		 */
              		PyObject *errobj = raise_revlog_error();
              		Py_XDECREF(errobj);
              	}
              	return NULL;
              }
              /*
               * get(node): return the rev of node, None when the node is unknown,
               * or NULL with an exception set when the lookup itself failed.
               */
              static PyObject *index_m_get(indexObject *self, PyObject *args)
              {
              	char *node;
              	int nodelen, rev;
              	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
              		return NULL;
              	rev = index_find_node(self, node, nodelen);
              	switch (rev) {
              	case -3:
              		return NULL;
              	case -2:
              		Py_INCREF(Py_None);
              		return Py_None;
              	default:
              		return PyInt_FromLong(rev);
              	}
              }
              /*
               * "value in index": integers are tested against the range
               * [-1, length); strings are looked up as nodes.  Other types are never
               * contained.  Returns 1, 0, or -1 when the lookup reported an error.
               */
              static int index_contains(indexObject *self, PyObject *value)
              {
              	int found;
              	if (PyInt_Check(value)) {
              		long rev = PyInt_AS_LONG(value);
              		return -1 <= rev && rev < index_length(self);
              	}
              	if (!PyString_Check(value))
              		return 0;
              	found = index_find_node(self, PyString_AS_STRING(value),
              				PyString_GET_SIZE(value));
              	if (found == -3)
              		return -1;
              	if (found == -2)
              		return 0;
              	return 1;
              }
              /*
               * Invalidate any trie entries introduced by added revs.
               *
               * Every entry of the "added" list from position start onwards has its
               * node reset in the trie.  When start is 0 the list itself is released.
               */
              static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
              {
              	const Py_ssize_t count = PyList_GET_SIZE(self->added);
              	Py_ssize_t idx = start;
              	while (idx < count) {
              		PyObject *entry = PyList_GET_ITEM(self->added, idx);
              		PyObject *hash = PyTuple_GET_ITEM(entry, 7);
              		nt_insert(self, PyString_AS_STRING(hash), -1);
              		idx++;
              	}
              	if (start != 0)
              		return;
              	Py_DECREF(self->added);
              	self->added = NULL;
              }
              /*
               * Delete a numeric range of revs, which must be at the end of the
               * range, but exclude the sentinel nullid entry.
               *
               * Returns 0 on success (including an empty slice) and -1 with an
               * exception set when the slice has a step other than 1 or does not
               * end exactly at the sentinel.
               */
              static int index_slice_del(indexObject *self, PyObject *item)
              {
              	Py_ssize_t start, stop, step, slicelength;
              	Py_ssize_t length = index_length(self);
              	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
              				 &start, &stop, &step, &slicelength) < 0)
              		return -1;
              	if (slicelength <= 0)
              		return 0;
              	if ((step < 0 && start < stop) || (step > 0 && start > stop))
              		stop = start;
              	/* rewrite a backwards slice as the equivalent forward one */
              	if (step < 0) {
              		stop = start + 1;
              		start = stop + step*(slicelength - 1) - 1;
              		step = -step;
              	}
              	if (step != 1) {
              		PyErr_SetString(PyExc_ValueError,
              				"revlog index delete requires step size of 1");
              		return -1;
              	}
              	if (stop != length - 1) {
              		PyErr_SetString(PyExc_IndexError,
              				"revlog index deletion indices are invalid");
              		return -1;
              	}
              	/* the slice starts before the "added" entries */
              	if (start < self->length - 1) {
              		if (self->nt) {
              			Py_ssize_t i;
              			for (i = start + 1; i < self->length - 1; i++) {
              				const char *node = index_node(self, i);
              				if (node)
              					nt_insert(self, node, -1);
              			}
              			if (self->added)
              				nt_invalidate_added(self, 0);
              			if (self->ntrev > start)
              				self->ntrev = (int)start;
              		}
              		self->length = start + 1;
              		return 0;
              	}
              	/* the slice covers only entries held in the "added" list */
              	if (self->nt) {
              		nt_invalidate_added(self, start - self->length + 1);
              		if (self->ntrev > start)
              			self->ntrev = (int)start;
              	}
              	return self->added
              		? PyList_SetSlice(self->added, start - self->length + 1,
              				  PyList_GET_SIZE(self->added), NULL)
              		: 0;
              }
              /*
               * Supported ops:
               *
               * slice deletion
               * string assignment (extend node->rev mapping)
               * string deletion (shrink node->rev mapping)
               *
               * Returns 0 on success, -1 with an exception set on failure.
               */
              static int index_assign_subscript(indexObject *self, PyObject *item,
              				  PyObject *value)
              {
              	char *node;
              	Py_ssize_t nodelen;
              	long rev;
              	if (PySlice_Check(item) && value == NULL)
              		return index_slice_del(self, item);
              	if (node_check(item, &node, &nodelen) == -1)
              		return -1;
              	if (value == NULL)
              		return self->nt ? nt_insert(self, node, -1) : 0;
              	rev = PyInt_AsLong(value);
              	if (rev > INT_MAX || rev < 0) {
              		if (!PyErr_Occurred())
              			PyErr_SetString(PyExc_ValueError, "rev out of range");
              		return -1;
              	}
              	/*
              	 * nt_insert() dereferences self->nt unconditionally, so create the
              	 * trie first, the same way index_find_node() does on its first miss.
              	 */
              	if (self->nt == NULL) {
              		self->ntcapacity = self->raw_length < 4
              			? 4 : self->raw_length / 2;
              		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
              		if (self->nt == NULL) {
              			PyErr_SetString(PyExc_MemoryError, "out of memory");
              			return -1;
              		}
              		self->ntlength = 1;
              		self->ntrev = (int)index_length(self) - 1;
              	}
              	return nt_insert(self, node, (int)rev);
              }
              /*
               * Find all RevlogNG entries in an index that has inline data. Update
               * the optional "offsets" table with those entries.
               *
               * Each record is a 64-byte header followed by as many bytes of inline
               * data as the header's compressed-length field states.  Returns the
               * number of records, or -1 with ValueError set when the records do not
               * end exactly at the end of the data.
               */
              static long inline_scan(indexObject *self, const char **offsets)
              {
              	const char *data = PyString_AS_STRING(self->data);
              	const char *end = data + PyString_GET_SIZE(self->data);
              	const long hdrsize = 64;
              	long incr;
              	Py_ssize_t len = 0;
              	while (data + hdrsize <= end) {
              		uint32_t comp_len;
              		const char *old_data;
              		/* 3rd element of header is length of compressed inline data */
              		comp_len = getbe32(data + 8);
              		incr = hdrsize + comp_len;
              		if (incr < hdrsize)
              			break;
              		if (offsets)
              			offsets[len] = data;
              		len++;
              		old_data = data;
              		data += incr;
              		/* stop if the pointer failed to advance */
              		if (data <= old_data)
              			break;
              	}
              	if (data != end && data + hdrsize != end) {
              		if (!PyErr_Occurred())
              			PyErr_SetString(PyExc_ValueError, "corrupt index file");
              		return -1;
              	}
              	return len;
              }
              /*
               * Initialise every field of an index object.
               *
               * data_obj is kept (with a new reference) as the raw index bytes.  For
               * an inline index the record count comes from inline_scan(); otherwise
               * size must be a multiple of 64 and the count is size / 64.  The
               * visible length is one more than the record count.
               * Returns 0 on success, -1 with an exception set on failure.
               */
              static int index_real_init(indexObject *self, const char *data, int size,
              			   PyObject *inlined_obj, PyObject *data_obj)
              {
              	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
              	self->data = data_obj;
              	Py_INCREF(self->data);
              	self->cache = NULL;
              	self->offsets = NULL;
              	self->added = NULL;
              	self->nt = NULL;
              	self->ntlength = 0;
              	self->ntcapacity = 0;
              	self->ntdepth = 0;
              	self->ntsplits = 0;
              	self->ntlookups = 0;
              	self->ntmisses = 0;
              	self->ntrev = -1;
              	if (self->inlined) {
              		long count = inline_scan(self, NULL);
              		if (count == -1)
              			return -1;
              		self->raw_length = count;
              		self->length = count + 1;
              		return 0;
              	}
              	if (size % 64 != 0) {
              		PyErr_SetString(PyExc_ValueError, "corrupt index file");
              		return -1;
              	}
              	self->raw_length = size / 64;
              	self->length = self->raw_length + 1;
              	return 0;
              }
              /*
               * tp_init slot: parse (data, inlined) from args and initialise self.
               * kwds is not consulted.  Returns 0 on success, -1 on failure.
               */
              static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
              {
              	PyObject *inlined_obj;
              	PyObject *data_obj;
              	const char *buf;
              	int buflen;

              	if (!PyArg_ParseTuple(args, "s#O", &buf, &buflen, &inlined_obj))
              		return -1;

              	/* the first argument is also kept as the object owning buf */
              	data_obj = PyTuple_GET_ITEM(args, 0);
              	return index_real_init(self, buf, buflen, inlined_obj, data_obj);
              }
              /*
               * Getter for the "nodemap" attribute: the index object serves as its
               * own nodemap, so this returns self.
               *
               * A getter must hand back a new reference; without the Py_INCREF
               * every attribute access would drop one reference on the index.
               */
              static PyObject *index_nodemap(indexObject *self)
              {
              	Py_INCREF(self);
              	return (PyObject *)self;
              }
              /*
               * tp_dealloc slot: drop the caches and the references held by the
               * index, then free the object itself.
               */
              static void index_dealloc(indexObject *self)
              {
              	_index_clearcaches(self);
              	/*
              	 * data is still NULL when index_init failed while parsing its
              	 * arguments, so it must be released with the NULL-tolerant form.
              	 */
              	Py_XDECREF(self->data);
              	Py_XDECREF(self->added);
              	PyObject_Del(self);
              }
              /*
               * Sequence protocol slots for the index type.  Only length, item
               * access and containment are implemented; unused slots are 0.
               */
              static PySequenceMethods index_sequence_methods = {
              	(lenfunc)index_length,   /* sq_length */
              	0,                       /* sq_concat */
              	0,                       /* sq_repeat */
              	(ssizeargfunc)index_get, /* sq_item */
              	0,                       /* sq_slice */
              	0,                       /* sq_ass_item */
              	0,                       /* sq_ass_slice */
              	(objobjproc)index_contains, /* sq_contains */
              };
              /*
               * Mapping protocol slots for the index type: length, subscript
               * lookup and subscript assignment.
               */
              static PyMappingMethods index_mapping_methods = {
              	(lenfunc)index_length,                 /* mp_length */
              	(binaryfunc)index_getitem,             /* mp_subscript */
              	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
              };
              /*
               * Method table installed as tp_methods of the index type.  Each
               * entry is {name, C function, calling convention, doc string}.
               */
              static PyMethodDef index_methods[] = {
              	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
              	 "clear the index caches"},
              	{"get", (PyCFunction)index_m_get, METH_VARARGS,
              	 "get an index entry"},
              	{"insert", (PyCFunction)index_insert, METH_VARARGS,
              	 "insert an index entry"},
              	{"stats", (PyCFunction)index_stats, METH_NOARGS,
              	 "stats for the index"},
              	{NULL} /* Sentinel */
              };
              /*
               * Computed attributes of the index type: "nodemap" has a getter
               * (index_nodemap) and no setter.
               */
              static PyGetSetDef index_getset[] = {
              	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
              	{NULL} /* Sentinel */
              };
              static PyTypeObject indexType = {
              	PyObject_HEAD_INIT(NULL)
 ,                         /* ob_size */
              	"parsers.index",           /* tp_name */
              	sizeof(indexObject),       /* tp_basicsize */
 ,                         /* tp_itemsize */
              	(destructor)index_dealloc, /* tp_dealloc */
 ,                         /* tp_print */
 ,                         /* tp_getattr */
 ,                         /* tp_setattr */
 ,                         /* tp_compare */
 ,                         /* tp_repr */
 ,                         /* tp_as_number */
              	&index_sequence_methods,   /* tp_as_sequence */
              	&index_mapping_methods,    /* tp_as_mapping */
 ,                         /* tp_hash */
 ,                         /* tp_call */
 ,                         /* tp_str */
 ,                         /* tp_getattro */
 ,                         /* tp_setattro */
 ,                         /* tp_as_buffer */
              	Py_TPFLAGS_DEFAULT,        /* tp_flags */
              	"revlog index",            /* tp_doc */
 ,                         /* tp_traverse */
 ,                         /* tp_clear */
 ,                         /* tp_richcompare */
 ,                         /* tp_weaklistoffset */
 ,                         /* tp_iter */
 ,                         /* tp_iternext */
              	index_methods,             /* tp_methods */
 ,                         /* tp_members */
              	index_getset,              /* tp_getset */
 ,                         /* tp_base */
 ,                         /* tp_dict */
 ,                         /* tp_descr_get */
 ,                         /* tp_descr_set */
 ,                         /* tp_dictoffset */
              	(initproc)index_init,      /* tp_init */
 ,                         /* tp_alloc */
              	PyType_GenericNew,         /* tp_new */
              };
              /*
               * returns a tuple of the form (index, cache) with elements as
               * follows:
               *
               * index: an index object that lazily parses RevlogNG records
               * cache: if data is inlined, a tuple (0, index_file_content), else None
               *
               * added complications are for backwards compatibility
               *
               * args is (data, inlined).  Returns NULL with an exception set on
               * failure.
               */
              static PyObject *parse_index2(PyObject *self, PyObject *args)
              {
              	const char *data;
              	int size, ret;
              	PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
              	indexObject *idx;
              	if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
              		return NULL;
              	idx = PyObject_New(indexObject, &indexType);
              	if (idx == NULL)
              		goto bail;
              	ret = index_real_init(idx, data, size, inlined_obj,
              			      PyTuple_GET_ITEM(args, 0));
              	if (ret)
              		goto bail;
              	if (idx->inlined) {
              		/*
              		 * "O" makes Py_BuildValue take its own reference to
              		 * idx->data; an extra Py_INCREF here would leak one.
              		 */
              		cache = Py_BuildValue("iO", 0, idx->data);
              		if (cache == NULL)
              			goto bail;
              	} else {
              		cache = Py_None;
              		Py_INCREF(cache);
              	}
              	/*
              	 * "N" hands our references to idx and cache over to the tuple,
              	 * so no additional Py_INCREF(idx) is needed (it would keep the
              	 * index alive forever).
              	 */
              	tuple = Py_BuildValue("NN", idx, cache);
              	if (!tuple)
              		goto bail;
              	return tuple;
              bail:
              	Py_XDECREF(idx);
              	Py_XDECREF(cache);
              	Py_XDECREF(tuple);
              	return NULL;
              }
              /* Module doc string and the table of module-level functions. */
              static char parsers_doc[] = "Efficient content parsing.";
              static PyMethodDef methods[] = {
              	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
              	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
              	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
              	{NULL, NULL}
              };
              /*
               * Shared module setup for Python 2 and 3: ready the index type,
               * publish it on the module as "index", and build the nullentry tuple.
               *
               * Does nothing when mod is NULL (module creation failed in the
               * caller).  On any failure it returns early with the Python error
               * left set.
               */
              static void module_init(PyObject *mod)
              {
              	/* the caller passes the module through unchecked */
              	if (mod == NULL)
              		return;
              	if (PyType_Ready(&indexType) < 0)
              		return;
              	Py_INCREF(&indexType);
              	/* PyModule_AddObject only steals the reference on success */
              	if (PyModule_AddObject(mod, "index", (PyObject *)&indexType) < 0) {
              		Py_DECREF(&indexType);
              		return;
              	}
              	nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
              				  -1, -1, -1, -1, nullid, 20);
              	if (nullentry)
              		PyObject_GC_UnTrack(nullentry);
              }
              #ifdef IS_PY3K
              /*
               * Python 3 module definition: name "parsers", doc string parsers_doc,
               * function table methods.
               */
              static struct PyModuleDef parsers_module = {
              	PyModuleDef_HEAD_INIT,
              	"parsers",
              	parsers_doc,
              	-1,
              	methods
              };
              /* Python 3 entry point: create the module, set it up and return it. */
              PyMODINIT_FUNC PyInit_parsers(void)
              {
              	PyObject *module;

              	module = PyModule_Create(&parsers_module);
              	module_init(module);
              	return module;
              }
              #else
              /* Python 2 entry point: create the module and set it up. */
              PyMODINIT_FUNC initparsers(void)
              {
              	PyObject *module;

              	module = Py_InitModule3("parsers", methods, parsers_doc);
              	module_init(module);
              }
              #endif

mercurial/util.h

0 +18 -7

              /*
               util.h - utility functions for interfacing with the various python APIs.
               This software may be used and distributed according to the terms of
               the GNU General Public License, incorporated herein by reference.
              */
              #ifndef _HG_UTIL_H_
              #define _HG_UTIL_H_
              #if PY_MAJOR_VERSION >= 3
              #define IS_PY3K
              #define PyInt_FromLong PyLong_FromLong
              #define PyInt_AsLong PyLong_AsLong
              /*
               Mapping of some of the python < 2.x PyString* functions to py3k's PyUnicode.
               The commented names below represent those that are present in the PyBytes
               definitions for python < 2.6 (below in this file) that don't have a direct
               implementation.
              */
              #define PyStringObject PyUnicodeObject
              #define PyString_Type PyUnicode_Type
              #define PyString_Check PyUnicode_Check
              #define PyString_CheckExact PyUnicode_CheckExact
              #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
              #define PyString_AS_STRING PyUnicode_AsLatin1String
              #define PyString_GET_SIZE PyUnicode_GET_SIZE
              #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
              #define PyString_FromString PyUnicode_FromString
              #define PyString_FromFormatV PyUnicode_FromFormatV
              #define PyString_FromFormat PyUnicode_FromFormat
              /* #define PyString_Size PyUnicode_GET_SIZE */
              /* #define PyString_AsString */
              /* #define PyString_Repr */
              #define PyString_Concat PyUnicode_Concat
              #define PyString_ConcatAndDel PyUnicode_AppendAndDel
              #define _PyString_Resize PyUnicode_Resize
              /* #define _PyString_Eq */
              #define PyString_Format PyUnicode_Format
              /* #define _PyString_FormatLong */
              /* #define PyString_DecodeEscape */
              #define _PyString_Join PyUnicode_Join
              #define PyString_Decode PyUnicode_Decode
              #define PyString_Encode PyUnicode_Encode
              #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
              #define PyString_AsEncodedString PyUnicode_AsEncodedString
              #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
              #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
              /* #define PyString_AsStringAndSize */
              #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
              #endif /* PY_MAJOR_VERSION */
              /* Backports from 2.6 */
              #if PY_VERSION_HEX < 0x02060000
              #define Py_TYPE(ob) (ob)->ob_type
              #define Py_SIZE(ob) (ob)->ob_size
              #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
              /* Shamelessly stolen from bytesobject.h */
              #define PyBytesObject PyStringObject
              #define PyBytes_Type PyString_Type
              #define PyBytes_Check PyString_Check
              #define PyBytes_CheckExact PyString_CheckExact
              #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
              #define PyBytes_AS_STRING PyString_AS_STRING
              #define PyBytes_GET_SIZE PyString_GET_SIZE
              #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
              #define PyBytes_FromStringAndSize PyString_FromStringAndSize
              #define PyBytes_FromString PyString_FromString
              #define PyBytes_FromFormatV PyString_FromFormatV
              #define PyBytes_FromFormat PyString_FromFormat
              #define PyBytes_Size PyString_Size
              #define PyBytes_AsString PyString_AsString
              #define PyBytes_Repr PyString_Repr
              #define PyBytes_Concat PyString_Concat
              #define PyBytes_ConcatAndDel PyString_ConcatAndDel
              #define _PyBytes_Resize _PyString_Resize
              #define _PyBytes_Eq _PyString_Eq
              #define PyBytes_Format PyString_Format
              #define _PyBytes_FormatLong _PyString_FormatLong
              #define PyBytes_DecodeEscape PyString_DecodeEscape
              #define _PyBytes_Join _PyString_Join
              #define PyBytes_Decode PyString_Decode
              #define PyBytes_Encode PyString_Encode
              #define PyBytes_AsEncodedObject PyString_AsEncodedObject
              #define PyBytes_AsEncodedString PyString_AsEncodedString
              #define PyBytes_AsDecodedObject PyString_AsDecodedObject
              #define PyBytes_AsDecodedString PyString_AsDecodedString
              #define PyBytes_AsStringAndSize PyString_AsStringAndSize
              #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
              #endif /* PY_VERSION_HEX */
              #if (PY_VERSION_HEX < 0x02050000)
              /* Definitions to get compatibility with python 2.4 and earlier which
                 does not have Py_ssize_t. See also PEP 353.
                 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
              */
              typedef int Py_ssize_t;
              typedef Py_ssize_t (*lenfunc)(PyObject *);
              typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
              #if !defined(PY_SSIZE_T_MIN)
              #define PY_SSIZE_T_MAX INT_MAX
              #define PY_SSIZE_T_MIN INT_MIN
              #endif
              #endif
              #ifdef _WIN32
              #ifdef _MSC_VER
              /* msvc 6.0 has problems */
              #define inline __inline
              typedef unsigned long uint32_t;
              typedef unsigned __int64 uint64_t;
              #else
              #include <stdint.h>
              #endif
-             static uint32_t ntohl(uint32_t x)
+             {
-             	return ((x & 0x000000ffUL) << 24) |
-             	       ((x & 0x0000ff00UL) <<  8) |
-             	       ((x & 0x00ff0000UL) >>  8) |
-             	       ((x & 0xff000000UL) >> 24);
+             }
              #else
              /* not windows */
              #include <sys/types.h>
              #if defined __BEOS__ && !defined __HAIKU__
              #include <ByteOrder.h>
              #else
              #include <arpa/inet.h>
              #endif
              #include <inttypes.h>
              #endif
              #if defined __hpux || defined __SUNPRO_C || defined _AIX
              #define inline
              #endif
              #ifdef __linux
              #define inline __inline
              #endif
+             static inline uint32_t getbe32(const char *c)
+             {
+             	const unsigned char *d = (const unsigned char *)c;
+             	return ((d[0] << 24) |
+             		(d[1] << 16) |
+             		(d[2] << 8) |
+             		(d[3]));
+             }
+             static inline void putbe32(uint32_t x, char *c)
+             {
+             	c[0] = (x >> 24) & 0xff;
+             	c[1] = (x >> 16) & 0xff;
+             	c[2] = (x >> 8) & 0xff;
+             	c[3] = (x) & 0xff;
+             }
              #endif /* _HG_UTIL_H_ */

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages