##// END OF EJS Templates
A bunch of parsing/help updates...
A bunch of parsing/help updates -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 A bunch of parsing/help updates more explanation of how to get non-basic commands shorten names of debug functions and add docstrings add undo long docstring promote anotate, export, and revert make the global opts array global refactor parsing kill two unused arguments to fancyopts update test-help manifest hash: 459ae2273aaf54f71b4576677a681dc53ab2908c -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.0 (GNU/Linux) iD8DBQFCyEDhywK+sNU5EO8RAr0DAJ9LTu8Fc2quLRtuwLPTQzWqlOJWKwCbBpZk pnMkYnshsutVYljcil1P46I= =Sleg -----END PGP SIGNATURE-----

File last commit:

r553:f2442a6a merge default
r596:9a8daeff default
Show More
bdiff.c
349 lines | 7.3 KiB | text/x-c | CLexer
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 /*
bdiff.c - efficient binary diff extension for Mercurial
Copyright 2005 Matt Mackall <mpm@selenic.com>
This software may be used and distributed according to the terms of
the GNU General Public License, incorporated herein by reference.
Based roughly on Python difflib
*/
#include <Python.h>
#include <stdlib.h>
#include <string.h>
mpm@selenic.com
Add 'other OS' bits to bdiff.c / style cleanups...
r411 #ifdef _WIN32
mpm@selenic.com
[PATCH] bdiff/mpatch under MSVC...
r551 #ifdef _MSC_VER
#define inline __inline
typedef unsigned long uint32_t;
#else
mpm@selenic.com
More fiddling with uint32_t includes for extensions...
r510 #include <stdint.h>
mpm@selenic.com
[PATCH] bdiff/mpatch under MSVC...
r551 #endif
mpm@selenic.com
Add 'other OS' bits to bdiff.c / style cleanups...
r411 static uint32_t htonl(uint32_t x)
{
return ((x & 0x000000ffUL) << 24) |
((x & 0x0000ff00UL) << 8) |
((x & 0x00ff0000UL) >> 8) |
((x & 0xff000000UL) >> 24);
}
#else
mpm@selenic.com
More fiddling with uint32_t includes for extensions...
r510 #include <sys/types.h>
mpm@selenic.com
extensions: use stdint.h...
r472 #include <netinet/in.h>
mpm@selenic.com
Add 'other OS' bits to bdiff.c / style cleanups...
r411 #endif
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
struct line {
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 int h, len, n, e;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 const char *l;
};
mpm@selenic.com
Minor speed improvements for bdiff...
r474 struct pos {
int pos, len;
};
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 struct hunk {
int a1, a2, b1, b2;
};
struct hunklist {
struct hunk *base, *head;
};
mpm@selenic.com
Use __inline instead of inline...
r439 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 {
return (word << shift) | (word >> (32 - shift));
}
int splitlines(const char *a, int len, struct line **lr)
{
int h, i;
const char *p, *b = a;
struct line *l;
/* count the lines */
i = 1; /* extra line for sentinel */
for (p = a; p < a + len; p++)
if (*p == '\n' || p == a + len - 1)
i++;
*lr = l = malloc(sizeof(struct line) * i);
if (!l)
return -1;
/* build the line array and calculate hashes */
h = 0;
for (p = a; p < a + len; p++) {
h = *p + rol32(h, 7); /* a simple hash from GNU diff */
if (*p == '\n' || p == a + len - 1) {
l->len = p - b + 1;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 l->h = h * l->len;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 l->l = b;
l->n = -1;
l++;
b = p + 1;
h = 0;
}
}
/* set up a sentinel */
l->h = l->len = 0;
l->l = a + len;
return i - 1;
}
int inline cmp(struct line *a, struct line *b)
{
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 }
static int equatelines(struct line *a, int an, struct line *b, int bn)
{
mpm@selenic.com
Minor speed improvements for bdiff...
r474 int i, j, buckets = 1, t;
struct pos *h;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
/* build a hash table of the next highest power of 2 */
while (buckets < bn + 1)
buckets *= 2;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 h = malloc(buckets * sizeof(struct pos));
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 buckets = buckets - 1;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 if (!h)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 return 0;
/* clear the hash table */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 for (i = 0; i <= buckets; i++) {
h[i].pos = -1;
h[i].len = 0;
}
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
/* add lines to the hash table chains */
for (i = bn - 1; i >= 0; i--) {
/* find the equivalence class */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 for (j = b[i].h & buckets; h[j].pos != -1;
j = (j + 1) & buckets)
if (!cmp(b + i, b + h[j].pos))
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 break;
/* add to the head of the equivalence class */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 b[i].n = h[j].pos;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 b[i].e = j;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 h[j].pos = i;
h[j].len++; /* keep track of popularity */
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 }
/* compute popularity threshold */
t = (bn >= 200) ? bn / 100 : bn + 1;
/* match items in a to their equivalence class in b */
for (i = 0; i < an; i++) {
/* find the equivalence class */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 for (j = a[i].h & buckets; h[j].pos != -1;
j = (j + 1) & buckets)
if (!cmp(a + i, b + h[j].pos))
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 break;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 a[i].e = j; /* use equivalence class for quick compare */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 if(h[j].len <= t)
a[i].n = h[j].pos; /* point to head of match list */
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 else
a[i].n = -1; /* too popular */
}
/* discard hash tables */
free(h);
return 1;
}
mpm@selenic.com
Minor speed improvements for bdiff...
r474 static int longest_match(struct line *a, struct line *b, struct pos *pos,
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 int a1, int a2, int b1, int b2, int *omi, int *omj)
{
int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
for (i = a1; i < a2; i++) {
/* skip things before the current block */
for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
;
/* loop through all lines match a[i] in b */
for (; j != -1 && j < b2; j = b[j].n) {
/* does this extend an earlier match? */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
k = pos[j - 1].len + 1;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 else
k = 1;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 pos[j].pos = i;
pos[j].len = k;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
/* best match so far? */
if (k > mk) {
mi = i;
mj = j;
mk = k;
}
}
}
if (mk) {
mi = mi - mk + 1;
mj = mj - mk + 1;
}
/* expand match to include neighboring popular lines */
while (mi - mb > a1 && mj - mb > b1 &&
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 a[mi - mb - 1].e == b[mj - mb - 1].e)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 mb++;
while (mi + mk < a2 && mj + mk < b2 &&
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 a[mi + mk].e == b[mj + mk].e)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 mk++;
*omi = mi - mb;
*omj = mj - mb;
return mk + mb;
}
mpm@selenic.com
Minor speed improvements for bdiff...
r474 static void recurse(struct line *a, struct line *b, struct pos *pos,
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 int a1, int a2, int b1, int b2, struct hunklist *l)
{
int i, j, k;
/* find the longest match in this chunk */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 if (!k)
return;
/* and recurse on the remaining chunks on either side */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 recurse(a, b, pos, a1, i, b1, j, l);
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 l->head->a1 = i;
l->head->a2 = i + k;
l->head->b1 = j;
l->head->b2 = j + k;
l->head++;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 recurse(a, b, pos, i + k, a2, j + k, b2, l);
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 }
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 {
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 struct hunklist l;
mpm@selenic.com
Minor speed improvements for bdiff...
r474 struct pos *pos;
int t;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433
/* allocate and fill arrays */
t = equatelines(a, an, b, bn);
mpm@selenic.com
Minor speed improvements for bdiff...
r474 pos = calloc(bn, sizeof(struct pos));
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
mpm@selenic.com
Minor speed improvements for bdiff...
r474 if (pos && l.base && t) {
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 /* generate the matching block list */
mpm@selenic.com
Minor speed improvements for bdiff...
r474 recurse(a, b, pos, 0, an, 0, bn, &l);
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 l.head->a1 = an;
l.head->b1 = bn;
l.head++;
}
mpm@selenic.com
Minor speed improvements for bdiff...
r474 free(pos);
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 return l;
}
static PyObject *blocks(PyObject *self, PyObject *args)
{
mpm@selenic.com
Fix a compile warning for bdiff...
r435 PyObject *sa, *sb, *rl = NULL, *m;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 struct line *a, *b;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 struct hunklist l;
struct hunk *h;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 int an, bn, pos = 0;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
return NULL;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
if (!a || !b)
goto nomem;
l = diff(a, an, b, bn);
rl = PyList_New(l.head - l.base);
if (!l.head || !rl)
goto nomem;
for(h = l.base; h != l.head; h++) {
m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
PyList_SetItem(rl, pos, m);
pos++;
}
nomem:
free(a);
free(b);
free(l.base);
return rl ? rl : PyErr_NoMemory();
}
static PyObject *bdiff(PyObject *self, PyObject *args)
{
PyObject *sa, *sb, *result = NULL;
struct line *al, *bl;
struct hunklist l;
struct hunk *h;
char encode[12], *rb;
int an, bn, len = 0, la = 0, lb = 0;
if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
return NULL;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 if (!al || !bl)
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 goto nomem;
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 l = diff(al, an, bl, bn);
if (!l.head)
goto nomem;
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400
/* calculate length of output */
for(h = l.base; h != l.head; h++) {
if (h->a1 != la || h->b1 != lb)
len += 12 + bl[h->b1].l - bl[lb].l;
la = h->a2;
lb = h->b2;
}
result = PyString_FromStringAndSize(NULL, len);
if (!result)
goto nomem;
/* build binary patch */
rb = PyString_AsString(result);
la = lb = 0;
for(h = l.base; h != l.head; h++) {
if (h->a1 != la || h->b1 != lb) {
len = bl[h->b1].l - bl[lb].l;
*(uint32_t *)(encode) = htonl(al[la].l - al->l);
*(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
*(uint32_t *)(encode + 8) = htonl(len);
memcpy(rb, encode, 12);
memcpy(rb + 12, bl[lb].l, len);
rb += 12 + len;
}
la = h->a2;
lb = h->b2;
}
nomem:
free(al);
free(bl);
free(l.base);
return result ? result : PyErr_NoMemory();
}
static char mdiff_doc[] = "Efficient binary diff.";
static PyMethodDef methods[] = {
{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
mpm@selenic.com
Add bdiff.blocks / minor performance tweaks...
r433 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
mpm@selenic.com
Add a fast binary diff extension (not yet used)...
r400 {NULL, NULL}
};
PyMODINIT_FUNC initbdiff(void)
{
Py_InitModule3("bdiff", methods, mdiff_doc);
}