Show More
@@ -30,7 +30,7 b' static uint32_t htonl(uint32_t x)' | |||||
30 | #endif |
|
30 | #endif | |
31 |
|
31 | |||
32 | struct line { |
|
32 | struct line { | |
33 | int h, len, n; |
|
33 | int h, len, n, e; | |
34 | const char *l; |
|
34 | const char *l; | |
35 | }; |
|
35 | }; | |
36 |
|
36 | |||
@@ -69,7 +69,7 b' int splitlines(const char *a, int len, s' | |||||
69 | h = *p + rol32(h, 7); /* a simple hash from GNU diff */ |
|
69 | h = *p + rol32(h, 7); /* a simple hash from GNU diff */ | |
70 | if (*p == '\n' || p == a + len - 1) { |
|
70 | if (*p == '\n' || p == a + len - 1) { | |
71 | l->len = p - b + 1; |
|
71 | l->len = p - b + 1; | |
72 | l->h = h; |
|
72 | l->h = h * l->len; | |
73 | l->l = b; |
|
73 | l->l = b; | |
74 | l->n = -1; |
|
74 | l->n = -1; | |
75 | l++; |
|
75 | l++; | |
@@ -86,7 +86,7 b' int splitlines(const char *a, int len, s' | |||||
86 |
|
86 | |||
87 | int inline cmp(struct line *a, struct line *b) |
|
87 | int inline cmp(struct line *a, struct line *b) | |
88 | { |
|
88 | { | |
89 | return a->len != b->len || memcmp(a->l, b->l, a->len); |
|
89 | return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); | |
90 | } |
|
90 | } | |
91 |
|
91 | |||
92 | static int equatelines(struct line *a, int an, struct line *b, int bn) |
|
92 | static int equatelines(struct line *a, int an, struct line *b, int bn) | |
@@ -118,7 +118,7 b' static int equatelines(struct line *a, i' | |||||
118 |
|
118 | |||
119 | /* add to the head of the equivalence class */ |
|
119 | /* add to the head of the equivalence class */ | |
120 | b[i].n = h[j]; |
|
120 | b[i].n = h[j]; | |
121 |
b[i]. |
|
121 | b[i].e = j; | |
122 | h[j] = i; |
|
122 | h[j] = i; | |
123 | l[j]++; /* keep track of popularity */ |
|
123 | l[j]++; /* keep track of popularity */ | |
124 | } |
|
124 | } | |
@@ -133,7 +133,7 b' static int equatelines(struct line *a, i' | |||||
133 | if (!cmp(a + i, b + h[j])) |
|
133 | if (!cmp(a + i, b + h[j])) | |
134 | break; |
|
134 | break; | |
135 |
|
135 | |||
136 |
a[i]. |
|
136 | a[i].e = j; /* use equivalence class for quick compare */ | |
137 | if(l[j] <= t) |
|
137 | if(l[j] <= t) | |
138 | a[i].n = h[j]; /* point to head of match list */ |
|
138 | a[i].n = h[j]; /* point to head of match list */ | |
139 | else |
|
139 | else | |
@@ -159,11 +159,11 b' static int longest_match(struct line *a,' | |||||
159 | /* loop through all lines match a[i] in b */ |
|
159 | /* loop through all lines match a[i] in b */ | |
160 | for (; j != -1 && j < b2; j = b[j].n) { |
|
160 | for (; j != -1 && j < b2; j = b[j].n) { | |
161 | /* does this extend an earlier match? */ |
|
161 | /* does this extend an earlier match? */ | |
162 | if (i > a1 && j > b1 && jpos[j - 1] == i) |
|
162 | if (i > a1 && j > b1 && jpos[j - 1] == i - 1) | |
163 | k = jlen[j - 1] + 1; |
|
163 | k = jlen[j - 1] + 1; | |
164 | else |
|
164 | else | |
165 | k = 1; |
|
165 | k = 1; | |
166 |
jpos[j] = i |
|
166 | jpos[j] = i; | |
167 | jlen[j] = k; |
|
167 | jlen[j] = k; | |
168 |
|
168 | |||
169 | /* best match so far? */ |
|
169 | /* best match so far? */ | |
@@ -182,10 +182,10 b' static int longest_match(struct line *a,' | |||||
182 |
|
182 | |||
183 | /* expand match to include neighboring popular lines */ |
|
183 | /* expand match to include neighboring popular lines */ | |
184 | while (mi - mb > a1 && mj - mb > b1 && |
|
184 | while (mi - mb > a1 && mj - mb > b1 && | |
185 |
a[mi - mb - 1]. |
|
185 | a[mi - mb - 1].e == b[mj - mb - 1].e) | |
186 | mb++; |
|
186 | mb++; | |
187 | while (mi + mk < a2 && mj + mk < b2 && |
|
187 | while (mi + mk < a2 && mj + mk < b2 && | |
188 |
a[mi + mk]. |
|
188 | a[mi + mk].e == b[mj + mk].e) | |
189 | mk++; |
|
189 | mk++; | |
190 |
|
190 | |||
191 | *omi = mi - mb; |
|
191 | *omi = mi - mb; | |
@@ -213,33 +213,84 b' static void recurse(struct line *a, stru' | |||||
213 | recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); |
|
213 | recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l); | |
214 | } |
|
214 | } | |
215 |
|
215 | |||
216 | static PyObject *bdiff(PyObject *self, PyObject *args) |
|
216 | static struct hunklist diff(struct line *a, int an, struct line *b, int bn) | |
217 | { |
|
217 | { | |
218 | PyObject *sa, *sb, *result = NULL; |
|
218 | struct hunklist l; | |
|
219 | int *jpos, *jlen, t; | |||
|
220 | ||||
|
221 | /* allocate and fill arrays */ | |||
|
222 | t = equatelines(a, an, b, bn); | |||
|
223 | jpos = calloc(bn, sizeof(int)); | |||
|
224 | jlen = calloc(bn, sizeof(int)); | |||
|
225 | l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); | |||
|
226 | ||||
|
227 | if (jpos && jlen && l.base && t) { | |||
|
228 | /* generate the matching block list */ | |||
|
229 | recurse(a, b, jpos, jlen, 0, an, 0, bn, &l); | |||
|
230 | l.head->a1 = an; | |||
|
231 | l.head->b1 = bn; | |||
|
232 | l.head++; | |||
|
233 | } | |||
|
234 | ||||
|
235 | free(jpos); | |||
|
236 | free(jlen); | |||
|
237 | return l; | |||
|
238 | } | |||
|
239 | ||||
|
240 | static PyObject *blocks(PyObject *self, PyObject *args) | |||
|
241 | { | |||
|
242 | PyObject *sa, *sb, *rl, *m; | |||
|
243 | struct line *a, *b; | |||
219 | struct hunklist l; |
|
244 | struct hunklist l; | |
220 | struct hunk *h; |
|
245 | struct hunk *h; | |
221 | struct line *al, *bl; |
|
246 | int an, bn, pos = 0; | |
222 | char encode[12], *rb; |
|
|||
223 | int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen; |
|
|||
224 |
|
247 | |||
225 | if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) |
|
248 | if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |
226 | return NULL; |
|
249 | return NULL; | |
227 |
|
250 | |||
228 | /* allocate and fill arrays */ |
|
251 | an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a); | |
|
252 | bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b); | |||
|
253 | if (!a || !b) | |||
|
254 | goto nomem; | |||
|
255 | ||||
|
256 | l = diff(a, an, b, bn); | |||
|
257 | rl = PyList_New(l.head - l.base); | |||
|
258 | if (!l.head || !rl) | |||
|
259 | goto nomem; | |||
|
260 | ||||
|
261 | for(h = l.base; h != l.head; h++) { | |||
|
262 | m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); | |||
|
263 | PyList_SetItem(rl, pos, m); | |||
|
264 | pos++; | |||
|
265 | } | |||
|
266 | ||||
|
267 | nomem: | |||
|
268 | free(a); | |||
|
269 | free(b); | |||
|
270 | free(l.base); | |||
|
271 | return rl ? rl : PyErr_NoMemory(); | |||
|
272 | } | |||
|
273 | ||||
|
274 | static PyObject *bdiff(PyObject *self, PyObject *args) | |||
|
275 | { | |||
|
276 | PyObject *sa, *sb, *result = NULL; | |||
|
277 | struct line *al, *bl; | |||
|
278 | struct hunklist l; | |||
|
279 | struct hunk *h; | |||
|
280 | char encode[12], *rb; | |||
|
281 | int an, bn, len = 0, la = 0, lb = 0; | |||
|
282 | ||||
|
283 | if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |||
|
284 | return NULL; | |||
|
285 | ||||
229 | an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); |
|
286 | an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al); | |
230 | bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); |
|
287 | bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl); | |
231 | t = equatelines(al, an, bl, bn); |
|
288 | if (!al || !bl) | |
232 | jpos = calloc(bn, sizeof(int)); |
|
|||
233 | jlen = calloc(bn, sizeof(int)); |
|
|||
234 | l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2)); |
|
|||
235 | if (!al || !bl || !jpos || !jlen || !l.base || !t) |
|
|||
236 | goto nomem; |
|
289 | goto nomem; | |
237 |
|
290 | |||
238 | /* generate the matching block list */ |
|
291 | l = diff(al, an, bl, bn); | |
239 | recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l); |
|
292 | if (!l.head) | |
240 | l.head->a1 = an; |
|
293 | goto nomem; | |
241 | l.head->b1 = bn; |
|
|||
242 | l.head++; |
|
|||
243 |
|
294 | |||
244 | /* calculate length of output */ |
|
295 | /* calculate length of output */ | |
245 | for(h = l.base; h != l.head; h++) { |
|
296 | for(h = l.base; h != l.head; h++) { | |
@@ -274,8 +325,6 b' static PyObject *bdiff(PyObject *self, P' | |||||
274 | nomem: |
|
325 | nomem: | |
275 | free(al); |
|
326 | free(al); | |
276 | free(bl); |
|
327 | free(bl); | |
277 | free(jpos); |
|
|||
278 | free(jlen); |
|
|||
279 | free(l.base); |
|
328 | free(l.base); | |
280 | return result ? result : PyErr_NoMemory(); |
|
329 | return result ? result : PyErr_NoMemory(); | |
281 | } |
|
330 | } | |
@@ -284,6 +333,7 b' static char mdiff_doc[] = "Efficient bin' | |||||
284 |
|
333 | |||
285 | static PyMethodDef methods[] = { |
|
334 | static PyMethodDef methods[] = { | |
286 | {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
|
335 | {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, | |
|
336 | {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, | |||
287 | {NULL, NULL} |
|
337 | {NULL, NULL} | |
288 | }; |
|
338 | }; | |
289 |
|
339 |
General Comments 0
You need to be logged in to leave comments.
Login now