##// END OF EJS Templates
bdiff: fix compile with GCC -ansi (issue1690)
Matt Mackall -
r8858:16f6c137 default
parent child Browse files
Show More
@@ -1,397 +1,401
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <limits.h>
15 #include <limits.h>
16
16
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
18 # define inline
18 # define inline
19 #endif
19 #endif
20
20
21 #ifdef __linux
22 # define inline __inline
23 #endif
24
21 #ifdef _WIN32
25 #ifdef _WIN32
22 #ifdef _MSC_VER
26 #ifdef _MSC_VER
23 #define inline __inline
27 #define inline __inline
24 typedef unsigned long uint32_t;
28 typedef unsigned long uint32_t;
25 #else
29 #else
26 #include <stdint.h>
30 #include <stdint.h>
27 #endif
31 #endif
/*
 * Reverse the byte order of a 32-bit value.
 *
 * This fallback lives in the _WIN32 branch and swaps unconditionally,
 * i.e. it treats the host as little-endian.
 */
static uint32_t htonl(uint32_t x)
{
	const uint32_t lowest = (x & 0x000000ffUL) << 24;
	const uint32_t low = (x & 0x0000ff00UL) << 8;
	const uint32_t high = (x & 0x00ff0000UL) >> 8;
	const uint32_t highest = (x & 0xff000000UL) >> 24;

	return lowest | low | high | highest;
}
35 #else
39 #else
36 #include <sys/types.h>
40 #include <sys/types.h>
37 #if defined __BEOS__ && !defined __HAIKU__
41 #if defined __BEOS__ && !defined __HAIKU__
38 #include <ByteOrder.h>
42 #include <ByteOrder.h>
39 #else
43 #else
40 #include <arpa/inet.h>
44 #include <arpa/inet.h>
41 #endif
45 #endif
42 #include <inttypes.h>
46 #include <inttypes.h>
43 #endif
47 #endif
44
48
/* one line of an input buffer, as filled in by splitlines() */
struct line {
	int h;         /* hash of the line's bytes */
	int len;       /* length in bytes, trailing newline included */
	int n;         /* index of the next equal line in b, INT_MAX if none */
	int e;         /* hash table slot, used as equivalence class id */
	const char *l; /* first byte of the line inside the input buffer */
};
49
53
/*
 * Index/length pair used in two places: as a hash table slot in
 * equatelines() (head of a chain of equal lines, chain size) and as
 * per-line scratch in longest_match() (last matching index in a,
 * length of the run ending there).
 */
struct pos {
	int pos;
	int len;
};
53
57
/* a matching block: lines a1..a2-1 of a equal lines b1..b2-1 of b */
struct hunk {
	int a1; /* first matching line in a */
	int a2; /* one past the last matching line in a */
	int b1; /* first matching line in b */
	int b2; /* one past the last matching line in b */
};
57
61
/* growable view over an array of hunks */
struct hunklist {
	struct hunk *base; /* start of the allocated array */
	struct hunk *head; /* one past the last hunk written so far */
};
61
65
62 int splitlines(const char *a, int len, struct line **lr)
66 int splitlines(const char *a, int len, struct line **lr)
63 {
67 {
64 int h, i;
68 int h, i;
65 const char *p, *b = a;
69 const char *p, *b = a;
66 const char * const plast = a + len - 1;
70 const char * const plast = a + len - 1;
67 struct line *l;
71 struct line *l;
68
72
69 /* count the lines */
73 /* count the lines */
70 i = 1; /* extra line for sentinel */
74 i = 1; /* extra line for sentinel */
71 for (p = a; p < a + len; p++)
75 for (p = a; p < a + len; p++)
72 if (*p == '\n' || p == plast)
76 if (*p == '\n' || p == plast)
73 i++;
77 i++;
74
78
75 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
79 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
76 if (!l)
80 if (!l)
77 return -1;
81 return -1;
78
82
79 /* build the line array and calculate hashes */
83 /* build the line array and calculate hashes */
80 h = 0;
84 h = 0;
81 for (p = a; p < a + len; p++) {
85 for (p = a; p < a + len; p++) {
82 /* Leonid Yuriev's hash */
86 /* Leonid Yuriev's hash */
83 h = (h * 1664525) + *p + 1013904223;
87 h = (h * 1664525) + *p + 1013904223;
84
88
85 if (*p == '\n' || p == plast) {
89 if (*p == '\n' || p == plast) {
86 l->h = h;
90 l->h = h;
87 h = 0;
91 h = 0;
88 l->len = p - b + 1;
92 l->len = p - b + 1;
89 l->l = b;
93 l->l = b;
90 l->n = INT_MAX;
94 l->n = INT_MAX;
91 l++;
95 l++;
92 b = p + 1;
96 b = p + 1;
93 }
97 }
94 }
98 }
95
99
96 /* set up a sentinel */
100 /* set up a sentinel */
97 l->h = l->len = 0;
101 l->h = l->len = 0;
98 l->l = a + len;
102 l->l = a + len;
99 return i - 1;
103 return i - 1;
100 }
104 }
101
105
102 int inline cmp(struct line *a, struct line *b)
106 int inline cmp(struct line *a, struct line *b)
103 {
107 {
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
108 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 }
109 }
106
110
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
111 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 {
112 {
109 int i, j, buckets = 1, t, scale;
113 int i, j, buckets = 1, t, scale;
110 struct pos *h = NULL;
114 struct pos *h = NULL;
111
115
112 /* build a hash table of the next highest power of 2 */
116 /* build a hash table of the next highest power of 2 */
113 while (buckets < bn + 1)
117 while (buckets < bn + 1)
114 buckets *= 2;
118 buckets *= 2;
115
119
116 /* try to allocate a large hash table to avoid collisions */
120 /* try to allocate a large hash table to avoid collisions */
117 for (scale = 4; scale; scale /= 2) {
121 for (scale = 4; scale; scale /= 2) {
118 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
122 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
119 if (h)
123 if (h)
120 break;
124 break;
121 }
125 }
122
126
123 if (!h)
127 if (!h)
124 return 0;
128 return 0;
125
129
126 buckets = buckets * scale - 1;
130 buckets = buckets * scale - 1;
127
131
128 /* clear the hash table */
132 /* clear the hash table */
129 for (i = 0; i <= buckets; i++) {
133 for (i = 0; i <= buckets; i++) {
130 h[i].pos = INT_MAX;
134 h[i].pos = INT_MAX;
131 h[i].len = 0;
135 h[i].len = 0;
132 }
136 }
133
137
134 /* add lines to the hash table chains */
138 /* add lines to the hash table chains */
135 for (i = bn - 1; i >= 0; i--) {
139 for (i = bn - 1; i >= 0; i--) {
136 /* find the equivalence class */
140 /* find the equivalence class */
137 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
141 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
138 j = (j + 1) & buckets)
142 j = (j + 1) & buckets)
139 if (!cmp(b + i, b + h[j].pos))
143 if (!cmp(b + i, b + h[j].pos))
140 break;
144 break;
141
145
142 /* add to the head of the equivalence class */
146 /* add to the head of the equivalence class */
143 b[i].n = h[j].pos;
147 b[i].n = h[j].pos;
144 b[i].e = j;
148 b[i].e = j;
145 h[j].pos = i;
149 h[j].pos = i;
146 h[j].len++; /* keep track of popularity */
150 h[j].len++; /* keep track of popularity */
147 }
151 }
148
152
149 /* compute popularity threshold */
153 /* compute popularity threshold */
150 t = (bn >= 4000) ? bn / 1000 : bn + 1;
154 t = (bn >= 4000) ? bn / 1000 : bn + 1;
151
155
152 /* match items in a to their equivalence class in b */
156 /* match items in a to their equivalence class in b */
153 for (i = 0; i < an; i++) {
157 for (i = 0; i < an; i++) {
154 /* find the equivalence class */
158 /* find the equivalence class */
155 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
159 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
156 j = (j + 1) & buckets)
160 j = (j + 1) & buckets)
157 if (!cmp(a + i, b + h[j].pos))
161 if (!cmp(a + i, b + h[j].pos))
158 break;
162 break;
159
163
160 a[i].e = j; /* use equivalence class for quick compare */
164 a[i].e = j; /* use equivalence class for quick compare */
161 if (h[j].len <= t)
165 if (h[j].len <= t)
162 a[i].n = h[j].pos; /* point to head of match list */
166 a[i].n = h[j].pos; /* point to head of match list */
163 else
167 else
164 a[i].n = INT_MAX; /* too popular */
168 a[i].n = INT_MAX; /* too popular */
165 }
169 }
166
170
167 /* discard hash tables */
171 /* discard hash tables */
168 free(h);
172 free(h);
169 return 1;
173 return 1;
170 }
174 }
171
175
172 static int longest_match(struct line *a, struct line *b, struct pos *pos,
176 static int longest_match(struct line *a, struct line *b, struct pos *pos,
173 int a1, int a2, int b1, int b2, int *omi, int *omj)
177 int a1, int a2, int b1, int b2, int *omi, int *omj)
174 {
178 {
175 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
179 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
176
180
177 for (i = a1; i < a2; i++) {
181 for (i = a1; i < a2; i++) {
178 /* skip things before the current block */
182 /* skip things before the current block */
179 for (j = a[i].n; j < b1; j = b[j].n)
183 for (j = a[i].n; j < b1; j = b[j].n)
180 ;
184 ;
181
185
182 /* loop through all lines match a[i] in b */
186 /* loop through all lines match a[i] in b */
183 for (; j < b2; j = b[j].n) {
187 for (; j < b2; j = b[j].n) {
184 /* does this extend an earlier match? */
188 /* does this extend an earlier match? */
185 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
189 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
186 k = pos[j - 1].len + 1;
190 k = pos[j - 1].len + 1;
187 else
191 else
188 k = 1;
192 k = 1;
189 pos[j].pos = i;
193 pos[j].pos = i;
190 pos[j].len = k;
194 pos[j].len = k;
191
195
192 /* best match so far? */
196 /* best match so far? */
193 if (k > mk) {
197 if (k > mk) {
194 mi = i;
198 mi = i;
195 mj = j;
199 mj = j;
196 mk = k;
200 mk = k;
197 }
201 }
198 }
202 }
199 }
203 }
200
204
201 if (mk) {
205 if (mk) {
202 mi = mi - mk + 1;
206 mi = mi - mk + 1;
203 mj = mj - mk + 1;
207 mj = mj - mk + 1;
204 }
208 }
205
209
206 /* expand match to include neighboring popular lines */
210 /* expand match to include neighboring popular lines */
207 while (mi - mb > a1 && mj - mb > b1 &&
211 while (mi - mb > a1 && mj - mb > b1 &&
208 a[mi - mb - 1].e == b[mj - mb - 1].e)
212 a[mi - mb - 1].e == b[mj - mb - 1].e)
209 mb++;
213 mb++;
210 while (mi + mk < a2 && mj + mk < b2 &&
214 while (mi + mk < a2 && mj + mk < b2 &&
211 a[mi + mk].e == b[mj + mk].e)
215 a[mi + mk].e == b[mj + mk].e)
212 mk++;
216 mk++;
213
217
214 *omi = mi - mb;
218 *omi = mi - mb;
215 *omj = mj - mb;
219 *omj = mj - mb;
216
220
217 return mk + mb;
221 return mk + mb;
218 }
222 }
219
223
220 static void recurse(struct line *a, struct line *b, struct pos *pos,
224 static void recurse(struct line *a, struct line *b, struct pos *pos,
221 int a1, int a2, int b1, int b2, struct hunklist *l)
225 int a1, int a2, int b1, int b2, struct hunklist *l)
222 {
226 {
223 int i, j, k;
227 int i, j, k;
224
228
225 /* find the longest match in this chunk */
229 /* find the longest match in this chunk */
226 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
230 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
227 if (!k)
231 if (!k)
228 return;
232 return;
229
233
230 /* and recurse on the remaining chunks on either side */
234 /* and recurse on the remaining chunks on either side */
231 recurse(a, b, pos, a1, i, b1, j, l);
235 recurse(a, b, pos, a1, i, b1, j, l);
232 l->head->a1 = i;
236 l->head->a1 = i;
233 l->head->a2 = i + k;
237 l->head->a2 = i + k;
234 l->head->b1 = j;
238 l->head->b1 = j;
235 l->head->b2 = j + k;
239 l->head->b2 = j + k;
236 l->head++;
240 l->head++;
237 recurse(a, b, pos, i + k, a2, j + k, b2, l);
241 recurse(a, b, pos, i + k, a2, j + k, b2, l);
238 }
242 }
239
243
240 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
244 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
241 {
245 {
242 struct hunklist l;
246 struct hunklist l;
243 struct hunk *curr;
247 struct hunk *curr;
244 struct pos *pos;
248 struct pos *pos;
245 int t;
249 int t;
246
250
247 /* allocate and fill arrays */
251 /* allocate and fill arrays */
248 t = equatelines(a, an, b, bn);
252 t = equatelines(a, an, b, bn);
249 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
253 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
250 /* we can't have more matches than lines in the shorter file */
254 /* we can't have more matches than lines in the shorter file */
251 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
255 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
252 ((an<bn ? an:bn) + 1));
256 ((an<bn ? an:bn) + 1));
253
257
254 if (pos && l.base && t) {
258 if (pos && l.base && t) {
255 /* generate the matching block list */
259 /* generate the matching block list */
256 recurse(a, b, pos, 0, an, 0, bn, &l);
260 recurse(a, b, pos, 0, an, 0, bn, &l);
257 l.head->a1 = l.head->a2 = an;
261 l.head->a1 = l.head->a2 = an;
258 l.head->b1 = l.head->b2 = bn;
262 l.head->b1 = l.head->b2 = bn;
259 l.head++;
263 l.head++;
260 }
264 }
261
265
262 free(pos);
266 free(pos);
263
267
264 /* normalize the hunk list, try to push each hunk towards the end */
268 /* normalize the hunk list, try to push each hunk towards the end */
265 for (curr = l.base; curr != l.head; curr++) {
269 for (curr = l.base; curr != l.head; curr++) {
266 struct hunk *next = curr+1;
270 struct hunk *next = curr+1;
267 int shift = 0;
271 int shift = 0;
268
272
269 if (next == l.head)
273 if (next == l.head)
270 break;
274 break;
271
275
272 if (curr->a2 == next->a1)
276 if (curr->a2 == next->a1)
273 while (curr->a2+shift < an && curr->b2+shift < bn
277 while (curr->a2+shift < an && curr->b2+shift < bn
274 && !cmp(a+curr->a2+shift, b+curr->b2+shift))
278 && !cmp(a+curr->a2+shift, b+curr->b2+shift))
275 shift++;
279 shift++;
276 else if (curr->b2 == next->b1)
280 else if (curr->b2 == next->b1)
277 while (curr->b2+shift < bn && curr->a2+shift < an
281 while (curr->b2+shift < bn && curr->a2+shift < an
278 && !cmp(b+curr->b2+shift, a+curr->a2+shift))
282 && !cmp(b+curr->b2+shift, a+curr->a2+shift))
279 shift++;
283 shift++;
280 if (!shift)
284 if (!shift)
281 continue;
285 continue;
282 curr->b2 += shift;
286 curr->b2 += shift;
283 next->b1 += shift;
287 next->b1 += shift;
284 curr->a2 += shift;
288 curr->a2 += shift;
285 next->a1 += shift;
289 next->a1 += shift;
286 }
290 }
287
291
288 return l;
292 return l;
289 }
293 }
290
294
291 static PyObject *blocks(PyObject *self, PyObject *args)
295 static PyObject *blocks(PyObject *self, PyObject *args)
292 {
296 {
293 PyObject *sa, *sb, *rl = NULL, *m;
297 PyObject *sa, *sb, *rl = NULL, *m;
294 struct line *a, *b;
298 struct line *a, *b;
295 struct hunklist l = {NULL, NULL};
299 struct hunklist l = {NULL, NULL};
296 struct hunk *h;
300 struct hunk *h;
297 int an, bn, pos = 0;
301 int an, bn, pos = 0;
298
302
299 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
303 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
300 return NULL;
304 return NULL;
301
305
302 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
306 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
303 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
307 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
304 if (!a || !b)
308 if (!a || !b)
305 goto nomem;
309 goto nomem;
306
310
307 l = diff(a, an, b, bn);
311 l = diff(a, an, b, bn);
308 rl = PyList_New(l.head - l.base);
312 rl = PyList_New(l.head - l.base);
309 if (!l.head || !rl)
313 if (!l.head || !rl)
310 goto nomem;
314 goto nomem;
311
315
312 for (h = l.base; h != l.head; h++) {
316 for (h = l.base; h != l.head; h++) {
313 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
317 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
314 PyList_SetItem(rl, pos, m);
318 PyList_SetItem(rl, pos, m);
315 pos++;
319 pos++;
316 }
320 }
317
321
318 nomem:
322 nomem:
319 free(a);
323 free(a);
320 free(b);
324 free(b);
321 free(l.base);
325 free(l.base);
322 return rl ? rl : PyErr_NoMemory();
326 return rl ? rl : PyErr_NoMemory();
323 }
327 }
324
328
325 static PyObject *bdiff(PyObject *self, PyObject *args)
329 static PyObject *bdiff(PyObject *self, PyObject *args)
326 {
330 {
327 char *sa, *sb;
331 char *sa, *sb;
328 PyObject *result = NULL;
332 PyObject *result = NULL;
329 struct line *al, *bl;
333 struct line *al, *bl;
330 struct hunklist l = {NULL, NULL};
334 struct hunklist l = {NULL, NULL};
331 struct hunk *h;
335 struct hunk *h;
332 char encode[12], *rb;
336 char encode[12], *rb;
333 int an, bn, len = 0, la, lb;
337 int an, bn, len = 0, la, lb;
334
338
335 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
339 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
336 return NULL;
340 return NULL;
337
341
338 an = splitlines(sa, la, &al);
342 an = splitlines(sa, la, &al);
339 bn = splitlines(sb, lb, &bl);
343 bn = splitlines(sb, lb, &bl);
340 if (!al || !bl)
344 if (!al || !bl)
341 goto nomem;
345 goto nomem;
342
346
343 l = diff(al, an, bl, bn);
347 l = diff(al, an, bl, bn);
344 if (!l.head)
348 if (!l.head)
345 goto nomem;
349 goto nomem;
346
350
347 /* calculate length of output */
351 /* calculate length of output */
348 la = lb = 0;
352 la = lb = 0;
349 for (h = l.base; h != l.head; h++) {
353 for (h = l.base; h != l.head; h++) {
350 if (h->a1 != la || h->b1 != lb)
354 if (h->a1 != la || h->b1 != lb)
351 len += 12 + bl[h->b1].l - bl[lb].l;
355 len += 12 + bl[h->b1].l - bl[lb].l;
352 la = h->a2;
356 la = h->a2;
353 lb = h->b2;
357 lb = h->b2;
354 }
358 }
355
359
356 result = PyString_FromStringAndSize(NULL, len);
360 result = PyString_FromStringAndSize(NULL, len);
357 if (!result)
361 if (!result)
358 goto nomem;
362 goto nomem;
359
363
360 /* build binary patch */
364 /* build binary patch */
361 rb = PyString_AsString(result);
365 rb = PyString_AsString(result);
362 la = lb = 0;
366 la = lb = 0;
363
367
364 for (h = l.base; h != l.head; h++) {
368 for (h = l.base; h != l.head; h++) {
365 if (h->a1 != la || h->b1 != lb) {
369 if (h->a1 != la || h->b1 != lb) {
366 len = bl[h->b1].l - bl[lb].l;
370 len = bl[h->b1].l - bl[lb].l;
367 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
371 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
368 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
372 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
369 *(uint32_t *)(encode + 8) = htonl(len);
373 *(uint32_t *)(encode + 8) = htonl(len);
370 memcpy(rb, encode, 12);
374 memcpy(rb, encode, 12);
371 memcpy(rb + 12, bl[lb].l, len);
375 memcpy(rb + 12, bl[lb].l, len);
372 rb += 12 + len;
376 rb += 12 + len;
373 }
377 }
374 la = h->a2;
378 la = h->a2;
375 lb = h->b2;
379 lb = h->b2;
376 }
380 }
377
381
378 nomem:
382 nomem:
379 free(al);
383 free(al);
380 free(bl);
384 free(bl);
381 free(l.base);
385 free(l.base);
382 return result ? result : PyErr_NoMemory();
386 return result ? result : PyErr_NoMemory();
383 }
387 }
384
388
385 static char mdiff_doc[] = "Efficient binary diff.";
389 static char mdiff_doc[] = "Efficient binary diff.";
386
390
387 static PyMethodDef methods[] = {
391 static PyMethodDef methods[] = {
388 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
392 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
389 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
393 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
390 {NULL, NULL}
394 {NULL, NULL}
391 };
395 };
392
396
393 PyMODINIT_FUNC initbdiff(void)
397 PyMODINIT_FUNC initbdiff(void)
394 {
398 {
395 Py_InitModule3("bdiff", methods, mdiff_doc);
399 Py_InitModule3("bdiff", methods, mdiff_doc);
396 }
400 }
397
401
General Comments 0
You need to be logged in to leave comments. Login now