##// END OF EJS Templates
Sunpro compiler patch...
Fabian Otto -
r1759:5afd459d default
parent child Browse files
Show More
@@ -1,356 +1,360 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #include <Python.h>
13 13 #include <stdlib.h>
14 14 #include <string.h>
15 15
16 16 #ifdef __hpux
17 17 #define inline
18 18 #endif
19 19
20 #ifdef __SUNPRO_C
21 # define inline
22 #endif
23
20 24 #ifdef _WIN32
21 25 #ifdef _MSC_VER
22 26 #define inline __inline
23 27 typedef unsigned long uint32_t;
24 28 #else
25 29 #include <stdint.h>
26 30 #endif
27 31 static uint32_t htonl(uint32_t x)
28 32 {
29 33 return ((x & 0x000000ffUL) << 24) |
30 34 ((x & 0x0000ff00UL) << 8) |
31 35 ((x & 0x00ff0000UL) >> 8) |
32 36 ((x & 0xff000000UL) >> 24);
33 37 }
34 38 #else
35 39 #include <sys/types.h>
36 40 #include <arpa/inet.h>
37 41 #endif
38 42
39 43 struct line {
40 44 int h, len, n, e;
41 45 const char *l;
42 46 };
43 47
44 48 struct pos {
45 49 int pos, len;
46 50 };
47 51
48 52 struct hunk {
49 53 int a1, a2, b1, b2;
50 54 };
51 55
52 56 struct hunklist {
53 57 struct hunk *base, *head;
54 58 };
55 59
56 60 static inline uint32_t rol32(uint32_t word, unsigned int shift)
57 61 {
58 62 return (word << shift) | (word >> (32 - shift));
59 63 }
60 64
61 65 int splitlines(const char *a, int len, struct line **lr)
62 66 {
63 67 int h, i;
64 68 const char *p, *b = a;
65 69 struct line *l;
66 70
67 71 /* count the lines */
68 72 i = 1; /* extra line for sentinel */
69 73 for (p = a; p < a + len; p++)
70 74 if (*p == '\n' || p == a + len - 1)
71 75 i++;
72 76
73 77 *lr = l = malloc(sizeof(struct line) * i);
74 78 if (!l)
75 79 return -1;
76 80
77 81 /* build the line array and calculate hashes */
78 82 h = 0;
79 83 for (p = a; p < a + len; p++) {
80 84 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
81 85 if (*p == '\n' || p == a + len - 1) {
82 86 l->len = p - b + 1;
83 87 l->h = h * l->len;
84 88 l->l = b;
85 89 l->n = -1;
86 90 l++;
87 91 b = p + 1;
88 92 h = 0;
89 93 }
90 94 }
91 95
92 96 /* set up a sentinel */
93 97 l->h = l->len = 0;
94 98 l->l = a + len;
95 99 return i - 1;
96 100 }
97 101
98 102 int inline cmp(struct line *a, struct line *b)
99 103 {
100 104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
101 105 }
102 106
103 107 static int equatelines(struct line *a, int an, struct line *b, int bn)
104 108 {
105 109 int i, j, buckets = 1, t;
106 110 struct pos *h;
107 111
108 112 /* build a hash table of the next highest power of 2 */
109 113 while (buckets < bn + 1)
110 114 buckets *= 2;
111 115
112 116 h = malloc(buckets * sizeof(struct pos));
113 117 buckets = buckets - 1;
114 118 if (!h)
115 119 return 0;
116 120
117 121 /* clear the hash table */
118 122 for (i = 0; i <= buckets; i++) {
119 123 h[i].pos = -1;
120 124 h[i].len = 0;
121 125 }
122 126
123 127 /* add lines to the hash table chains */
124 128 for (i = bn - 1; i >= 0; i--) {
125 129 /* find the equivalence class */
126 130 for (j = b[i].h & buckets; h[j].pos != -1;
127 131 j = (j + 1) & buckets)
128 132 if (!cmp(b + i, b + h[j].pos))
129 133 break;
130 134
131 135 /* add to the head of the equivalence class */
132 136 b[i].n = h[j].pos;
133 137 b[i].e = j;
134 138 h[j].pos = i;
135 139 h[j].len++; /* keep track of popularity */
136 140 }
137 141
138 142 /* compute popularity threshold */
139 143 t = (bn >= 200) ? bn / 100 : bn + 1;
140 144
141 145 /* match items in a to their equivalence class in b */
142 146 for (i = 0; i < an; i++) {
143 147 /* find the equivalence class */
144 148 for (j = a[i].h & buckets; h[j].pos != -1;
145 149 j = (j + 1) & buckets)
146 150 if (!cmp(a + i, b + h[j].pos))
147 151 break;
148 152
149 153 a[i].e = j; /* use equivalence class for quick compare */
150 154 if (h[j].len <= t)
151 155 a[i].n = h[j].pos; /* point to head of match list */
152 156 else
153 157 a[i].n = -1; /* too popular */
154 158 }
155 159
156 160 /* discard hash tables */
157 161 free(h);
158 162 return 1;
159 163 }
160 164
161 165 static int longest_match(struct line *a, struct line *b, struct pos *pos,
162 166 int a1, int a2, int b1, int b2, int *omi, int *omj)
163 167 {
164 168 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
165 169
166 170 for (i = a1; i < a2; i++) {
167 171 /* skip things before the current block */
168 172 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
169 173 ;
170 174
171 175 /* loop through all lines match a[i] in b */
172 176 for (; j != -1 && j < b2; j = b[j].n) {
173 177 /* does this extend an earlier match? */
174 178 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
175 179 k = pos[j - 1].len + 1;
176 180 else
177 181 k = 1;
178 182 pos[j].pos = i;
179 183 pos[j].len = k;
180 184
181 185 /* best match so far? */
182 186 if (k > mk) {
183 187 mi = i;
184 188 mj = j;
185 189 mk = k;
186 190 }
187 191 }
188 192 }
189 193
190 194 if (mk) {
191 195 mi = mi - mk + 1;
192 196 mj = mj - mk + 1;
193 197 }
194 198
195 199 /* expand match to include neighboring popular lines */
196 200 while (mi - mb > a1 && mj - mb > b1 &&
197 201 a[mi - mb - 1].e == b[mj - mb - 1].e)
198 202 mb++;
199 203 while (mi + mk < a2 && mj + mk < b2 &&
200 204 a[mi + mk].e == b[mj + mk].e)
201 205 mk++;
202 206
203 207 *omi = mi - mb;
204 208 *omj = mj - mb;
205 209 return mk + mb;
206 210 }
207 211
208 212 static void recurse(struct line *a, struct line *b, struct pos *pos,
209 213 int a1, int a2, int b1, int b2, struct hunklist *l)
210 214 {
211 215 int i, j, k;
212 216
213 217 /* find the longest match in this chunk */
214 218 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
215 219 if (!k)
216 220 return;
217 221
218 222 /* and recurse on the remaining chunks on either side */
219 223 recurse(a, b, pos, a1, i, b1, j, l);
220 224 l->head->a1 = i;
221 225 l->head->a2 = i + k;
222 226 l->head->b1 = j;
223 227 l->head->b2 = j + k;
224 228 l->head++;
225 229 recurse(a, b, pos, i + k, a2, j + k, b2, l);
226 230 }
227 231
228 232 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
229 233 {
230 234 struct hunklist l;
231 235 struct pos *pos;
232 236 int t;
233 237
234 238 /* allocate and fill arrays */
235 239 t = equatelines(a, an, b, bn);
236 240 pos = calloc(bn, sizeof(struct pos));
237 241 /* we can't have more matches than lines in the shorter file */
238 242 l.head = l.base = malloc(sizeof(struct hunk) * ((an<bn ? an:bn) + 1));
239 243
240 244 if (pos && l.base && t) {
241 245 /* generate the matching block list */
242 246 recurse(a, b, pos, 0, an, 0, bn, &l);
243 247 l.head->a1 = an;
244 248 l.head->b1 = bn;
245 249 l.head++;
246 250 }
247 251
248 252 free(pos);
249 253 return l;
250 254 }
251 255
252 256 static PyObject *blocks(PyObject *self, PyObject *args)
253 257 {
254 258 PyObject *sa, *sb, *rl = NULL, *m;
255 259 struct line *a, *b;
256 260 struct hunklist l = {NULL, NULL};
257 261 struct hunk *h;
258 262 int an, bn, pos = 0;
259 263
260 264 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
261 265 return NULL;
262 266
263 267 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
264 268 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
265 269 if (!a || !b)
266 270 goto nomem;
267 271
268 272 l = diff(a, an, b, bn);
269 273 rl = PyList_New(l.head - l.base);
270 274 if (!l.head || !rl)
271 275 goto nomem;
272 276
273 277 for (h = l.base; h != l.head; h++) {
274 278 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
275 279 PyList_SetItem(rl, pos, m);
276 280 pos++;
277 281 }
278 282
279 283 nomem:
280 284 free(a);
281 285 free(b);
282 286 free(l.base);
283 287 return rl ? rl : PyErr_NoMemory();
284 288 }
285 289
286 290 static PyObject *bdiff(PyObject *self, PyObject *args)
287 291 {
288 292 PyObject *sa, *sb, *result = NULL;
289 293 struct line *al, *bl;
290 294 struct hunklist l = {NULL, NULL};
291 295 struct hunk *h;
292 296 char encode[12], *rb;
293 297 int an, bn, len = 0, la = 0, lb = 0;
294 298
295 299 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
296 300 return NULL;
297 301
298 302 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
299 303 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
300 304 if (!al || !bl)
301 305 goto nomem;
302 306
303 307 l = diff(al, an, bl, bn);
304 308 if (!l.head)
305 309 goto nomem;
306 310
307 311 /* calculate length of output */
308 312 for (h = l.base; h != l.head; h++) {
309 313 if (h->a1 != la || h->b1 != lb)
310 314 len += 12 + bl[h->b1].l - bl[lb].l;
311 315 la = h->a2;
312 316 lb = h->b2;
313 317 }
314 318
315 319 result = PyString_FromStringAndSize(NULL, len);
316 320 if (!result)
317 321 goto nomem;
318 322
319 323 /* build binary patch */
320 324 rb = PyString_AsString(result);
321 325 la = lb = 0;
322 326
323 327 for (h = l.base; h != l.head; h++) {
324 328 if (h->a1 != la || h->b1 != lb) {
325 329 len = bl[h->b1].l - bl[lb].l;
326 330 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
327 331 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
328 332 *(uint32_t *)(encode + 8) = htonl(len);
329 333 memcpy(rb, encode, 12);
330 334 memcpy(rb + 12, bl[lb].l, len);
331 335 rb += 12 + len;
332 336 }
333 337 la = h->a2;
334 338 lb = h->b2;
335 339 }
336 340
337 341 nomem:
338 342 free(al);
339 343 free(bl);
340 344 free(l.base);
341 345 return result ? result : PyErr_NoMemory();
342 346 }
343 347
344 348 static char mdiff_doc[] = "Efficient binary diff.";
345 349
346 350 static PyMethodDef methods[] = {
347 351 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
348 352 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
349 353 {NULL, NULL}
350 354 };
351 355
352 356 PyMODINIT_FUNC initbdiff(void)
353 357 {
354 358 Py_InitModule3("bdiff", methods, mdiff_doc);
355 359 }
356 360
General Comments 0
You need to be logged in to leave comments. Login now