##// END OF EJS Templates
BeOS compatibility support
Andrew Bachmann -
r4073:95ffa36d default
parent child Browse files
Show More
@@ -1,369 +1,373 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #if defined __hpux || defined __SUNPRO_C || defined _AIX
16 #if defined __hpux || defined __SUNPRO_C || defined _AIX
17 # define inline
17 # define inline
18 #endif
18 #endif
19
19
20 #ifdef _WIN32
20 #ifdef _WIN32
21 #ifdef _MSC_VER
21 #ifdef _MSC_VER
22 #define inline __inline
22 #define inline __inline
23 typedef unsigned long uint32_t;
23 typedef unsigned long uint32_t;
24 #else
24 #else
25 #include <stdint.h>
25 #include <stdint.h>
26 #endif
26 #endif
27 static uint32_t htonl(uint32_t x)
27 static uint32_t htonl(uint32_t x)
28 {
28 {
29 return ((x & 0x000000ffUL) << 24) |
29 return ((x & 0x000000ffUL) << 24) |
30 ((x & 0x0000ff00UL) << 8) |
30 ((x & 0x0000ff00UL) << 8) |
31 ((x & 0x00ff0000UL) >> 8) |
31 ((x & 0x00ff0000UL) >> 8) |
32 ((x & 0xff000000UL) >> 24);
32 ((x & 0xff000000UL) >> 24);
33 }
33 }
34 #else
34 #else
35 #include <sys/types.h>
35 #include <sys/types.h>
36 #ifdef __BEOS__
37 #include <ByteOrder.h>
38 #else
36 #include <arpa/inet.h>
39 #include <arpa/inet.h>
40 #endif
37 #include <inttypes.h>
41 #include <inttypes.h>
38 #endif
42 #endif
39
43
40 struct line {
44 struct line {
41 int h, len, n, e;
45 int h, len, n, e;
42 const char *l;
46 const char *l;
43 };
47 };
44
48
45 struct pos {
49 struct pos {
46 int pos, len;
50 int pos, len;
47 };
51 };
48
52
49 struct hunk {
53 struct hunk {
50 int a1, a2, b1, b2;
54 int a1, a2, b1, b2;
51 };
55 };
52
56
53 struct hunklist {
57 struct hunklist {
54 struct hunk *base, *head;
58 struct hunk *base, *head;
55 };
59 };
56
60
57 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
58 {
62 {
59 return (word << shift) | (word >> (32 - shift));
63 return (word << shift) | (word >> (32 - shift));
60 }
64 }
61
65
62 int splitlines(const char *a, int len, struct line **lr)
66 int splitlines(const char *a, int len, struct line **lr)
63 {
67 {
64 int g, h, i;
68 int g, h, i;
65 const char *p, *b = a;
69 const char *p, *b = a;
66 struct line *l;
70 struct line *l;
67
71
68 /* count the lines */
72 /* count the lines */
69 i = 1; /* extra line for sentinel */
73 i = 1; /* extra line for sentinel */
70 for (p = a; p < a + len; p++)
74 for (p = a; p < a + len; p++)
71 if (*p == '\n' || p == a + len - 1)
75 if (*p == '\n' || p == a + len - 1)
72 i++;
76 i++;
73
77
74 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
75 if (!l)
79 if (!l)
76 return -1;
80 return -1;
77
81
78 /* build the line array and calculate hashes */
82 /* build the line array and calculate hashes */
79 h = 0;
83 h = 0;
80 for (p = a; p < a + len; p++) {
84 for (p = a; p < a + len; p++) {
81 /*
85 /*
82 * a simple hash from GNU diff, with better collision
86 * a simple hash from GNU diff, with better collision
83 * resistance from hashpjw. this slows down common
87 * resistance from hashpjw. this slows down common
84 * case by 10%, but speeds up worst case by 100x.
88 * case by 10%, but speeds up worst case by 100x.
85 */
89 */
86 h = *p + rol32(h, 7);
90 h = *p + rol32(h, 7);
87 if ((g = h & 0xf0000000)) {
91 if ((g = h & 0xf0000000)) {
88 h ^= g >> 24;
92 h ^= g >> 24;
89 h ^= g;
93 h ^= g;
90 }
94 }
91 if (*p == '\n' || p == a + len - 1) {
95 if (*p == '\n' || p == a + len - 1) {
92 l->len = p - b + 1;
96 l->len = p - b + 1;
93 l->h = h * l->len;
97 l->h = h * l->len;
94 l->l = b;
98 l->l = b;
95 l->n = -1;
99 l->n = -1;
96 l++;
100 l++;
97 b = p + 1;
101 b = p + 1;
98 h = 0;
102 h = 0;
99 }
103 }
100 }
104 }
101
105
102 /* set up a sentinel */
106 /* set up a sentinel */
103 l->h = l->len = 0;
107 l->h = l->len = 0;
104 l->l = a + len;
108 l->l = a + len;
105 return i - 1;
109 return i - 1;
106 }
110 }
107
111
108 int inline cmp(struct line *a, struct line *b)
112 int inline cmp(struct line *a, struct line *b)
109 {
113 {
110 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
114 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
111 }
115 }
112
116
113 static int equatelines(struct line *a, int an, struct line *b, int bn)
117 static int equatelines(struct line *a, int an, struct line *b, int bn)
114 {
118 {
115 int i, j, buckets = 1, t;
119 int i, j, buckets = 1, t;
116 struct pos *h;
120 struct pos *h;
117
121
118 /* build a hash table of the next highest power of 2 */
122 /* build a hash table of the next highest power of 2 */
119 while (buckets < bn + 1)
123 while (buckets < bn + 1)
120 buckets *= 2;
124 buckets *= 2;
121
125
122 h = (struct pos *)malloc(buckets * sizeof(struct pos));
126 h = (struct pos *)malloc(buckets * sizeof(struct pos));
123 buckets = buckets - 1;
127 buckets = buckets - 1;
124 if (!h)
128 if (!h)
125 return 0;
129 return 0;
126
130
127 /* clear the hash table */
131 /* clear the hash table */
128 for (i = 0; i <= buckets; i++) {
132 for (i = 0; i <= buckets; i++) {
129 h[i].pos = -1;
133 h[i].pos = -1;
130 h[i].len = 0;
134 h[i].len = 0;
131 }
135 }
132
136
133 /* add lines to the hash table chains */
137 /* add lines to the hash table chains */
134 for (i = bn - 1; i >= 0; i--) {
138 for (i = bn - 1; i >= 0; i--) {
135 /* find the equivalence class */
139 /* find the equivalence class */
136 for (j = b[i].h & buckets; h[j].pos != -1;
140 for (j = b[i].h & buckets; h[j].pos != -1;
137 j = (j + 1) & buckets)
141 j = (j + 1) & buckets)
138 if (!cmp(b + i, b + h[j].pos))
142 if (!cmp(b + i, b + h[j].pos))
139 break;
143 break;
140
144
141 /* add to the head of the equivalence class */
145 /* add to the head of the equivalence class */
142 b[i].n = h[j].pos;
146 b[i].n = h[j].pos;
143 b[i].e = j;
147 b[i].e = j;
144 h[j].pos = i;
148 h[j].pos = i;
145 h[j].len++; /* keep track of popularity */
149 h[j].len++; /* keep track of popularity */
146 }
150 }
147
151
148 /* compute popularity threshold */
152 /* compute popularity threshold */
149 t = (bn >= 200) ? bn / 100 : bn + 1;
153 t = (bn >= 200) ? bn / 100 : bn + 1;
150
154
151 /* match items in a to their equivalence class in b */
155 /* match items in a to their equivalence class in b */
152 for (i = 0; i < an; i++) {
156 for (i = 0; i < an; i++) {
153 /* find the equivalence class */
157 /* find the equivalence class */
154 for (j = a[i].h & buckets; h[j].pos != -1;
158 for (j = a[i].h & buckets; h[j].pos != -1;
155 j = (j + 1) & buckets)
159 j = (j + 1) & buckets)
156 if (!cmp(a + i, b + h[j].pos))
160 if (!cmp(a + i, b + h[j].pos))
157 break;
161 break;
158
162
159 a[i].e = j; /* use equivalence class for quick compare */
163 a[i].e = j; /* use equivalence class for quick compare */
160 if (h[j].len <= t)
164 if (h[j].len <= t)
161 a[i].n = h[j].pos; /* point to head of match list */
165 a[i].n = h[j].pos; /* point to head of match list */
162 else
166 else
163 a[i].n = -1; /* too popular */
167 a[i].n = -1; /* too popular */
164 }
168 }
165
169
166 /* discard hash tables */
170 /* discard hash tables */
167 free(h);
171 free(h);
168 return 1;
172 return 1;
169 }
173 }
170
174
171 static int longest_match(struct line *a, struct line *b, struct pos *pos,
175 static int longest_match(struct line *a, struct line *b, struct pos *pos,
172 int a1, int a2, int b1, int b2, int *omi, int *omj)
176 int a1, int a2, int b1, int b2, int *omi, int *omj)
173 {
177 {
174 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
178 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
175
179
176 for (i = a1; i < a2; i++) {
180 for (i = a1; i < a2; i++) {
177 /* skip things before the current block */
181 /* skip things before the current block */
178 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
182 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
179 ;
183 ;
180
184
181 /* loop through all lines match a[i] in b */
185 /* loop through all lines match a[i] in b */
182 for (; j != -1 && j < b2; j = b[j].n) {
186 for (; j != -1 && j < b2; j = b[j].n) {
183 /* does this extend an earlier match? */
187 /* does this extend an earlier match? */
184 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
188 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
185 k = pos[j - 1].len + 1;
189 k = pos[j - 1].len + 1;
186 else
190 else
187 k = 1;
191 k = 1;
188 pos[j].pos = i;
192 pos[j].pos = i;
189 pos[j].len = k;
193 pos[j].len = k;
190
194
191 /* best match so far? */
195 /* best match so far? */
192 if (k > mk) {
196 if (k > mk) {
193 mi = i;
197 mi = i;
194 mj = j;
198 mj = j;
195 mk = k;
199 mk = k;
196 }
200 }
197 }
201 }
198 }
202 }
199
203
200 if (mk) {
204 if (mk) {
201 mi = mi - mk + 1;
205 mi = mi - mk + 1;
202 mj = mj - mk + 1;
206 mj = mj - mk + 1;
203 }
207 }
204
208
205 /* expand match to include neighboring popular lines */
209 /* expand match to include neighboring popular lines */
206 while (mi - mb > a1 && mj - mb > b1 &&
210 while (mi - mb > a1 && mj - mb > b1 &&
207 a[mi - mb - 1].e == b[mj - mb - 1].e)
211 a[mi - mb - 1].e == b[mj - mb - 1].e)
208 mb++;
212 mb++;
209 while (mi + mk < a2 && mj + mk < b2 &&
213 while (mi + mk < a2 && mj + mk < b2 &&
210 a[mi + mk].e == b[mj + mk].e)
214 a[mi + mk].e == b[mj + mk].e)
211 mk++;
215 mk++;
212
216
213 *omi = mi - mb;
217 *omi = mi - mb;
214 *omj = mj - mb;
218 *omj = mj - mb;
215 return mk + mb;
219 return mk + mb;
216 }
220 }
217
221
218 static void recurse(struct line *a, struct line *b, struct pos *pos,
222 static void recurse(struct line *a, struct line *b, struct pos *pos,
219 int a1, int a2, int b1, int b2, struct hunklist *l)
223 int a1, int a2, int b1, int b2, struct hunklist *l)
220 {
224 {
221 int i, j, k;
225 int i, j, k;
222
226
223 /* find the longest match in this chunk */
227 /* find the longest match in this chunk */
224 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
228 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
225 if (!k)
229 if (!k)
226 return;
230 return;
227
231
228 /* and recurse on the remaining chunks on either side */
232 /* and recurse on the remaining chunks on either side */
229 recurse(a, b, pos, a1, i, b1, j, l);
233 recurse(a, b, pos, a1, i, b1, j, l);
230 l->head->a1 = i;
234 l->head->a1 = i;
231 l->head->a2 = i + k;
235 l->head->a2 = i + k;
232 l->head->b1 = j;
236 l->head->b1 = j;
233 l->head->b2 = j + k;
237 l->head->b2 = j + k;
234 l->head++;
238 l->head++;
235 recurse(a, b, pos, i + k, a2, j + k, b2, l);
239 recurse(a, b, pos, i + k, a2, j + k, b2, l);
236 }
240 }
237
241
238 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
242 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
239 {
243 {
240 struct hunklist l;
244 struct hunklist l;
241 struct pos *pos;
245 struct pos *pos;
242 int t;
246 int t;
243
247
244 /* allocate and fill arrays */
248 /* allocate and fill arrays */
245 t = equatelines(a, an, b, bn);
249 t = equatelines(a, an, b, bn);
246 pos = (struct pos *)calloc(bn, sizeof(struct pos));
250 pos = (struct pos *)calloc(bn, sizeof(struct pos));
247 /* we can't have more matches than lines in the shorter file */
251 /* we can't have more matches than lines in the shorter file */
248 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
252 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
249 ((an<bn ? an:bn) + 1));
253 ((an<bn ? an:bn) + 1));
250
254
251 if (pos && l.base && t) {
255 if (pos && l.base && t) {
252 /* generate the matching block list */
256 /* generate the matching block list */
253 recurse(a, b, pos, 0, an, 0, bn, &l);
257 recurse(a, b, pos, 0, an, 0, bn, &l);
254 l.head->a1 = an;
258 l.head->a1 = an;
255 l.head->b1 = bn;
259 l.head->b1 = bn;
256 l.head++;
260 l.head++;
257 }
261 }
258
262
259 free(pos);
263 free(pos);
260 return l;
264 return l;
261 }
265 }
262
266
263 static PyObject *blocks(PyObject *self, PyObject *args)
267 static PyObject *blocks(PyObject *self, PyObject *args)
264 {
268 {
265 PyObject *sa, *sb, *rl = NULL, *m;
269 PyObject *sa, *sb, *rl = NULL, *m;
266 struct line *a, *b;
270 struct line *a, *b;
267 struct hunklist l = {NULL, NULL};
271 struct hunklist l = {NULL, NULL};
268 struct hunk *h;
272 struct hunk *h;
269 int an, bn, pos = 0;
273 int an, bn, pos = 0;
270
274
271 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
275 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
272 return NULL;
276 return NULL;
273
277
274 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
278 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
275 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
279 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
276 if (!a || !b)
280 if (!a || !b)
277 goto nomem;
281 goto nomem;
278
282
279 l = diff(a, an, b, bn);
283 l = diff(a, an, b, bn);
280 rl = PyList_New(l.head - l.base);
284 rl = PyList_New(l.head - l.base);
281 if (!l.head || !rl)
285 if (!l.head || !rl)
282 goto nomem;
286 goto nomem;
283
287
284 for (h = l.base; h != l.head; h++) {
288 for (h = l.base; h != l.head; h++) {
285 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
289 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
286 PyList_SetItem(rl, pos, m);
290 PyList_SetItem(rl, pos, m);
287 pos++;
291 pos++;
288 }
292 }
289
293
290 nomem:
294 nomem:
291 free(a);
295 free(a);
292 free(b);
296 free(b);
293 free(l.base);
297 free(l.base);
294 return rl ? rl : PyErr_NoMemory();
298 return rl ? rl : PyErr_NoMemory();
295 }
299 }
296
300
297 static PyObject *bdiff(PyObject *self, PyObject *args)
301 static PyObject *bdiff(PyObject *self, PyObject *args)
298 {
302 {
299 char *sa, *sb;
303 char *sa, *sb;
300 PyObject *result = NULL;
304 PyObject *result = NULL;
301 struct line *al, *bl;
305 struct line *al, *bl;
302 struct hunklist l = {NULL, NULL};
306 struct hunklist l = {NULL, NULL};
303 struct hunk *h;
307 struct hunk *h;
304 char encode[12], *rb;
308 char encode[12], *rb;
305 int an, bn, len = 0, la, lb;
309 int an, bn, len = 0, la, lb;
306
310
307 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
311 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
308 return NULL;
312 return NULL;
309
313
310 an = splitlines(sa, la, &al);
314 an = splitlines(sa, la, &al);
311 bn = splitlines(sb, lb, &bl);
315 bn = splitlines(sb, lb, &bl);
312 if (!al || !bl)
316 if (!al || !bl)
313 goto nomem;
317 goto nomem;
314
318
315 l = diff(al, an, bl, bn);
319 l = diff(al, an, bl, bn);
316 if (!l.head)
320 if (!l.head)
317 goto nomem;
321 goto nomem;
318
322
319 /* calculate length of output */
323 /* calculate length of output */
320 la = lb = 0;
324 la = lb = 0;
321 for (h = l.base; h != l.head; h++) {
325 for (h = l.base; h != l.head; h++) {
322 if (h->a1 != la || h->b1 != lb)
326 if (h->a1 != la || h->b1 != lb)
323 len += 12 + bl[h->b1].l - bl[lb].l;
327 len += 12 + bl[h->b1].l - bl[lb].l;
324 la = h->a2;
328 la = h->a2;
325 lb = h->b2;
329 lb = h->b2;
326 }
330 }
327
331
328 result = PyString_FromStringAndSize(NULL, len);
332 result = PyString_FromStringAndSize(NULL, len);
329 if (!result)
333 if (!result)
330 goto nomem;
334 goto nomem;
331
335
332 /* build binary patch */
336 /* build binary patch */
333 rb = PyString_AsString(result);
337 rb = PyString_AsString(result);
334 la = lb = 0;
338 la = lb = 0;
335
339
336 for (h = l.base; h != l.head; h++) {
340 for (h = l.base; h != l.head; h++) {
337 if (h->a1 != la || h->b1 != lb) {
341 if (h->a1 != la || h->b1 != lb) {
338 len = bl[h->b1].l - bl[lb].l;
342 len = bl[h->b1].l - bl[lb].l;
339 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
343 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
340 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
344 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
341 *(uint32_t *)(encode + 8) = htonl(len);
345 *(uint32_t *)(encode + 8) = htonl(len);
342 memcpy(rb, encode, 12);
346 memcpy(rb, encode, 12);
343 memcpy(rb + 12, bl[lb].l, len);
347 memcpy(rb + 12, bl[lb].l, len);
344 rb += 12 + len;
348 rb += 12 + len;
345 }
349 }
346 la = h->a2;
350 la = h->a2;
347 lb = h->b2;
351 lb = h->b2;
348 }
352 }
349
353
350 nomem:
354 nomem:
351 free(al);
355 free(al);
352 free(bl);
356 free(bl);
353 free(l.base);
357 free(l.base);
354 return result ? result : PyErr_NoMemory();
358 return result ? result : PyErr_NoMemory();
355 }
359 }
356
360
357 static char mdiff_doc[] = "Efficient binary diff.";
361 static char mdiff_doc[] = "Efficient binary diff.";
358
362
359 static PyMethodDef methods[] = {
363 static PyMethodDef methods[] = {
360 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
364 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
361 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
365 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
362 {NULL, NULL}
366 {NULL, NULL}
363 };
367 };
364
368
365 PyMODINIT_FUNC initbdiff(void)
369 PyMODINIT_FUNC initbdiff(void)
366 {
370 {
367 Py_InitModule3("bdiff", methods, mdiff_doc);
371 Py_InitModule3("bdiff", methods, mdiff_doc);
368 }
372 }
369
373
@@ -1,411 +1,415 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # include <arpa/inet.h>
45 # ifdef __BEOS__
46 # include <ByteOrder.h>
47 # else
48 # include <arpa/inet.h>
49 # endif
46 # include <inttypes.h>
50 # include <inttypes.h>
47 #endif
51 #endif
48
52
49 static char mpatch_doc[] = "Efficient binary patching.";
53 static char mpatch_doc[] = "Efficient binary patching.";
50 static PyObject *mpatch_Error;
54 static PyObject *mpatch_Error;
51
55
52 struct frag {
56 struct frag {
53 int start, end, len;
57 int start, end, len;
54 char *data;
58 char *data;
55 };
59 };
56
60
57 struct flist {
61 struct flist {
58 struct frag *base, *head, *tail;
62 struct frag *base, *head, *tail;
59 };
63 };
60
64
61 static struct flist *lalloc(int size)
65 static struct flist *lalloc(int size)
62 {
66 {
63 struct flist *a = NULL;
67 struct flist *a = NULL;
64
68
65 if (size < 1)
69 if (size < 1)
66 size = 1;
70 size = 1;
67
71
68 a = (struct flist *)malloc(sizeof(struct flist));
72 a = (struct flist *)malloc(sizeof(struct flist));
69 if (a) {
73 if (a) {
70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
71 if (a->base) {
75 if (a->base) {
72 a->head = a->tail = a->base;
76 a->head = a->tail = a->base;
73 return a;
77 return a;
74 }
78 }
75 free(a);
79 free(a);
76 a = NULL;
80 a = NULL;
77 }
81 }
78 if (!PyErr_Occurred())
82 if (!PyErr_Occurred())
79 PyErr_NoMemory();
83 PyErr_NoMemory();
80 return NULL;
84 return NULL;
81 }
85 }
82
86
83 static void lfree(struct flist *a)
87 static void lfree(struct flist *a)
84 {
88 {
85 if (a) {
89 if (a) {
86 free(a->base);
90 free(a->base);
87 free(a);
91 free(a);
88 }
92 }
89 }
93 }
90
94
91 static int lsize(struct flist *a)
95 static int lsize(struct flist *a)
92 {
96 {
93 return a->tail - a->head;
97 return a->tail - a->head;
94 }
98 }
95
99
96 /* move hunks in source that are less cut to dest, compensating
100 /* move hunks in source that are less cut to dest, compensating
97 for changes in offset. the last hunk may be split if necessary.
101 for changes in offset. the last hunk may be split if necessary.
98 */
102 */
99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
100 {
104 {
101 struct frag *d = dest->tail, *s = src->head;
105 struct frag *d = dest->tail, *s = src->head;
102 int postend, c, l;
106 int postend, c, l;
103
107
104 while (s != src->tail) {
108 while (s != src->tail) {
105 if (s->start + offset >= cut)
109 if (s->start + offset >= cut)
106 break; /* we've gone far enough */
110 break; /* we've gone far enough */
107
111
108 postend = offset + s->start + s->len;
112 postend = offset + s->start + s->len;
109 if (postend <= cut) {
113 if (postend <= cut) {
110 /* save this hunk */
114 /* save this hunk */
111 offset += s->start + s->len - s->end;
115 offset += s->start + s->len - s->end;
112 *d++ = *s++;
116 *d++ = *s++;
113 }
117 }
114 else {
118 else {
115 /* break up this hunk */
119 /* break up this hunk */
116 c = cut - offset;
120 c = cut - offset;
117 if (s->end < c)
121 if (s->end < c)
118 c = s->end;
122 c = s->end;
119 l = cut - offset - s->start;
123 l = cut - offset - s->start;
120 if (s->len < l)
124 if (s->len < l)
121 l = s->len;
125 l = s->len;
122
126
123 offset += s->start + l - c;
127 offset += s->start + l - c;
124
128
125 d->start = s->start;
129 d->start = s->start;
126 d->end = c;
130 d->end = c;
127 d->len = l;
131 d->len = l;
128 d->data = s->data;
132 d->data = s->data;
129 d++;
133 d++;
130 s->start = c;
134 s->start = c;
131 s->len = s->len - l;
135 s->len = s->len - l;
132 s->data = s->data + l;
136 s->data = s->data + l;
133
137
134 break;
138 break;
135 }
139 }
136 }
140 }
137
141
138 dest->tail = d;
142 dest->tail = d;
139 src->head = s;
143 src->head = s;
140 return offset;
144 return offset;
141 }
145 }
142
146
143 /* like gather, but with no output list */
147 /* like gather, but with no output list */
144 static int discard(struct flist *src, int cut, int offset)
148 static int discard(struct flist *src, int cut, int offset)
145 {
149 {
146 struct frag *s = src->head;
150 struct frag *s = src->head;
147 int postend, c, l;
151 int postend, c, l;
148
152
149 while (s != src->tail) {
153 while (s != src->tail) {
150 if (s->start + offset >= cut)
154 if (s->start + offset >= cut)
151 break;
155 break;
152
156
153 postend = offset + s->start + s->len;
157 postend = offset + s->start + s->len;
154 if (postend <= cut) {
158 if (postend <= cut) {
155 offset += s->start + s->len - s->end;
159 offset += s->start + s->len - s->end;
156 s++;
160 s++;
157 }
161 }
158 else {
162 else {
159 c = cut - offset;
163 c = cut - offset;
160 if (s->end < c)
164 if (s->end < c)
161 c = s->end;
165 c = s->end;
162 l = cut - offset - s->start;
166 l = cut - offset - s->start;
163 if (s->len < l)
167 if (s->len < l)
164 l = s->len;
168 l = s->len;
165
169
166 offset += s->start + l - c;
170 offset += s->start + l - c;
167 s->start = c;
171 s->start = c;
168 s->len = s->len - l;
172 s->len = s->len - l;
169 s->data = s->data + l;
173 s->data = s->data + l;
170
174
171 break;
175 break;
172 }
176 }
173 }
177 }
174
178
175 src->head = s;
179 src->head = s;
176 return offset;
180 return offset;
177 }
181 }
178
182
179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
180 this deletes a and b and returns the resultant list. */
184 this deletes a and b and returns the resultant list. */
181 static struct flist *combine(struct flist *a, struct flist *b)
185 static struct flist *combine(struct flist *a, struct flist *b)
182 {
186 {
183 struct flist *c = NULL;
187 struct flist *c = NULL;
184 struct frag *bh, *ct;
188 struct frag *bh, *ct;
185 int offset = 0, post;
189 int offset = 0, post;
186
190
187 if (a && b)
191 if (a && b)
188 c = lalloc((lsize(a) + lsize(b)) * 2);
192 c = lalloc((lsize(a) + lsize(b)) * 2);
189
193
190 if (c) {
194 if (c) {
191
195
192 for (bh = b->head; bh != b->tail; bh++) {
196 for (bh = b->head; bh != b->tail; bh++) {
193 /* save old hunks */
197 /* save old hunks */
194 offset = gather(c, a, bh->start, offset);
198 offset = gather(c, a, bh->start, offset);
195
199
196 /* discard replaced hunks */
200 /* discard replaced hunks */
197 post = discard(a, bh->end, offset);
201 post = discard(a, bh->end, offset);
198
202
199 /* insert new hunk */
203 /* insert new hunk */
200 ct = c->tail;
204 ct = c->tail;
201 ct->start = bh->start - offset;
205 ct->start = bh->start - offset;
202 ct->end = bh->end - post;
206 ct->end = bh->end - post;
203 ct->len = bh->len;
207 ct->len = bh->len;
204 ct->data = bh->data;
208 ct->data = bh->data;
205 c->tail++;
209 c->tail++;
206 offset = post;
210 offset = post;
207 }
211 }
208
212
209 /* hold on to tail from a */
213 /* hold on to tail from a */
210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
211 c->tail += lsize(a);
215 c->tail += lsize(a);
212 }
216 }
213
217
214 lfree(a);
218 lfree(a);
215 lfree(b);
219 lfree(b);
216 return c;
220 return c;
217 }
221 }
218
222
219 /* decode a binary patch into a hunk list */
223 /* decode a binary patch into a hunk list */
220 static struct flist *decode(char *bin, int len)
224 static struct flist *decode(char *bin, int len)
221 {
225 {
222 struct flist *l;
226 struct flist *l;
223 struct frag *lt;
227 struct frag *lt;
224 char *end = bin + len;
228 char *end = bin + len;
225 char decode[12]; /* for dealing with alignment issues */
229 char decode[12]; /* for dealing with alignment issues */
226
230
227 /* assume worst case size, we won't have many of these lists */
231 /* assume worst case size, we won't have many of these lists */
228 l = lalloc(len / 12);
232 l = lalloc(len / 12);
229 if (!l)
233 if (!l)
230 return NULL;
234 return NULL;
231
235
232 lt = l->tail;
236 lt = l->tail;
233
237
234 while (bin < end) {
238 while (bin < end) {
235 memcpy(decode, bin, 12);
239 memcpy(decode, bin, 12);
236 lt->start = ntohl(*(uint32_t *)decode);
240 lt->start = ntohl(*(uint32_t *)decode);
237 lt->end = ntohl(*(uint32_t *)(decode + 4));
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
238 lt->len = ntohl(*(uint32_t *)(decode + 8));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
239 lt->data = bin + 12;
243 lt->data = bin + 12;
240 bin += 12 + lt->len;
244 bin += 12 + lt->len;
241 lt++;
245 lt++;
242 }
246 }
243
247
244 if (bin != end) {
248 if (bin != end) {
245 if (!PyErr_Occurred())
249 if (!PyErr_Occurred())
246 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
250 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
247 lfree(l);
251 lfree(l);
248 return NULL;
252 return NULL;
249 }
253 }
250
254
251 l->tail = lt;
255 l->tail = lt;
252 return l;
256 return l;
253 }
257 }
254
258
255 /* calculate the size of resultant text */
259 /* calculate the size of resultant text */
256 static int calcsize(int len, struct flist *l)
260 static int calcsize(int len, struct flist *l)
257 {
261 {
258 int outlen = 0, last = 0;
262 int outlen = 0, last = 0;
259 struct frag *f = l->head;
263 struct frag *f = l->head;
260
264
261 while (f != l->tail) {
265 while (f != l->tail) {
262 if (f->start < last || f->end > len) {
266 if (f->start < last || f->end > len) {
263 if (!PyErr_Occurred())
267 if (!PyErr_Occurred())
264 PyErr_SetString(mpatch_Error,
268 PyErr_SetString(mpatch_Error,
265 "invalid patch");
269 "invalid patch");
266 return -1;
270 return -1;
267 }
271 }
268 outlen += f->start - last;
272 outlen += f->start - last;
269 last = f->end;
273 last = f->end;
270 outlen += f->len;
274 outlen += f->len;
271 f++;
275 f++;
272 }
276 }
273
277
274 outlen += len - last;
278 outlen += len - last;
275 return outlen;
279 return outlen;
276 }
280 }
277
281
278 static int apply(char *buf, char *orig, int len, struct flist *l)
282 static int apply(char *buf, char *orig, int len, struct flist *l)
279 {
283 {
280 struct frag *f = l->head;
284 struct frag *f = l->head;
281 int last = 0;
285 int last = 0;
282 char *p = buf;
286 char *p = buf;
283
287
284 while (f != l->tail) {
288 while (f != l->tail) {
285 if (f->start < last || f->end > len) {
289 if (f->start < last || f->end > len) {
286 if (!PyErr_Occurred())
290 if (!PyErr_Occurred())
287 PyErr_SetString(mpatch_Error,
291 PyErr_SetString(mpatch_Error,
288 "invalid patch");
292 "invalid patch");
289 return 0;
293 return 0;
290 }
294 }
291 memcpy(p, orig + last, f->start - last);
295 memcpy(p, orig + last, f->start - last);
292 p += f->start - last;
296 p += f->start - last;
293 memcpy(p, f->data, f->len);
297 memcpy(p, f->data, f->len);
294 last = f->end;
298 last = f->end;
295 p += f->len;
299 p += f->len;
296 f++;
300 f++;
297 }
301 }
298 memcpy(p, orig + last, len - last);
302 memcpy(p, orig + last, len - last);
299 return 1;
303 return 1;
300 }
304 }
301
305
302 /* recursively generate a patch of all bins between start and end */
306 /* recursively generate a patch of all bins between start and end */
303 static struct flist *fold(PyObject *bins, int start, int end)
307 static struct flist *fold(PyObject *bins, int start, int end)
304 {
308 {
305 int len;
309 int len;
306
310
307 if (start + 1 == end) {
311 if (start + 1 == end) {
308 /* trivial case, output a decoded list */
312 /* trivial case, output a decoded list */
309 PyObject *tmp = PyList_GetItem(bins, start);
313 PyObject *tmp = PyList_GetItem(bins, start);
310 if (!tmp)
314 if (!tmp)
311 return NULL;
315 return NULL;
312 return decode(PyString_AsString(tmp), PyString_Size(tmp));
316 return decode(PyString_AsString(tmp), PyString_Size(tmp));
313 }
317 }
314
318
315 /* divide and conquer, memory management is elsewhere */
319 /* divide and conquer, memory management is elsewhere */
316 len = (end - start) / 2;
320 len = (end - start) / 2;
317 return combine(fold(bins, start, start + len),
321 return combine(fold(bins, start, start + len),
318 fold(bins, start + len, end));
322 fold(bins, start + len, end));
319 }
323 }
320
324
321 static PyObject *
325 static PyObject *
322 patches(PyObject *self, PyObject *args)
326 patches(PyObject *self, PyObject *args)
323 {
327 {
324 PyObject *text, *bins, *result;
328 PyObject *text, *bins, *result;
325 struct flist *patch;
329 struct flist *patch;
326 char *in, *out;
330 char *in, *out;
327 int len, outlen;
331 int len, outlen;
328
332
329 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
333 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
330 return NULL;
334 return NULL;
331
335
332 len = PyList_Size(bins);
336 len = PyList_Size(bins);
333 if (!len) {
337 if (!len) {
334 /* nothing to do */
338 /* nothing to do */
335 Py_INCREF(text);
339 Py_INCREF(text);
336 return text;
340 return text;
337 }
341 }
338
342
339 patch = fold(bins, 0, len);
343 patch = fold(bins, 0, len);
340 if (!patch)
344 if (!patch)
341 return NULL;
345 return NULL;
342
346
343 outlen = calcsize(PyString_Size(text), patch);
347 outlen = calcsize(PyString_Size(text), patch);
344 if (outlen < 0) {
348 if (outlen < 0) {
345 result = NULL;
349 result = NULL;
346 goto cleanup;
350 goto cleanup;
347 }
351 }
348 result = PyString_FromStringAndSize(NULL, outlen);
352 result = PyString_FromStringAndSize(NULL, outlen);
349 if (!result) {
353 if (!result) {
350 result = NULL;
354 result = NULL;
351 goto cleanup;
355 goto cleanup;
352 }
356 }
353 in = PyString_AsString(text);
357 in = PyString_AsString(text);
354 out = PyString_AsString(result);
358 out = PyString_AsString(result);
355 if (!apply(out, in, PyString_Size(text), patch)) {
359 if (!apply(out, in, PyString_Size(text), patch)) {
356 Py_DECREF(result);
360 Py_DECREF(result);
357 result = NULL;
361 result = NULL;
358 }
362 }
359 cleanup:
363 cleanup:
360 lfree(patch);
364 lfree(patch);
361 return result;
365 return result;
362 }
366 }
363
367
364 /* calculate size of a patched file directly */
368 /* calculate size of a patched file directly */
365 static PyObject *
369 static PyObject *
366 patchedsize(PyObject *self, PyObject *args)
370 patchedsize(PyObject *self, PyObject *args)
367 {
371 {
368 long orig, start, end, len, outlen = 0, last = 0;
372 long orig, start, end, len, outlen = 0, last = 0;
369 int patchlen;
373 int patchlen;
370 char *bin, *binend;
374 char *bin, *binend;
371 char decode[12]; /* for dealing with alignment issues */
375 char decode[12]; /* for dealing with alignment issues */
372
376
373 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
377 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
374 return NULL;
378 return NULL;
375
379
376 binend = bin + patchlen;
380 binend = bin + patchlen;
377
381
378 while (bin < binend) {
382 while (bin < binend) {
379 memcpy(decode, bin, 12);
383 memcpy(decode, bin, 12);
380 start = ntohl(*(uint32_t *)decode);
384 start = ntohl(*(uint32_t *)decode);
381 end = ntohl(*(uint32_t *)(decode + 4));
385 end = ntohl(*(uint32_t *)(decode + 4));
382 len = ntohl(*(uint32_t *)(decode + 8));
386 len = ntohl(*(uint32_t *)(decode + 8));
383 bin += 12 + len;
387 bin += 12 + len;
384 outlen += start - last;
388 outlen += start - last;
385 last = end;
389 last = end;
386 outlen += len;
390 outlen += len;
387 }
391 }
388
392
389 if (bin != binend) {
393 if (bin != binend) {
390 if (!PyErr_Occurred())
394 if (!PyErr_Occurred())
391 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
395 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
392 return NULL;
396 return NULL;
393 }
397 }
394
398
395 outlen += orig - last;
399 outlen += orig - last;
396 return Py_BuildValue("l", outlen);
400 return Py_BuildValue("l", outlen);
397 }
401 }
398
402
399 static PyMethodDef methods[] = {
403 static PyMethodDef methods[] = {
400 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
404 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
401 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
405 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
402 {NULL, NULL}
406 {NULL, NULL}
403 };
407 };
404
408
405 PyMODINIT_FUNC
409 PyMODINIT_FUNC
406 initmpatch(void)
410 initmpatch(void)
407 {
411 {
408 Py_InitModule3("mpatch", methods, mpatch_doc);
412 Py_InitModule3("mpatch", methods, mpatch_doc);
409 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
413 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
410 }
414 }
411
415
General Comments 0
You need to be logged in to leave comments. Login now