##// END OF EJS Templates
do proper typecasting on malloc() and calloc() calls...
TK Soh -
r1978:10606ee6 default
parent child Browse files
Show More
@@ -1,360 +1,361
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #ifdef __hpux
16 #ifdef __hpux
17 #define inline
17 #define inline
18 #endif
18 #endif
19
19
20 #ifdef __SUNPRO_C
20 #ifdef __SUNPRO_C
21 # define inline
21 # define inline
22 #endif
22 #endif
23
23
24 #ifdef _WIN32
24 #ifdef _WIN32
25 #ifdef _MSC_VER
25 #ifdef _MSC_VER
26 #define inline __inline
26 #define inline __inline
27 typedef unsigned long uint32_t;
27 typedef unsigned long uint32_t;
28 #else
28 #else
29 #include <stdint.h>
29 #include <stdint.h>
30 #endif
30 #endif
31 static uint32_t htonl(uint32_t x)
31 static uint32_t htonl(uint32_t x)
32 {
32 {
33 return ((x & 0x000000ffUL) << 24) |
33 return ((x & 0x000000ffUL) << 24) |
34 ((x & 0x0000ff00UL) << 8) |
34 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x00ff0000UL) >> 8) |
35 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0xff000000UL) >> 24);
36 ((x & 0xff000000UL) >> 24);
37 }
37 }
38 #else
38 #else
39 #include <sys/types.h>
39 #include <sys/types.h>
40 #include <arpa/inet.h>
40 #include <arpa/inet.h>
41 #endif
41 #endif
42
42
43 struct line {
43 struct line {
44 int h, len, n, e;
44 int h, len, n, e;
45 const char *l;
45 const char *l;
46 };
46 };
47
47
48 struct pos {
48 struct pos {
49 int pos, len;
49 int pos, len;
50 };
50 };
51
51
52 struct hunk {
52 struct hunk {
53 int a1, a2, b1, b2;
53 int a1, a2, b1, b2;
54 };
54 };
55
55
56 struct hunklist {
56 struct hunklist {
57 struct hunk *base, *head;
57 struct hunk *base, *head;
58 };
58 };
59
59
60 static inline uint32_t rol32(uint32_t word, unsigned int shift)
60 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 {
61 {
62 return (word << shift) | (word >> (32 - shift));
62 return (word << shift) | (word >> (32 - shift));
63 }
63 }
64
64
65 int splitlines(const char *a, int len, struct line **lr)
65 int splitlines(const char *a, int len, struct line **lr)
66 {
66 {
67 int h, i;
67 int h, i;
68 const char *p, *b = a;
68 const char *p, *b = a;
69 struct line *l;
69 struct line *l;
70
70
71 /* count the lines */
71 /* count the lines */
72 i = 1; /* extra line for sentinel */
72 i = 1; /* extra line for sentinel */
73 for (p = a; p < a + len; p++)
73 for (p = a; p < a + len; p++)
74 if (*p == '\n' || p == a + len - 1)
74 if (*p == '\n' || p == a + len - 1)
75 i++;
75 i++;
76
76
77 *lr = l = malloc(sizeof(struct line) * i);
77 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 if (!l)
78 if (!l)
79 return -1;
79 return -1;
80
80
81 /* build the line array and calculate hashes */
81 /* build the line array and calculate hashes */
82 h = 0;
82 h = 0;
83 for (p = a; p < a + len; p++) {
83 for (p = a; p < a + len; p++) {
84 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
84 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
85 if (*p == '\n' || p == a + len - 1) {
85 if (*p == '\n' || p == a + len - 1) {
86 l->len = p - b + 1;
86 l->len = p - b + 1;
87 l->h = h * l->len;
87 l->h = h * l->len;
88 l->l = b;
88 l->l = b;
89 l->n = -1;
89 l->n = -1;
90 l++;
90 l++;
91 b = p + 1;
91 b = p + 1;
92 h = 0;
92 h = 0;
93 }
93 }
94 }
94 }
95
95
96 /* set up a sentinel */
96 /* set up a sentinel */
97 l->h = l->len = 0;
97 l->h = l->len = 0;
98 l->l = a + len;
98 l->l = a + len;
99 return i - 1;
99 return i - 1;
100 }
100 }
101
101
102 int inline cmp(struct line *a, struct line *b)
102 int inline cmp(struct line *a, struct line *b)
103 {
103 {
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 }
105 }
106
106
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 {
108 {
109 int i, j, buckets = 1, t;
109 int i, j, buckets = 1, t;
110 struct pos *h;
110 struct pos *h;
111
111
112 /* build a hash table of the next highest power of 2 */
112 /* build a hash table of the next highest power of 2 */
113 while (buckets < bn + 1)
113 while (buckets < bn + 1)
114 buckets *= 2;
114 buckets *= 2;
115
115
116 h = malloc(buckets * sizeof(struct pos));
116 h = (struct pos *)malloc(buckets * sizeof(struct pos));
117 buckets = buckets - 1;
117 buckets = buckets - 1;
118 if (!h)
118 if (!h)
119 return 0;
119 return 0;
120
120
121 /* clear the hash table */
121 /* clear the hash table */
122 for (i = 0; i <= buckets; i++) {
122 for (i = 0; i <= buckets; i++) {
123 h[i].pos = -1;
123 h[i].pos = -1;
124 h[i].len = 0;
124 h[i].len = 0;
125 }
125 }
126
126
127 /* add lines to the hash table chains */
127 /* add lines to the hash table chains */
128 for (i = bn - 1; i >= 0; i--) {
128 for (i = bn - 1; i >= 0; i--) {
129 /* find the equivalence class */
129 /* find the equivalence class */
130 for (j = b[i].h & buckets; h[j].pos != -1;
130 for (j = b[i].h & buckets; h[j].pos != -1;
131 j = (j + 1) & buckets)
131 j = (j + 1) & buckets)
132 if (!cmp(b + i, b + h[j].pos))
132 if (!cmp(b + i, b + h[j].pos))
133 break;
133 break;
134
134
135 /* add to the head of the equivalence class */
135 /* add to the head of the equivalence class */
136 b[i].n = h[j].pos;
136 b[i].n = h[j].pos;
137 b[i].e = j;
137 b[i].e = j;
138 h[j].pos = i;
138 h[j].pos = i;
139 h[j].len++; /* keep track of popularity */
139 h[j].len++; /* keep track of popularity */
140 }
140 }
141
141
142 /* compute popularity threshold */
142 /* compute popularity threshold */
143 t = (bn >= 200) ? bn / 100 : bn + 1;
143 t = (bn >= 200) ? bn / 100 : bn + 1;
144
144
145 /* match items in a to their equivalence class in b */
145 /* match items in a to their equivalence class in b */
146 for (i = 0; i < an; i++) {
146 for (i = 0; i < an; i++) {
147 /* find the equivalence class */
147 /* find the equivalence class */
148 for (j = a[i].h & buckets; h[j].pos != -1;
148 for (j = a[i].h & buckets; h[j].pos != -1;
149 j = (j + 1) & buckets)
149 j = (j + 1) & buckets)
150 if (!cmp(a + i, b + h[j].pos))
150 if (!cmp(a + i, b + h[j].pos))
151 break;
151 break;
152
152
153 a[i].e = j; /* use equivalence class for quick compare */
153 a[i].e = j; /* use equivalence class for quick compare */
154 if (h[j].len <= t)
154 if (h[j].len <= t)
155 a[i].n = h[j].pos; /* point to head of match list */
155 a[i].n = h[j].pos; /* point to head of match list */
156 else
156 else
157 a[i].n = -1; /* too popular */
157 a[i].n = -1; /* too popular */
158 }
158 }
159
159
160 /* discard hash tables */
160 /* discard hash tables */
161 free(h);
161 free(h);
162 return 1;
162 return 1;
163 }
163 }
164
164
165 static int longest_match(struct line *a, struct line *b, struct pos *pos,
165 static int longest_match(struct line *a, struct line *b, struct pos *pos,
166 int a1, int a2, int b1, int b2, int *omi, int *omj)
166 int a1, int a2, int b1, int b2, int *omi, int *omj)
167 {
167 {
168 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
168 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
169
169
170 for (i = a1; i < a2; i++) {
170 for (i = a1; i < a2; i++) {
171 /* skip things before the current block */
171 /* skip things before the current block */
172 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
172 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
173 ;
173 ;
174
174
175 /* loop through all lines match a[i] in b */
175 /* loop through all lines match a[i] in b */
176 for (; j != -1 && j < b2; j = b[j].n) {
176 for (; j != -1 && j < b2; j = b[j].n) {
177 /* does this extend an earlier match? */
177 /* does this extend an earlier match? */
178 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
178 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
179 k = pos[j - 1].len + 1;
179 k = pos[j - 1].len + 1;
180 else
180 else
181 k = 1;
181 k = 1;
182 pos[j].pos = i;
182 pos[j].pos = i;
183 pos[j].len = k;
183 pos[j].len = k;
184
184
185 /* best match so far? */
185 /* best match so far? */
186 if (k > mk) {
186 if (k > mk) {
187 mi = i;
187 mi = i;
188 mj = j;
188 mj = j;
189 mk = k;
189 mk = k;
190 }
190 }
191 }
191 }
192 }
192 }
193
193
194 if (mk) {
194 if (mk) {
195 mi = mi - mk + 1;
195 mi = mi - mk + 1;
196 mj = mj - mk + 1;
196 mj = mj - mk + 1;
197 }
197 }
198
198
199 /* expand match to include neighboring popular lines */
199 /* expand match to include neighboring popular lines */
200 while (mi - mb > a1 && mj - mb > b1 &&
200 while (mi - mb > a1 && mj - mb > b1 &&
201 a[mi - mb - 1].e == b[mj - mb - 1].e)
201 a[mi - mb - 1].e == b[mj - mb - 1].e)
202 mb++;
202 mb++;
203 while (mi + mk < a2 && mj + mk < b2 &&
203 while (mi + mk < a2 && mj + mk < b2 &&
204 a[mi + mk].e == b[mj + mk].e)
204 a[mi + mk].e == b[mj + mk].e)
205 mk++;
205 mk++;
206
206
207 *omi = mi - mb;
207 *omi = mi - mb;
208 *omj = mj - mb;
208 *omj = mj - mb;
209 return mk + mb;
209 return mk + mb;
210 }
210 }
211
211
212 static void recurse(struct line *a, struct line *b, struct pos *pos,
212 static void recurse(struct line *a, struct line *b, struct pos *pos,
213 int a1, int a2, int b1, int b2, struct hunklist *l)
213 int a1, int a2, int b1, int b2, struct hunklist *l)
214 {
214 {
215 int i, j, k;
215 int i, j, k;
216
216
217 /* find the longest match in this chunk */
217 /* find the longest match in this chunk */
218 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
218 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
219 if (!k)
219 if (!k)
220 return;
220 return;
221
221
222 /* and recurse on the remaining chunks on either side */
222 /* and recurse on the remaining chunks on either side */
223 recurse(a, b, pos, a1, i, b1, j, l);
223 recurse(a, b, pos, a1, i, b1, j, l);
224 l->head->a1 = i;
224 l->head->a1 = i;
225 l->head->a2 = i + k;
225 l->head->a2 = i + k;
226 l->head->b1 = j;
226 l->head->b1 = j;
227 l->head->b2 = j + k;
227 l->head->b2 = j + k;
228 l->head++;
228 l->head++;
229 recurse(a, b, pos, i + k, a2, j + k, b2, l);
229 recurse(a, b, pos, i + k, a2, j + k, b2, l);
230 }
230 }
231
231
232 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
232 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
233 {
233 {
234 struct hunklist l;
234 struct hunklist l;
235 struct pos *pos;
235 struct pos *pos;
236 int t;
236 int t;
237
237
238 /* allocate and fill arrays */
238 /* allocate and fill arrays */
239 t = equatelines(a, an, b, bn);
239 t = equatelines(a, an, b, bn);
240 pos = calloc(bn, sizeof(struct pos));
240 pos = (struct pos *)calloc(bn, sizeof(struct pos));
241 /* we can't have more matches than lines in the shorter file */
241 /* we can't have more matches than lines in the shorter file */
242 l.head = l.base = malloc(sizeof(struct hunk) * ((an<bn ? an:bn) + 1));
242 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
243 ((an<bn ? an:bn) + 1));
243
244
244 if (pos && l.base && t) {
245 if (pos && l.base && t) {
245 /* generate the matching block list */
246 /* generate the matching block list */
246 recurse(a, b, pos, 0, an, 0, bn, &l);
247 recurse(a, b, pos, 0, an, 0, bn, &l);
247 l.head->a1 = an;
248 l.head->a1 = an;
248 l.head->b1 = bn;
249 l.head->b1 = bn;
249 l.head++;
250 l.head++;
250 }
251 }
251
252
252 free(pos);
253 free(pos);
253 return l;
254 return l;
254 }
255 }
255
256
256 static PyObject *blocks(PyObject *self, PyObject *args)
257 static PyObject *blocks(PyObject *self, PyObject *args)
257 {
258 {
258 PyObject *sa, *sb, *rl = NULL, *m;
259 PyObject *sa, *sb, *rl = NULL, *m;
259 struct line *a, *b;
260 struct line *a, *b;
260 struct hunklist l = {NULL, NULL};
261 struct hunklist l = {NULL, NULL};
261 struct hunk *h;
262 struct hunk *h;
262 int an, bn, pos = 0;
263 int an, bn, pos = 0;
263
264
264 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
265 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
265 return NULL;
266 return NULL;
266
267
267 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
268 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
268 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
269 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
269 if (!a || !b)
270 if (!a || !b)
270 goto nomem;
271 goto nomem;
271
272
272 l = diff(a, an, b, bn);
273 l = diff(a, an, b, bn);
273 rl = PyList_New(l.head - l.base);
274 rl = PyList_New(l.head - l.base);
274 if (!l.head || !rl)
275 if (!l.head || !rl)
275 goto nomem;
276 goto nomem;
276
277
277 for (h = l.base; h != l.head; h++) {
278 for (h = l.base; h != l.head; h++) {
278 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
279 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
279 PyList_SetItem(rl, pos, m);
280 PyList_SetItem(rl, pos, m);
280 pos++;
281 pos++;
281 }
282 }
282
283
283 nomem:
284 nomem:
284 free(a);
285 free(a);
285 free(b);
286 free(b);
286 free(l.base);
287 free(l.base);
287 return rl ? rl : PyErr_NoMemory();
288 return rl ? rl : PyErr_NoMemory();
288 }
289 }
289
290
290 static PyObject *bdiff(PyObject *self, PyObject *args)
291 static PyObject *bdiff(PyObject *self, PyObject *args)
291 {
292 {
292 PyObject *sa, *sb, *result = NULL;
293 PyObject *sa, *sb, *result = NULL;
293 struct line *al, *bl;
294 struct line *al, *bl;
294 struct hunklist l = {NULL, NULL};
295 struct hunklist l = {NULL, NULL};
295 struct hunk *h;
296 struct hunk *h;
296 char encode[12], *rb;
297 char encode[12], *rb;
297 int an, bn, len = 0, la = 0, lb = 0;
298 int an, bn, len = 0, la = 0, lb = 0;
298
299
299 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
300 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
300 return NULL;
301 return NULL;
301
302
302 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
303 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
303 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
304 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
304 if (!al || !bl)
305 if (!al || !bl)
305 goto nomem;
306 goto nomem;
306
307
307 l = diff(al, an, bl, bn);
308 l = diff(al, an, bl, bn);
308 if (!l.head)
309 if (!l.head)
309 goto nomem;
310 goto nomem;
310
311
311 /* calculate length of output */
312 /* calculate length of output */
312 for (h = l.base; h != l.head; h++) {
313 for (h = l.base; h != l.head; h++) {
313 if (h->a1 != la || h->b1 != lb)
314 if (h->a1 != la || h->b1 != lb)
314 len += 12 + bl[h->b1].l - bl[lb].l;
315 len += 12 + bl[h->b1].l - bl[lb].l;
315 la = h->a2;
316 la = h->a2;
316 lb = h->b2;
317 lb = h->b2;
317 }
318 }
318
319
319 result = PyString_FromStringAndSize(NULL, len);
320 result = PyString_FromStringAndSize(NULL, len);
320 if (!result)
321 if (!result)
321 goto nomem;
322 goto nomem;
322
323
323 /* build binary patch */
324 /* build binary patch */
324 rb = PyString_AsString(result);
325 rb = PyString_AsString(result);
325 la = lb = 0;
326 la = lb = 0;
326
327
327 for (h = l.base; h != l.head; h++) {
328 for (h = l.base; h != l.head; h++) {
328 if (h->a1 != la || h->b1 != lb) {
329 if (h->a1 != la || h->b1 != lb) {
329 len = bl[h->b1].l - bl[lb].l;
330 len = bl[h->b1].l - bl[lb].l;
330 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
331 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
331 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
332 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
332 *(uint32_t *)(encode + 8) = htonl(len);
333 *(uint32_t *)(encode + 8) = htonl(len);
333 memcpy(rb, encode, 12);
334 memcpy(rb, encode, 12);
334 memcpy(rb + 12, bl[lb].l, len);
335 memcpy(rb + 12, bl[lb].l, len);
335 rb += 12 + len;
336 rb += 12 + len;
336 }
337 }
337 la = h->a2;
338 la = h->a2;
338 lb = h->b2;
339 lb = h->b2;
339 }
340 }
340
341
341 nomem:
342 nomem:
342 free(al);
343 free(al);
343 free(bl);
344 free(bl);
344 free(l.base);
345 free(l.base);
345 return result ? result : PyErr_NoMemory();
346 return result ? result : PyErr_NoMemory();
346 }
347 }
347
348
348 static char mdiff_doc[] = "Efficient binary diff.";
349 static char mdiff_doc[] = "Efficient binary diff.";
349
350
350 static PyMethodDef methods[] = {
351 static PyMethodDef methods[] = {
351 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
352 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
352 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
353 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
353 {NULL, NULL}
354 {NULL, NULL}
354 };
355 };
355
356
356 PyMODINIT_FUNC initbdiff(void)
357 PyMODINIT_FUNC initbdiff(void)
357 {
358 {
358 Py_InitModule3("bdiff", methods, mdiff_doc);
359 Py_InitModule3("bdiff", methods, mdiff_doc);
359 }
360 }
360
361
@@ -1,368 +1,368
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #ifdef _WIN32
26 #ifdef _WIN32
27 #ifdef _MSC_VER
27 #ifdef _MSC_VER
28 #define inline __inline
28 #define inline __inline
29 typedef unsigned long uint32_t;
29 typedef unsigned long uint32_t;
30 #else
30 #else
31 #include <stdint.h>
31 #include <stdint.h>
32 #endif
32 #endif
33 static uint32_t ntohl(uint32_t x)
33 static uint32_t ntohl(uint32_t x)
34 {
34 {
35 return ((x & 0x000000ffUL) << 24) |
35 return ((x & 0x000000ffUL) << 24) |
36 ((x & 0x0000ff00UL) << 8) |
36 ((x & 0x0000ff00UL) << 8) |
37 ((x & 0x00ff0000UL) >> 8) |
37 ((x & 0x00ff0000UL) >> 8) |
38 ((x & 0xff000000UL) >> 24);
38 ((x & 0xff000000UL) >> 24);
39 }
39 }
40 #else
40 #else
41 #include <sys/types.h>
41 #include <sys/types.h>
42 #include <arpa/inet.h>
42 #include <arpa/inet.h>
43 #endif
43 #endif
44
44
45 static char mpatch_doc[] = "Efficient binary patching.";
45 static char mpatch_doc[] = "Efficient binary patching.";
46 static PyObject *mpatch_Error;
46 static PyObject *mpatch_Error;
47
47
48 struct frag {
48 struct frag {
49 int start, end, len;
49 int start, end, len;
50 char *data;
50 char *data;
51 };
51 };
52
52
53 struct flist {
53 struct flist {
54 struct frag *base, *head, *tail;
54 struct frag *base, *head, *tail;
55 };
55 };
56
56
57 static struct flist *lalloc(int size)
57 static struct flist *lalloc(int size)
58 {
58 {
59 struct flist *a = NULL;
59 struct flist *a = NULL;
60
60
61 a = malloc(sizeof(struct flist));
61 a = (struct flist *)malloc(sizeof(struct flist));
62 if (a) {
62 if (a) {
63 a->base = malloc(sizeof(struct frag) * size);
63 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
64 if (!a->base) {
64 if (!a->base) {
65 free(a);
65 free(a);
66 a = NULL;
66 a = NULL;
67 } else
67 } else
68 a->head = a->tail = a->base;
68 a->head = a->tail = a->base;
69 return a;
69 return a;
70 }
70 }
71 if (!PyErr_Occurred())
71 if (!PyErr_Occurred())
72 PyErr_NoMemory();
72 PyErr_NoMemory();
73 return NULL;
73 return NULL;
74 }
74 }
75
75
76 static void lfree(struct flist *a)
76 static void lfree(struct flist *a)
77 {
77 {
78 if (a) {
78 if (a) {
79 free(a->base);
79 free(a->base);
80 free(a);
80 free(a);
81 }
81 }
82 }
82 }
83
83
84 static int lsize(struct flist *a)
84 static int lsize(struct flist *a)
85 {
85 {
86 return a->tail - a->head;
86 return a->tail - a->head;
87 }
87 }
88
88
89 /* move hunks in source that are less cut to dest, compensating
89 /* move hunks in source that are less cut to dest, compensating
90 for changes in offset. the last hunk may be split if necessary.
90 for changes in offset. the last hunk may be split if necessary.
91 */
91 */
92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
93 {
93 {
94 struct frag *d = dest->tail, *s = src->head;
94 struct frag *d = dest->tail, *s = src->head;
95 int postend, c, l;
95 int postend, c, l;
96
96
97 while (s != src->tail) {
97 while (s != src->tail) {
98 if (s->start + offset >= cut)
98 if (s->start + offset >= cut)
99 break; /* we've gone far enough */
99 break; /* we've gone far enough */
100
100
101 postend = offset + s->start + s->len;
101 postend = offset + s->start + s->len;
102 if (postend <= cut) {
102 if (postend <= cut) {
103 /* save this hunk */
103 /* save this hunk */
104 offset += s->start + s->len - s->end;
104 offset += s->start + s->len - s->end;
105 *d++ = *s++;
105 *d++ = *s++;
106 }
106 }
107 else {
107 else {
108 /* break up this hunk */
108 /* break up this hunk */
109 c = cut - offset;
109 c = cut - offset;
110 if (s->end < c)
110 if (s->end < c)
111 c = s->end;
111 c = s->end;
112 l = cut - offset - s->start;
112 l = cut - offset - s->start;
113 if (s->len < l)
113 if (s->len < l)
114 l = s->len;
114 l = s->len;
115
115
116 offset += s->start + l - c;
116 offset += s->start + l - c;
117
117
118 d->start = s->start;
118 d->start = s->start;
119 d->end = c;
119 d->end = c;
120 d->len = l;
120 d->len = l;
121 d->data = s->data;
121 d->data = s->data;
122 d++;
122 d++;
123 s->start = c;
123 s->start = c;
124 s->len = s->len - l;
124 s->len = s->len - l;
125 s->data = s->data + l;
125 s->data = s->data + l;
126
126
127 break;
127 break;
128 }
128 }
129 }
129 }
130
130
131 dest->tail = d;
131 dest->tail = d;
132 src->head = s;
132 src->head = s;
133 return offset;
133 return offset;
134 }
134 }
135
135
136 /* like gather, but with no output list */
136 /* like gather, but with no output list */
137 static int discard(struct flist *src, int cut, int offset)
137 static int discard(struct flist *src, int cut, int offset)
138 {
138 {
139 struct frag *s = src->head;
139 struct frag *s = src->head;
140 int postend, c, l;
140 int postend, c, l;
141
141
142 while (s != src->tail) {
142 while (s != src->tail) {
143 if (s->start + offset >= cut)
143 if (s->start + offset >= cut)
144 break;
144 break;
145
145
146 postend = offset + s->start + s->len;
146 postend = offset + s->start + s->len;
147 if (postend <= cut) {
147 if (postend <= cut) {
148 offset += s->start + s->len - s->end;
148 offset += s->start + s->len - s->end;
149 s++;
149 s++;
150 }
150 }
151 else {
151 else {
152 c = cut - offset;
152 c = cut - offset;
153 if (s->end < c)
153 if (s->end < c)
154 c = s->end;
154 c = s->end;
155 l = cut - offset - s->start;
155 l = cut - offset - s->start;
156 if (s->len < l)
156 if (s->len < l)
157 l = s->len;
157 l = s->len;
158
158
159 offset += s->start + l - c;
159 offset += s->start + l - c;
160 s->start = c;
160 s->start = c;
161 s->len = s->len - l;
161 s->len = s->len - l;
162 s->data = s->data + l;
162 s->data = s->data + l;
163
163
164 break;
164 break;
165 }
165 }
166 }
166 }
167
167
168 src->head = s;
168 src->head = s;
169 return offset;
169 return offset;
170 }
170 }
171
171
172 /* combine hunk lists a and b, while adjusting b for offset changes in a/
172 /* combine hunk lists a and b, while adjusting b for offset changes in a/
173 this deletes a and b and returns the resultant list. */
173 this deletes a and b and returns the resultant list. */
174 static struct flist *combine(struct flist *a, struct flist *b)
174 static struct flist *combine(struct flist *a, struct flist *b)
175 {
175 {
176 struct flist *c = NULL;
176 struct flist *c = NULL;
177 struct frag *bh, *ct;
177 struct frag *bh, *ct;
178 int offset = 0, post;
178 int offset = 0, post;
179
179
180 if (a && b)
180 if (a && b)
181 c = lalloc((lsize(a) + lsize(b)) * 2);
181 c = lalloc((lsize(a) + lsize(b)) * 2);
182
182
183 if (c) {
183 if (c) {
184
184
185 for (bh = b->head; bh != b->tail; bh++) {
185 for (bh = b->head; bh != b->tail; bh++) {
186 /* save old hunks */
186 /* save old hunks */
187 offset = gather(c, a, bh->start, offset);
187 offset = gather(c, a, bh->start, offset);
188
188
189 /* discard replaced hunks */
189 /* discard replaced hunks */
190 post = discard(a, bh->end, offset);
190 post = discard(a, bh->end, offset);
191
191
192 /* insert new hunk */
192 /* insert new hunk */
193 ct = c->tail;
193 ct = c->tail;
194 ct->start = bh->start - offset;
194 ct->start = bh->start - offset;
195 ct->end = bh->end - post;
195 ct->end = bh->end - post;
196 ct->len = bh->len;
196 ct->len = bh->len;
197 ct->data = bh->data;
197 ct->data = bh->data;
198 c->tail++;
198 c->tail++;
199 offset = post;
199 offset = post;
200 }
200 }
201
201
202 /* hold on to tail from a */
202 /* hold on to tail from a */
203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
204 c->tail += lsize(a);
204 c->tail += lsize(a);
205 }
205 }
206
206
207 lfree(a);
207 lfree(a);
208 lfree(b);
208 lfree(b);
209 return c;
209 return c;
210 }
210 }
211
211
212 /* decode a binary patch into a hunk list */
212 /* decode a binary patch into a hunk list */
213 static struct flist *decode(char *bin, int len)
213 static struct flist *decode(char *bin, int len)
214 {
214 {
215 struct flist *l;
215 struct flist *l;
216 struct frag *lt;
216 struct frag *lt;
217 char *end = bin + len;
217 char *end = bin + len;
218 char decode[12]; /* for dealing with alignment issues */
218 char decode[12]; /* for dealing with alignment issues */
219
219
220 /* assume worst case size, we won't have many of these lists */
220 /* assume worst case size, we won't have many of these lists */
221 l = lalloc(len / 12);
221 l = lalloc(len / 12);
222 if (!l)
222 if (!l)
223 return NULL;
223 return NULL;
224
224
225 lt = l->tail;
225 lt = l->tail;
226
226
227 while (bin < end) {
227 while (bin < end) {
228 memcpy(decode, bin, 12);
228 memcpy(decode, bin, 12);
229 lt->start = ntohl(*(uint32_t *)decode);
229 lt->start = ntohl(*(uint32_t *)decode);
230 lt->end = ntohl(*(uint32_t *)(decode + 4));
230 lt->end = ntohl(*(uint32_t *)(decode + 4));
231 lt->len = ntohl(*(uint32_t *)(decode + 8));
231 lt->len = ntohl(*(uint32_t *)(decode + 8));
232 lt->data = bin + 12;
232 lt->data = bin + 12;
233 bin += 12 + lt->len;
233 bin += 12 + lt->len;
234 lt++;
234 lt++;
235 }
235 }
236
236
237 if (bin != end) {
237 if (bin != end) {
238 if (!PyErr_Occurred())
238 if (!PyErr_Occurred())
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
240 lfree(l);
240 lfree(l);
241 return NULL;
241 return NULL;
242 }
242 }
243
243
244 l->tail = lt;
244 l->tail = lt;
245 return l;
245 return l;
246 }
246 }
247
247
248 /* calculate the size of resultant text */
248 /* calculate the size of resultant text */
249 static int calcsize(int len, struct flist *l)
249 static int calcsize(int len, struct flist *l)
250 {
250 {
251 int outlen = 0, last = 0;
251 int outlen = 0, last = 0;
252 struct frag *f = l->head;
252 struct frag *f = l->head;
253
253
254 while (f != l->tail) {
254 while (f != l->tail) {
255 if (f->start < last || f->end > len) {
255 if (f->start < last || f->end > len) {
256 if (!PyErr_Occurred())
256 if (!PyErr_Occurred())
257 PyErr_SetString(mpatch_Error,
257 PyErr_SetString(mpatch_Error,
258 "invalid patch");
258 "invalid patch");
259 return -1;
259 return -1;
260 }
260 }
261 outlen += f->start - last;
261 outlen += f->start - last;
262 last = f->end;
262 last = f->end;
263 outlen += f->len;
263 outlen += f->len;
264 f++;
264 f++;
265 }
265 }
266
266
267 outlen += len - last;
267 outlen += len - last;
268 return outlen;
268 return outlen;
269 }
269 }
270
270
271 static int apply(char *buf, char *orig, int len, struct flist *l)
271 static int apply(char *buf, char *orig, int len, struct flist *l)
272 {
272 {
273 struct frag *f = l->head;
273 struct frag *f = l->head;
274 int last = 0;
274 int last = 0;
275 char *p = buf;
275 char *p = buf;
276
276
277 while (f != l->tail) {
277 while (f != l->tail) {
278 if (f->start < last || f->end > len) {
278 if (f->start < last || f->end > len) {
279 if (!PyErr_Occurred())
279 if (!PyErr_Occurred())
280 PyErr_SetString(mpatch_Error,
280 PyErr_SetString(mpatch_Error,
281 "invalid patch");
281 "invalid patch");
282 return 0;
282 return 0;
283 }
283 }
284 memcpy(p, orig + last, f->start - last);
284 memcpy(p, orig + last, f->start - last);
285 p += f->start - last;
285 p += f->start - last;
286 memcpy(p, f->data, f->len);
286 memcpy(p, f->data, f->len);
287 last = f->end;
287 last = f->end;
288 p += f->len;
288 p += f->len;
289 f++;
289 f++;
290 }
290 }
291 memcpy(p, orig + last, len - last);
291 memcpy(p, orig + last, len - last);
292 return 1;
292 return 1;
293 }
293 }
294
294
295 /* recursively generate a patch of all bins between start and end */
295 /* recursively generate a patch of all bins between start and end */
296 static struct flist *fold(PyObject *bins, int start, int end)
296 static struct flist *fold(PyObject *bins, int start, int end)
297 {
297 {
298 int len;
298 int len;
299
299
300 if (start + 1 == end) {
300 if (start + 1 == end) {
301 /* trivial case, output a decoded list */
301 /* trivial case, output a decoded list */
302 PyObject *tmp = PyList_GetItem(bins, start);
302 PyObject *tmp = PyList_GetItem(bins, start);
303 if (!tmp)
303 if (!tmp)
304 return NULL;
304 return NULL;
305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
306 }
306 }
307
307
308 /* divide and conquer, memory management is elsewhere */
308 /* divide and conquer, memory management is elsewhere */
309 len = (end - start) / 2;
309 len = (end - start) / 2;
310 return combine(fold(bins, start, start + len),
310 return combine(fold(bins, start, start + len),
311 fold(bins, start + len, end));
311 fold(bins, start + len, end));
312 }
312 }
313
313
314 static PyObject *
314 static PyObject *
315 patches(PyObject *self, PyObject *args)
315 patches(PyObject *self, PyObject *args)
316 {
316 {
317 PyObject *text, *bins, *result;
317 PyObject *text, *bins, *result;
318 struct flist *patch;
318 struct flist *patch;
319 char *in, *out;
319 char *in, *out;
320 int len, outlen;
320 int len, outlen;
321
321
322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
323 return NULL;
323 return NULL;
324
324
325 len = PyList_Size(bins);
325 len = PyList_Size(bins);
326 if (!len) {
326 if (!len) {
327 /* nothing to do */
327 /* nothing to do */
328 Py_INCREF(text);
328 Py_INCREF(text);
329 return text;
329 return text;
330 }
330 }
331
331
332 patch = fold(bins, 0, len);
332 patch = fold(bins, 0, len);
333 if (!patch)
333 if (!patch)
334 return NULL;
334 return NULL;
335
335
336 outlen = calcsize(PyString_Size(text), patch);
336 outlen = calcsize(PyString_Size(text), patch);
337 if (outlen < 0) {
337 if (outlen < 0) {
338 result = NULL;
338 result = NULL;
339 goto cleanup;
339 goto cleanup;
340 }
340 }
341 result = PyString_FromStringAndSize(NULL, outlen);
341 result = PyString_FromStringAndSize(NULL, outlen);
342 if (!result) {
342 if (!result) {
343 result = NULL;
343 result = NULL;
344 goto cleanup;
344 goto cleanup;
345 }
345 }
346 in = PyString_AsString(text);
346 in = PyString_AsString(text);
347 out = PyString_AsString(result);
347 out = PyString_AsString(result);
348 if (!apply(out, in, PyString_Size(text), patch)) {
348 if (!apply(out, in, PyString_Size(text), patch)) {
349 Py_DECREF(result);
349 Py_DECREF(result);
350 result = NULL;
350 result = NULL;
351 }
351 }
352 cleanup:
352 cleanup:
353 lfree(patch);
353 lfree(patch);
354 return result;
354 return result;
355 }
355 }
356
356
357 static PyMethodDef methods[] = {
357 static PyMethodDef methods[] = {
358 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
358 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
359 {NULL, NULL}
359 {NULL, NULL}
360 };
360 };
361
361
362 PyMODINIT_FUNC
362 PyMODINIT_FUNC
363 initmpatch(void)
363 initmpatch(void)
364 {
364 {
365 Py_InitModule3("mpatch", methods, mpatch_doc);
365 Py_InitModule3("mpatch", methods, mpatch_doc);
366 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
366 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
367 }
367 }
368
368
General Comments 0
You need to be logged in to leave comments. Login now