##// END OF EJS Templates
Fix segfaults when parsing bdiff hunks in mpatch.decode() and .patchedsize()...
Thomas Arendsen Hein -
r4358:11dc22eb default
parent child Browse files
Show More
@@ -1,411 +1,422 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # include <arpa/inet.h>
45 # include <arpa/inet.h>
46 # include <inttypes.h>
46 # include <inttypes.h>
47 #endif
47 #endif
48
48
49 static char mpatch_doc[] = "Efficient binary patching.";
49 static char mpatch_doc[] = "Efficient binary patching.";
50 static PyObject *mpatch_Error;
50 static PyObject *mpatch_Error;
51
51
52 struct frag {
52 struct frag {
53 int start, end, len;
53 int start, end, len;
54 char *data;
54 char *data;
55 };
55 };
56
56
57 struct flist {
57 struct flist {
58 struct frag *base, *head, *tail;
58 struct frag *base, *head, *tail;
59 };
59 };
60
60
61 static struct flist *lalloc(int size)
61 static struct flist *lalloc(int size)
62 {
62 {
63 struct flist *a = NULL;
63 struct flist *a = NULL;
64
64
65 if (size < 1)
65 if (size < 1)
66 size = 1;
66 size = 1;
67
67
68 a = (struct flist *)malloc(sizeof(struct flist));
68 a = (struct flist *)malloc(sizeof(struct flist));
69 if (a) {
69 if (a) {
70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
71 if (a->base) {
71 if (a->base) {
72 a->head = a->tail = a->base;
72 a->head = a->tail = a->base;
73 return a;
73 return a;
74 }
74 }
75 free(a);
75 free(a);
76 a = NULL;
76 a = NULL;
77 }
77 }
78 if (!PyErr_Occurred())
78 if (!PyErr_Occurred())
79 PyErr_NoMemory();
79 PyErr_NoMemory();
80 return NULL;
80 return NULL;
81 }
81 }
82
82
83 static void lfree(struct flist *a)
83 static void lfree(struct flist *a)
84 {
84 {
85 if (a) {
85 if (a) {
86 free(a->base);
86 free(a->base);
87 free(a);
87 free(a);
88 }
88 }
89 }
89 }
90
90
91 static int lsize(struct flist *a)
91 static int lsize(struct flist *a)
92 {
92 {
93 return a->tail - a->head;
93 return a->tail - a->head;
94 }
94 }
95
95
96 /* move hunks in source that are less cut to dest, compensating
96 /* move hunks in source that are less cut to dest, compensating
97 for changes in offset. the last hunk may be split if necessary.
97 for changes in offset. the last hunk may be split if necessary.
98 */
98 */
99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
100 {
100 {
101 struct frag *d = dest->tail, *s = src->head;
101 struct frag *d = dest->tail, *s = src->head;
102 int postend, c, l;
102 int postend, c, l;
103
103
104 while (s != src->tail) {
104 while (s != src->tail) {
105 if (s->start + offset >= cut)
105 if (s->start + offset >= cut)
106 break; /* we've gone far enough */
106 break; /* we've gone far enough */
107
107
108 postend = offset + s->start + s->len;
108 postend = offset + s->start + s->len;
109 if (postend <= cut) {
109 if (postend <= cut) {
110 /* save this hunk */
110 /* save this hunk */
111 offset += s->start + s->len - s->end;
111 offset += s->start + s->len - s->end;
112 *d++ = *s++;
112 *d++ = *s++;
113 }
113 }
114 else {
114 else {
115 /* break up this hunk */
115 /* break up this hunk */
116 c = cut - offset;
116 c = cut - offset;
117 if (s->end < c)
117 if (s->end < c)
118 c = s->end;
118 c = s->end;
119 l = cut - offset - s->start;
119 l = cut - offset - s->start;
120 if (s->len < l)
120 if (s->len < l)
121 l = s->len;
121 l = s->len;
122
122
123 offset += s->start + l - c;
123 offset += s->start + l - c;
124
124
125 d->start = s->start;
125 d->start = s->start;
126 d->end = c;
126 d->end = c;
127 d->len = l;
127 d->len = l;
128 d->data = s->data;
128 d->data = s->data;
129 d++;
129 d++;
130 s->start = c;
130 s->start = c;
131 s->len = s->len - l;
131 s->len = s->len - l;
132 s->data = s->data + l;
132 s->data = s->data + l;
133
133
134 break;
134 break;
135 }
135 }
136 }
136 }
137
137
138 dest->tail = d;
138 dest->tail = d;
139 src->head = s;
139 src->head = s;
140 return offset;
140 return offset;
141 }
141 }
142
142
143 /* like gather, but with no output list */
143 /* like gather, but with no output list */
144 static int discard(struct flist *src, int cut, int offset)
144 static int discard(struct flist *src, int cut, int offset)
145 {
145 {
146 struct frag *s = src->head;
146 struct frag *s = src->head;
147 int postend, c, l;
147 int postend, c, l;
148
148
149 while (s != src->tail) {
149 while (s != src->tail) {
150 if (s->start + offset >= cut)
150 if (s->start + offset >= cut)
151 break;
151 break;
152
152
153 postend = offset + s->start + s->len;
153 postend = offset + s->start + s->len;
154 if (postend <= cut) {
154 if (postend <= cut) {
155 offset += s->start + s->len - s->end;
155 offset += s->start + s->len - s->end;
156 s++;
156 s++;
157 }
157 }
158 else {
158 else {
159 c = cut - offset;
159 c = cut - offset;
160 if (s->end < c)
160 if (s->end < c)
161 c = s->end;
161 c = s->end;
162 l = cut - offset - s->start;
162 l = cut - offset - s->start;
163 if (s->len < l)
163 if (s->len < l)
164 l = s->len;
164 l = s->len;
165
165
166 offset += s->start + l - c;
166 offset += s->start + l - c;
167 s->start = c;
167 s->start = c;
168 s->len = s->len - l;
168 s->len = s->len - l;
169 s->data = s->data + l;
169 s->data = s->data + l;
170
170
171 break;
171 break;
172 }
172 }
173 }
173 }
174
174
175 src->head = s;
175 src->head = s;
176 return offset;
176 return offset;
177 }
177 }
178
178
179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
180 this deletes a and b and returns the resultant list. */
180 this deletes a and b and returns the resultant list. */
181 static struct flist *combine(struct flist *a, struct flist *b)
181 static struct flist *combine(struct flist *a, struct flist *b)
182 {
182 {
183 struct flist *c = NULL;
183 struct flist *c = NULL;
184 struct frag *bh, *ct;
184 struct frag *bh, *ct;
185 int offset = 0, post;
185 int offset = 0, post;
186
186
187 if (a && b)
187 if (a && b)
188 c = lalloc((lsize(a) + lsize(b)) * 2);
188 c = lalloc((lsize(a) + lsize(b)) * 2);
189
189
190 if (c) {
190 if (c) {
191
191
192 for (bh = b->head; bh != b->tail; bh++) {
192 for (bh = b->head; bh != b->tail; bh++) {
193 /* save old hunks */
193 /* save old hunks */
194 offset = gather(c, a, bh->start, offset);
194 offset = gather(c, a, bh->start, offset);
195
195
196 /* discard replaced hunks */
196 /* discard replaced hunks */
197 post = discard(a, bh->end, offset);
197 post = discard(a, bh->end, offset);
198
198
199 /* insert new hunk */
199 /* insert new hunk */
200 ct = c->tail;
200 ct = c->tail;
201 ct->start = bh->start - offset;
201 ct->start = bh->start - offset;
202 ct->end = bh->end - post;
202 ct->end = bh->end - post;
203 ct->len = bh->len;
203 ct->len = bh->len;
204 ct->data = bh->data;
204 ct->data = bh->data;
205 c->tail++;
205 c->tail++;
206 offset = post;
206 offset = post;
207 }
207 }
208
208
209 /* hold on to tail from a */
209 /* hold on to tail from a */
210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
211 c->tail += lsize(a);
211 c->tail += lsize(a);
212 }
212 }
213
213
214 lfree(a);
214 lfree(a);
215 lfree(b);
215 lfree(b);
216 return c;
216 return c;
217 }
217 }
218
218
219 /* decode a binary patch into a hunk list */
219 /* decode a binary patch into a hunk list */
220 static struct flist *decode(char *bin, int len)
220 static struct flist *decode(char *bin, int len)
221 {
221 {
222 struct flist *l;
222 struct flist *l;
223 struct frag *lt;
223 struct frag *lt;
224 char *end = bin + len;
224 char *data = bin + 12, *end = bin + len;
225 char decode[12]; /* for dealing with alignment issues */
225 char decode[12]; /* for dealing with alignment issues */
226
226
227 /* assume worst case size, we won't have many of these lists */
227 /* assume worst case size, we won't have many of these lists */
228 l = lalloc(len / 12);
228 l = lalloc(len / 12);
229 if (!l)
229 if (!l)
230 return NULL;
230 return NULL;
231
231
232 lt = l->tail;
232 lt = l->tail;
233
233
234 while (bin < end) {
234 while (data <= end) {
235 memcpy(decode, bin, 12);
235 memcpy(decode, bin, 12);
236 lt->start = ntohl(*(uint32_t *)decode);
236 lt->start = ntohl(*(uint32_t *)decode);
237 lt->end = ntohl(*(uint32_t *)(decode + 4));
237 lt->end = ntohl(*(uint32_t *)(decode + 4));
238 lt->len = ntohl(*(uint32_t *)(decode + 8));
238 lt->len = ntohl(*(uint32_t *)(decode + 8));
239 lt->data = bin + 12;
239 if (lt->start > lt->end)
240 bin += 12 + lt->len;
240 break; /* sanity check */
241 bin = data + lt->len;
242 if (bin < data)
243 break; /* big data + big (bogus) len can wrap around */
244 lt->data = data;
245 data = bin + 12;
241 lt++;
246 lt++;
242 }
247 }
243
248
244 if (bin != end) {
249 if (bin != end) {
245 if (!PyErr_Occurred())
250 if (!PyErr_Occurred())
246 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
251 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
247 lfree(l);
252 lfree(l);
248 return NULL;
253 return NULL;
249 }
254 }
250
255
251 l->tail = lt;
256 l->tail = lt;
252 return l;
257 return l;
253 }
258 }
254
259
255 /* calculate the size of resultant text */
260 /* calculate the size of resultant text */
256 static int calcsize(int len, struct flist *l)
261 static int calcsize(int len, struct flist *l)
257 {
262 {
258 int outlen = 0, last = 0;
263 int outlen = 0, last = 0;
259 struct frag *f = l->head;
264 struct frag *f = l->head;
260
265
261 while (f != l->tail) {
266 while (f != l->tail) {
262 if (f->start < last || f->end > len) {
267 if (f->start < last || f->end > len) {
263 if (!PyErr_Occurred())
268 if (!PyErr_Occurred())
264 PyErr_SetString(mpatch_Error,
269 PyErr_SetString(mpatch_Error,
265 "invalid patch");
270 "invalid patch");
266 return -1;
271 return -1;
267 }
272 }
268 outlen += f->start - last;
273 outlen += f->start - last;
269 last = f->end;
274 last = f->end;
270 outlen += f->len;
275 outlen += f->len;
271 f++;
276 f++;
272 }
277 }
273
278
274 outlen += len - last;
279 outlen += len - last;
275 return outlen;
280 return outlen;
276 }
281 }
277
282
278 static int apply(char *buf, char *orig, int len, struct flist *l)
283 static int apply(char *buf, char *orig, int len, struct flist *l)
279 {
284 {
280 struct frag *f = l->head;
285 struct frag *f = l->head;
281 int last = 0;
286 int last = 0;
282 char *p = buf;
287 char *p = buf;
283
288
284 while (f != l->tail) {
289 while (f != l->tail) {
285 if (f->start < last || f->end > len) {
290 if (f->start < last || f->end > len) {
286 if (!PyErr_Occurred())
291 if (!PyErr_Occurred())
287 PyErr_SetString(mpatch_Error,
292 PyErr_SetString(mpatch_Error,
288 "invalid patch");
293 "invalid patch");
289 return 0;
294 return 0;
290 }
295 }
291 memcpy(p, orig + last, f->start - last);
296 memcpy(p, orig + last, f->start - last);
292 p += f->start - last;
297 p += f->start - last;
293 memcpy(p, f->data, f->len);
298 memcpy(p, f->data, f->len);
294 last = f->end;
299 last = f->end;
295 p += f->len;
300 p += f->len;
296 f++;
301 f++;
297 }
302 }
298 memcpy(p, orig + last, len - last);
303 memcpy(p, orig + last, len - last);
299 return 1;
304 return 1;
300 }
305 }
301
306
302 /* recursively generate a patch of all bins between start and end */
307 /* recursively generate a patch of all bins between start and end */
303 static struct flist *fold(PyObject *bins, int start, int end)
308 static struct flist *fold(PyObject *bins, int start, int end)
304 {
309 {
305 int len;
310 int len;
306
311
307 if (start + 1 == end) {
312 if (start + 1 == end) {
308 /* trivial case, output a decoded list */
313 /* trivial case, output a decoded list */
309 PyObject *tmp = PyList_GetItem(bins, start);
314 PyObject *tmp = PyList_GetItem(bins, start);
310 if (!tmp)
315 if (!tmp)
311 return NULL;
316 return NULL;
312 return decode(PyString_AsString(tmp), PyString_Size(tmp));
317 return decode(PyString_AsString(tmp), PyString_Size(tmp));
313 }
318 }
314
319
315 /* divide and conquer, memory management is elsewhere */
320 /* divide and conquer, memory management is elsewhere */
316 len = (end - start) / 2;
321 len = (end - start) / 2;
317 return combine(fold(bins, start, start + len),
322 return combine(fold(bins, start, start + len),
318 fold(bins, start + len, end));
323 fold(bins, start + len, end));
319 }
324 }
320
325
321 static PyObject *
326 static PyObject *
322 patches(PyObject *self, PyObject *args)
327 patches(PyObject *self, PyObject *args)
323 {
328 {
324 PyObject *text, *bins, *result;
329 PyObject *text, *bins, *result;
325 struct flist *patch;
330 struct flist *patch;
326 char *in, *out;
331 char *in, *out;
327 int len, outlen;
332 int len, outlen;
328
333
329 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
334 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
330 return NULL;
335 return NULL;
331
336
332 len = PyList_Size(bins);
337 len = PyList_Size(bins);
333 if (!len) {
338 if (!len) {
334 /* nothing to do */
339 /* nothing to do */
335 Py_INCREF(text);
340 Py_INCREF(text);
336 return text;
341 return text;
337 }
342 }
338
343
339 patch = fold(bins, 0, len);
344 patch = fold(bins, 0, len);
340 if (!patch)
345 if (!patch)
341 return NULL;
346 return NULL;
342
347
343 outlen = calcsize(PyString_Size(text), patch);
348 outlen = calcsize(PyString_Size(text), patch);
344 if (outlen < 0) {
349 if (outlen < 0) {
345 result = NULL;
350 result = NULL;
346 goto cleanup;
351 goto cleanup;
347 }
352 }
348 result = PyString_FromStringAndSize(NULL, outlen);
353 result = PyString_FromStringAndSize(NULL, outlen);
349 if (!result) {
354 if (!result) {
350 result = NULL;
355 result = NULL;
351 goto cleanup;
356 goto cleanup;
352 }
357 }
353 in = PyString_AsString(text);
358 in = PyString_AsString(text);
354 out = PyString_AsString(result);
359 out = PyString_AsString(result);
355 if (!apply(out, in, PyString_Size(text), patch)) {
360 if (!apply(out, in, PyString_Size(text), patch)) {
356 Py_DECREF(result);
361 Py_DECREF(result);
357 result = NULL;
362 result = NULL;
358 }
363 }
359 cleanup:
364 cleanup:
360 lfree(patch);
365 lfree(patch);
361 return result;
366 return result;
362 }
367 }
363
368
364 /* calculate size of a patched file directly */
369 /* calculate size of a patched file directly */
365 static PyObject *
370 static PyObject *
366 patchedsize(PyObject *self, PyObject *args)
371 patchedsize(PyObject *self, PyObject *args)
367 {
372 {
368 long orig, start, end, len, outlen = 0, last = 0;
373 long orig, start, end, len, outlen = 0, last = 0;
369 int patchlen;
374 int patchlen;
370 char *bin, *binend;
375 char *bin, *binend, *data;
371 char decode[12]; /* for dealing with alignment issues */
376 char decode[12]; /* for dealing with alignment issues */
372
377
373 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
378 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
374 return NULL;
379 return NULL;
375
380
376 binend = bin + patchlen;
381 binend = bin + patchlen;
382 data = bin + 12;
377
383
378 while (bin < binend) {
384 while (data <= binend) {
379 memcpy(decode, bin, 12);
385 memcpy(decode, bin, 12);
380 start = ntohl(*(uint32_t *)decode);
386 start = ntohl(*(uint32_t *)decode);
381 end = ntohl(*(uint32_t *)(decode + 4));
387 end = ntohl(*(uint32_t *)(decode + 4));
382 len = ntohl(*(uint32_t *)(decode + 8));
388 len = ntohl(*(uint32_t *)(decode + 8));
383 bin += 12 + len;
389 if (start > end)
390 break; /* sanity check */
391 bin = data + len;
392 if (bin < data)
393 break; /* big data + big (bogus) len can wrap around */
394 data = bin + 12;
384 outlen += start - last;
395 outlen += start - last;
385 last = end;
396 last = end;
386 outlen += len;
397 outlen += len;
387 }
398 }
388
399
389 if (bin != binend) {
400 if (bin != binend) {
390 if (!PyErr_Occurred())
401 if (!PyErr_Occurred())
391 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
402 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
392 return NULL;
403 return NULL;
393 }
404 }
394
405
395 outlen += orig - last;
406 outlen += orig - last;
396 return Py_BuildValue("l", outlen);
407 return Py_BuildValue("l", outlen);
397 }
408 }
398
409
399 static PyMethodDef methods[] = {
410 static PyMethodDef methods[] = {
400 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
411 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
401 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
412 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
402 {NULL, NULL}
413 {NULL, NULL}
403 };
414 };
404
415
405 PyMODINIT_FUNC
416 PyMODINIT_FUNC
406 initmpatch(void)
417 initmpatch(void)
407 {
418 {
408 Py_InitModule3("mpatch", methods, mpatch_doc);
419 Py_InitModule3("mpatch", methods, mpatch_doc);
409 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
420 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
410 }
421 }
411
422
General Comments 0
You need to be logged in to leave comments. Login now