##// END OF EJS Templates
Fix segfaults when parsing bdiff hunks in mpatch.decode() and .patchedsize()...
Thomas Arendsen Hein -
r4358:11dc22eb default
parent child Browse files
Show More
@@ -1,411 +1,422 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 27 #ifdef _WIN32
28 28 # ifdef _MSC_VER
29 29 /* msvc 6.0 has problems */
30 30 # define inline __inline
31 31 typedef unsigned long uint32_t;
32 32 # else
33 33 # include <stdint.h>
34 34 # endif
35 35 static uint32_t ntohl(uint32_t x)
36 36 {
37 37 return ((x & 0x000000ffUL) << 24) |
38 38 ((x & 0x0000ff00UL) << 8) |
39 39 ((x & 0x00ff0000UL) >> 8) |
40 40 ((x & 0xff000000UL) >> 24);
41 41 }
42 42 #else
43 43 /* not windows */
44 44 # include <sys/types.h>
45 45 # include <arpa/inet.h>
46 46 # include <inttypes.h>
47 47 #endif
48 48
49 49 static char mpatch_doc[] = "Efficient binary patching.";
50 50 static PyObject *mpatch_Error;
51 51
52 52 struct frag {
53 53 int start, end, len;
54 54 char *data;
55 55 };
56 56
57 57 struct flist {
58 58 struct frag *base, *head, *tail;
59 59 };
60 60
61 61 static struct flist *lalloc(int size)
62 62 {
63 63 struct flist *a = NULL;
64 64
65 65 if (size < 1)
66 66 size = 1;
67 67
68 68 a = (struct flist *)malloc(sizeof(struct flist));
69 69 if (a) {
70 70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
71 71 if (a->base) {
72 72 a->head = a->tail = a->base;
73 73 return a;
74 74 }
75 75 free(a);
76 76 a = NULL;
77 77 }
78 78 if (!PyErr_Occurred())
79 79 PyErr_NoMemory();
80 80 return NULL;
81 81 }
82 82
83 83 static void lfree(struct flist *a)
84 84 {
85 85 if (a) {
86 86 free(a->base);
87 87 free(a);
88 88 }
89 89 }
90 90
91 91 static int lsize(struct flist *a)
92 92 {
93 93 return a->tail - a->head;
94 94 }
95 95
96 96 /* move hunks in source that are less cut to dest, compensating
97 97 for changes in offset. the last hunk may be split if necessary.
98 98 */
99 99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
100 100 {
101 101 struct frag *d = dest->tail, *s = src->head;
102 102 int postend, c, l;
103 103
104 104 while (s != src->tail) {
105 105 if (s->start + offset >= cut)
106 106 break; /* we've gone far enough */
107 107
108 108 postend = offset + s->start + s->len;
109 109 if (postend <= cut) {
110 110 /* save this hunk */
111 111 offset += s->start + s->len - s->end;
112 112 *d++ = *s++;
113 113 }
114 114 else {
115 115 /* break up this hunk */
116 116 c = cut - offset;
117 117 if (s->end < c)
118 118 c = s->end;
119 119 l = cut - offset - s->start;
120 120 if (s->len < l)
121 121 l = s->len;
122 122
123 123 offset += s->start + l - c;
124 124
125 125 d->start = s->start;
126 126 d->end = c;
127 127 d->len = l;
128 128 d->data = s->data;
129 129 d++;
130 130 s->start = c;
131 131 s->len = s->len - l;
132 132 s->data = s->data + l;
133 133
134 134 break;
135 135 }
136 136 }
137 137
138 138 dest->tail = d;
139 139 src->head = s;
140 140 return offset;
141 141 }
142 142
143 143 /* like gather, but with no output list */
144 144 static int discard(struct flist *src, int cut, int offset)
145 145 {
146 146 struct frag *s = src->head;
147 147 int postend, c, l;
148 148
149 149 while (s != src->tail) {
150 150 if (s->start + offset >= cut)
151 151 break;
152 152
153 153 postend = offset + s->start + s->len;
154 154 if (postend <= cut) {
155 155 offset += s->start + s->len - s->end;
156 156 s++;
157 157 }
158 158 else {
159 159 c = cut - offset;
160 160 if (s->end < c)
161 161 c = s->end;
162 162 l = cut - offset - s->start;
163 163 if (s->len < l)
164 164 l = s->len;
165 165
166 166 offset += s->start + l - c;
167 167 s->start = c;
168 168 s->len = s->len - l;
169 169 s->data = s->data + l;
170 170
171 171 break;
172 172 }
173 173 }
174 174
175 175 src->head = s;
176 176 return offset;
177 177 }
178 178
179 179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
180 180 this deletes a and b and returns the resultant list. */
181 181 static struct flist *combine(struct flist *a, struct flist *b)
182 182 {
183 183 struct flist *c = NULL;
184 184 struct frag *bh, *ct;
185 185 int offset = 0, post;
186 186
187 187 if (a && b)
188 188 c = lalloc((lsize(a) + lsize(b)) * 2);
189 189
190 190 if (c) {
191 191
192 192 for (bh = b->head; bh != b->tail; bh++) {
193 193 /* save old hunks */
194 194 offset = gather(c, a, bh->start, offset);
195 195
196 196 /* discard replaced hunks */
197 197 post = discard(a, bh->end, offset);
198 198
199 199 /* insert new hunk */
200 200 ct = c->tail;
201 201 ct->start = bh->start - offset;
202 202 ct->end = bh->end - post;
203 203 ct->len = bh->len;
204 204 ct->data = bh->data;
205 205 c->tail++;
206 206 offset = post;
207 207 }
208 208
209 209 /* hold on to tail from a */
210 210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
211 211 c->tail += lsize(a);
212 212 }
213 213
214 214 lfree(a);
215 215 lfree(b);
216 216 return c;
217 217 }
218 218
219 219 /* decode a binary patch into a hunk list */
220 220 static struct flist *decode(char *bin, int len)
221 221 {
222 222 struct flist *l;
223 223 struct frag *lt;
224 char *end = bin + len;
224 char *data = bin + 12, *end = bin + len;
225 225 char decode[12]; /* for dealing with alignment issues */
226 226
227 227 /* assume worst case size, we won't have many of these lists */
228 228 l = lalloc(len / 12);
229 229 if (!l)
230 230 return NULL;
231 231
232 232 lt = l->tail;
233 233
234 while (bin < end) {
234 while (data <= end) {
235 235 memcpy(decode, bin, 12);
236 236 lt->start = ntohl(*(uint32_t *)decode);
237 237 lt->end = ntohl(*(uint32_t *)(decode + 4));
238 238 lt->len = ntohl(*(uint32_t *)(decode + 8));
239 lt->data = bin + 12;
240 bin += 12 + lt->len;
239 if (lt->start > lt->end)
240 break; /* sanity check */
241 bin = data + lt->len;
242 if (bin < data)
243 break; /* big data + big (bogus) len can wrap around */
244 lt->data = data;
245 data = bin + 12;
241 246 lt++;
242 247 }
243 248
244 249 if (bin != end) {
245 250 if (!PyErr_Occurred())
246 251 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
247 252 lfree(l);
248 253 return NULL;
249 254 }
250 255
251 256 l->tail = lt;
252 257 return l;
253 258 }
254 259
255 260 /* calculate the size of resultant text */
256 261 static int calcsize(int len, struct flist *l)
257 262 {
258 263 int outlen = 0, last = 0;
259 264 struct frag *f = l->head;
260 265
261 266 while (f != l->tail) {
262 267 if (f->start < last || f->end > len) {
263 268 if (!PyErr_Occurred())
264 269 PyErr_SetString(mpatch_Error,
265 270 "invalid patch");
266 271 return -1;
267 272 }
268 273 outlen += f->start - last;
269 274 last = f->end;
270 275 outlen += f->len;
271 276 f++;
272 277 }
273 278
274 279 outlen += len - last;
275 280 return outlen;
276 281 }
277 282
278 283 static int apply(char *buf, char *orig, int len, struct flist *l)
279 284 {
280 285 struct frag *f = l->head;
281 286 int last = 0;
282 287 char *p = buf;
283 288
284 289 while (f != l->tail) {
285 290 if (f->start < last || f->end > len) {
286 291 if (!PyErr_Occurred())
287 292 PyErr_SetString(mpatch_Error,
288 293 "invalid patch");
289 294 return 0;
290 295 }
291 296 memcpy(p, orig + last, f->start - last);
292 297 p += f->start - last;
293 298 memcpy(p, f->data, f->len);
294 299 last = f->end;
295 300 p += f->len;
296 301 f++;
297 302 }
298 303 memcpy(p, orig + last, len - last);
299 304 return 1;
300 305 }
301 306
302 307 /* recursively generate a patch of all bins between start and end */
303 308 static struct flist *fold(PyObject *bins, int start, int end)
304 309 {
305 310 int len;
306 311
307 312 if (start + 1 == end) {
308 313 /* trivial case, output a decoded list */
309 314 PyObject *tmp = PyList_GetItem(bins, start);
310 315 if (!tmp)
311 316 return NULL;
312 317 return decode(PyString_AsString(tmp), PyString_Size(tmp));
313 318 }
314 319
315 320 /* divide and conquer, memory management is elsewhere */
316 321 len = (end - start) / 2;
317 322 return combine(fold(bins, start, start + len),
318 323 fold(bins, start + len, end));
319 324 }
320 325
321 326 static PyObject *
322 327 patches(PyObject *self, PyObject *args)
323 328 {
324 329 PyObject *text, *bins, *result;
325 330 struct flist *patch;
326 331 char *in, *out;
327 332 int len, outlen;
328 333
329 334 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
330 335 return NULL;
331 336
332 337 len = PyList_Size(bins);
333 338 if (!len) {
334 339 /* nothing to do */
335 340 Py_INCREF(text);
336 341 return text;
337 342 }
338 343
339 344 patch = fold(bins, 0, len);
340 345 if (!patch)
341 346 return NULL;
342 347
343 348 outlen = calcsize(PyString_Size(text), patch);
344 349 if (outlen < 0) {
345 350 result = NULL;
346 351 goto cleanup;
347 352 }
348 353 result = PyString_FromStringAndSize(NULL, outlen);
349 354 if (!result) {
350 355 result = NULL;
351 356 goto cleanup;
352 357 }
353 358 in = PyString_AsString(text);
354 359 out = PyString_AsString(result);
355 360 if (!apply(out, in, PyString_Size(text), patch)) {
356 361 Py_DECREF(result);
357 362 result = NULL;
358 363 }
359 364 cleanup:
360 365 lfree(patch);
361 366 return result;
362 367 }
363 368
364 369 /* calculate size of a patched file directly */
365 370 static PyObject *
366 371 patchedsize(PyObject *self, PyObject *args)
367 372 {
368 373 long orig, start, end, len, outlen = 0, last = 0;
369 374 int patchlen;
370 char *bin, *binend;
375 char *bin, *binend, *data;
371 376 char decode[12]; /* for dealing with alignment issues */
372 377
373 378 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
374 379 return NULL;
375 380
376 381 binend = bin + patchlen;
382 data = bin + 12;
377 383
378 while (bin < binend) {
384 while (data <= binend) {
379 385 memcpy(decode, bin, 12);
380 386 start = ntohl(*(uint32_t *)decode);
381 387 end = ntohl(*(uint32_t *)(decode + 4));
382 388 len = ntohl(*(uint32_t *)(decode + 8));
383 bin += 12 + len;
389 if (start > end)
390 break; /* sanity check */
391 bin = data + len;
392 if (bin < data)
393 break; /* big data + big (bogus) len can wrap around */
394 data = bin + 12;
384 395 outlen += start - last;
385 396 last = end;
386 397 outlen += len;
387 398 }
388 399
389 400 if (bin != binend) {
390 401 if (!PyErr_Occurred())
391 402 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
392 403 return NULL;
393 404 }
394 405
395 406 outlen += orig - last;
396 407 return Py_BuildValue("l", outlen);
397 408 }
398 409
399 410 static PyMethodDef methods[] = {
400 411 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
401 412 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
402 413 {NULL, NULL}
403 414 };
404 415
405 416 PyMODINIT_FUNC
406 417 initmpatch(void)
407 418 {
408 419 Py_InitModule3("mpatch", methods, mpatch_doc);
409 420 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
410 421 }
411 422
General Comments 0
You need to be logged in to leave comments. Login now