##// END OF EJS Templates
mpatch: Fix for malloc corner case on AIX
Matt Mackall -
r3138:cc856c4d default
parent child Browse files
Show More
@@ -1,408 +1,411 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # include <arpa/inet.h>
45 # include <arpa/inet.h>
46 # include <inttypes.h>
46 # include <inttypes.h>
47 #endif
47 #endif
48
48
49 static char mpatch_doc[] = "Efficient binary patching.";
49 static char mpatch_doc[] = "Efficient binary patching.";
50 static PyObject *mpatch_Error;
50 static PyObject *mpatch_Error;
51
51
52 struct frag {
52 struct frag {
53 int start, end, len;
53 int start, end, len;
54 char *data;
54 char *data;
55 };
55 };
56
56
57 struct flist {
57 struct flist {
58 struct frag *base, *head, *tail;
58 struct frag *base, *head, *tail;
59 };
59 };
60
60
61 static struct flist *lalloc(int size)
61 static struct flist *lalloc(int size)
62 {
62 {
63 struct flist *a = NULL;
63 struct flist *a = NULL;
64
64
65 if (size < 1)
66 size = 1;
67
65 a = (struct flist *)malloc(sizeof(struct flist));
68 a = (struct flist *)malloc(sizeof(struct flist));
66 if (a) {
69 if (a) {
67 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
68 if (a->base) {
71 if (a->base) {
69 a->head = a->tail = a->base;
72 a->head = a->tail = a->base;
70 return a;
73 return a;
71 }
74 }
72 free(a);
75 free(a);
73 a = NULL;
76 a = NULL;
74 }
77 }
75 if (!PyErr_Occurred())
78 if (!PyErr_Occurred())
76 PyErr_NoMemory();
79 PyErr_NoMemory();
77 return NULL;
80 return NULL;
78 }
81 }
79
82
80 static void lfree(struct flist *a)
83 static void lfree(struct flist *a)
81 {
84 {
82 if (a) {
85 if (a) {
83 free(a->base);
86 free(a->base);
84 free(a);
87 free(a);
85 }
88 }
86 }
89 }
87
90
88 static int lsize(struct flist *a)
91 static int lsize(struct flist *a)
89 {
92 {
90 return a->tail - a->head;
93 return a->tail - a->head;
91 }
94 }
92
95
93 /* move hunks in source that are less cut to dest, compensating
96 /* move hunks in source that are less cut to dest, compensating
94 for changes in offset. the last hunk may be split if necessary.
97 for changes in offset. the last hunk may be split if necessary.
95 */
98 */
96 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
97 {
100 {
98 struct frag *d = dest->tail, *s = src->head;
101 struct frag *d = dest->tail, *s = src->head;
99 int postend, c, l;
102 int postend, c, l;
100
103
101 while (s != src->tail) {
104 while (s != src->tail) {
102 if (s->start + offset >= cut)
105 if (s->start + offset >= cut)
103 break; /* we've gone far enough */
106 break; /* we've gone far enough */
104
107
105 postend = offset + s->start + s->len;
108 postend = offset + s->start + s->len;
106 if (postend <= cut) {
109 if (postend <= cut) {
107 /* save this hunk */
110 /* save this hunk */
108 offset += s->start + s->len - s->end;
111 offset += s->start + s->len - s->end;
109 *d++ = *s++;
112 *d++ = *s++;
110 }
113 }
111 else {
114 else {
112 /* break up this hunk */
115 /* break up this hunk */
113 c = cut - offset;
116 c = cut - offset;
114 if (s->end < c)
117 if (s->end < c)
115 c = s->end;
118 c = s->end;
116 l = cut - offset - s->start;
119 l = cut - offset - s->start;
117 if (s->len < l)
120 if (s->len < l)
118 l = s->len;
121 l = s->len;
119
122
120 offset += s->start + l - c;
123 offset += s->start + l - c;
121
124
122 d->start = s->start;
125 d->start = s->start;
123 d->end = c;
126 d->end = c;
124 d->len = l;
127 d->len = l;
125 d->data = s->data;
128 d->data = s->data;
126 d++;
129 d++;
127 s->start = c;
130 s->start = c;
128 s->len = s->len - l;
131 s->len = s->len - l;
129 s->data = s->data + l;
132 s->data = s->data + l;
130
133
131 break;
134 break;
132 }
135 }
133 }
136 }
134
137
135 dest->tail = d;
138 dest->tail = d;
136 src->head = s;
139 src->head = s;
137 return offset;
140 return offset;
138 }
141 }
139
142
140 /* like gather, but with no output list */
143 /* like gather, but with no output list */
141 static int discard(struct flist *src, int cut, int offset)
144 static int discard(struct flist *src, int cut, int offset)
142 {
145 {
143 struct frag *s = src->head;
146 struct frag *s = src->head;
144 int postend, c, l;
147 int postend, c, l;
145
148
146 while (s != src->tail) {
149 while (s != src->tail) {
147 if (s->start + offset >= cut)
150 if (s->start + offset >= cut)
148 break;
151 break;
149
152
150 postend = offset + s->start + s->len;
153 postend = offset + s->start + s->len;
151 if (postend <= cut) {
154 if (postend <= cut) {
152 offset += s->start + s->len - s->end;
155 offset += s->start + s->len - s->end;
153 s++;
156 s++;
154 }
157 }
155 else {
158 else {
156 c = cut - offset;
159 c = cut - offset;
157 if (s->end < c)
160 if (s->end < c)
158 c = s->end;
161 c = s->end;
159 l = cut - offset - s->start;
162 l = cut - offset - s->start;
160 if (s->len < l)
163 if (s->len < l)
161 l = s->len;
164 l = s->len;
162
165
163 offset += s->start + l - c;
166 offset += s->start + l - c;
164 s->start = c;
167 s->start = c;
165 s->len = s->len - l;
168 s->len = s->len - l;
166 s->data = s->data + l;
169 s->data = s->data + l;
167
170
168 break;
171 break;
169 }
172 }
170 }
173 }
171
174
172 src->head = s;
175 src->head = s;
173 return offset;
176 return offset;
174 }
177 }
175
178
176 /* combine hunk lists a and b, while adjusting b for offset changes in a/
179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
177 this deletes a and b and returns the resultant list. */
180 this deletes a and b and returns the resultant list. */
178 static struct flist *combine(struct flist *a, struct flist *b)
181 static struct flist *combine(struct flist *a, struct flist *b)
179 {
182 {
180 struct flist *c = NULL;
183 struct flist *c = NULL;
181 struct frag *bh, *ct;
184 struct frag *bh, *ct;
182 int offset = 0, post;
185 int offset = 0, post;
183
186
184 if (a && b)
187 if (a && b)
185 c = lalloc((lsize(a) + lsize(b)) * 2);
188 c = lalloc((lsize(a) + lsize(b)) * 2);
186
189
187 if (c) {
190 if (c) {
188
191
189 for (bh = b->head; bh != b->tail; bh++) {
192 for (bh = b->head; bh != b->tail; bh++) {
190 /* save old hunks */
193 /* save old hunks */
191 offset = gather(c, a, bh->start, offset);
194 offset = gather(c, a, bh->start, offset);
192
195
193 /* discard replaced hunks */
196 /* discard replaced hunks */
194 post = discard(a, bh->end, offset);
197 post = discard(a, bh->end, offset);
195
198
196 /* insert new hunk */
199 /* insert new hunk */
197 ct = c->tail;
200 ct = c->tail;
198 ct->start = bh->start - offset;
201 ct->start = bh->start - offset;
199 ct->end = bh->end - post;
202 ct->end = bh->end - post;
200 ct->len = bh->len;
203 ct->len = bh->len;
201 ct->data = bh->data;
204 ct->data = bh->data;
202 c->tail++;
205 c->tail++;
203 offset = post;
206 offset = post;
204 }
207 }
205
208
206 /* hold on to tail from a */
209 /* hold on to tail from a */
207 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
208 c->tail += lsize(a);
211 c->tail += lsize(a);
209 }
212 }
210
213
211 lfree(a);
214 lfree(a);
212 lfree(b);
215 lfree(b);
213 return c;
216 return c;
214 }
217 }
215
218
216 /* decode a binary patch into a hunk list */
219 /* decode a binary patch into a hunk list */
217 static struct flist *decode(char *bin, int len)
220 static struct flist *decode(char *bin, int len)
218 {
221 {
219 struct flist *l;
222 struct flist *l;
220 struct frag *lt;
223 struct frag *lt;
221 char *end = bin + len;
224 char *end = bin + len;
222 char decode[12]; /* for dealing with alignment issues */
225 char decode[12]; /* for dealing with alignment issues */
223
226
224 /* assume worst case size, we won't have many of these lists */
227 /* assume worst case size, we won't have many of these lists */
225 l = lalloc(len / 12);
228 l = lalloc(len / 12);
226 if (!l)
229 if (!l)
227 return NULL;
230 return NULL;
228
231
229 lt = l->tail;
232 lt = l->tail;
230
233
231 while (bin < end) {
234 while (bin < end) {
232 memcpy(decode, bin, 12);
235 memcpy(decode, bin, 12);
233 lt->start = ntohl(*(uint32_t *)decode);
236 lt->start = ntohl(*(uint32_t *)decode);
234 lt->end = ntohl(*(uint32_t *)(decode + 4));
237 lt->end = ntohl(*(uint32_t *)(decode + 4));
235 lt->len = ntohl(*(uint32_t *)(decode + 8));
238 lt->len = ntohl(*(uint32_t *)(decode + 8));
236 lt->data = bin + 12;
239 lt->data = bin + 12;
237 bin += 12 + lt->len;
240 bin += 12 + lt->len;
238 lt++;
241 lt++;
239 }
242 }
240
243
241 if (bin != end) {
244 if (bin != end) {
242 if (!PyErr_Occurred())
245 if (!PyErr_Occurred())
243 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
246 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
244 lfree(l);
247 lfree(l);
245 return NULL;
248 return NULL;
246 }
249 }
247
250
248 l->tail = lt;
251 l->tail = lt;
249 return l;
252 return l;
250 }
253 }
251
254
252 /* calculate the size of resultant text */
255 /* calculate the size of resultant text */
253 static int calcsize(int len, struct flist *l)
256 static int calcsize(int len, struct flist *l)
254 {
257 {
255 int outlen = 0, last = 0;
258 int outlen = 0, last = 0;
256 struct frag *f = l->head;
259 struct frag *f = l->head;
257
260
258 while (f != l->tail) {
261 while (f != l->tail) {
259 if (f->start < last || f->end > len) {
262 if (f->start < last || f->end > len) {
260 if (!PyErr_Occurred())
263 if (!PyErr_Occurred())
261 PyErr_SetString(mpatch_Error,
264 PyErr_SetString(mpatch_Error,
262 "invalid patch");
265 "invalid patch");
263 return -1;
266 return -1;
264 }
267 }
265 outlen += f->start - last;
268 outlen += f->start - last;
266 last = f->end;
269 last = f->end;
267 outlen += f->len;
270 outlen += f->len;
268 f++;
271 f++;
269 }
272 }
270
273
271 outlen += len - last;
274 outlen += len - last;
272 return outlen;
275 return outlen;
273 }
276 }
274
277
275 static int apply(char *buf, char *orig, int len, struct flist *l)
278 static int apply(char *buf, char *orig, int len, struct flist *l)
276 {
279 {
277 struct frag *f = l->head;
280 struct frag *f = l->head;
278 int last = 0;
281 int last = 0;
279 char *p = buf;
282 char *p = buf;
280
283
281 while (f != l->tail) {
284 while (f != l->tail) {
282 if (f->start < last || f->end > len) {
285 if (f->start < last || f->end > len) {
283 if (!PyErr_Occurred())
286 if (!PyErr_Occurred())
284 PyErr_SetString(mpatch_Error,
287 PyErr_SetString(mpatch_Error,
285 "invalid patch");
288 "invalid patch");
286 return 0;
289 return 0;
287 }
290 }
288 memcpy(p, orig + last, f->start - last);
291 memcpy(p, orig + last, f->start - last);
289 p += f->start - last;
292 p += f->start - last;
290 memcpy(p, f->data, f->len);
293 memcpy(p, f->data, f->len);
291 last = f->end;
294 last = f->end;
292 p += f->len;
295 p += f->len;
293 f++;
296 f++;
294 }
297 }
295 memcpy(p, orig + last, len - last);
298 memcpy(p, orig + last, len - last);
296 return 1;
299 return 1;
297 }
300 }
298
301
299 /* recursively generate a patch of all bins between start and end */
302 /* recursively generate a patch of all bins between start and end */
300 static struct flist *fold(PyObject *bins, int start, int end)
303 static struct flist *fold(PyObject *bins, int start, int end)
301 {
304 {
302 int len;
305 int len;
303
306
304 if (start + 1 == end) {
307 if (start + 1 == end) {
305 /* trivial case, output a decoded list */
308 /* trivial case, output a decoded list */
306 PyObject *tmp = PyList_GetItem(bins, start);
309 PyObject *tmp = PyList_GetItem(bins, start);
307 if (!tmp)
310 if (!tmp)
308 return NULL;
311 return NULL;
309 return decode(PyString_AsString(tmp), PyString_Size(tmp));
312 return decode(PyString_AsString(tmp), PyString_Size(tmp));
310 }
313 }
311
314
312 /* divide and conquer, memory management is elsewhere */
315 /* divide and conquer, memory management is elsewhere */
313 len = (end - start) / 2;
316 len = (end - start) / 2;
314 return combine(fold(bins, start, start + len),
317 return combine(fold(bins, start, start + len),
315 fold(bins, start + len, end));
318 fold(bins, start + len, end));
316 }
319 }
317
320
318 static PyObject *
321 static PyObject *
319 patches(PyObject *self, PyObject *args)
322 patches(PyObject *self, PyObject *args)
320 {
323 {
321 PyObject *text, *bins, *result;
324 PyObject *text, *bins, *result;
322 struct flist *patch;
325 struct flist *patch;
323 char *in, *out;
326 char *in, *out;
324 int len, outlen;
327 int len, outlen;
325
328
326 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
329 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
327 return NULL;
330 return NULL;
328
331
329 len = PyList_Size(bins);
332 len = PyList_Size(bins);
330 if (!len) {
333 if (!len) {
331 /* nothing to do */
334 /* nothing to do */
332 Py_INCREF(text);
335 Py_INCREF(text);
333 return text;
336 return text;
334 }
337 }
335
338
336 patch = fold(bins, 0, len);
339 patch = fold(bins, 0, len);
337 if (!patch)
340 if (!patch)
338 return NULL;
341 return NULL;
339
342
340 outlen = calcsize(PyString_Size(text), patch);
343 outlen = calcsize(PyString_Size(text), patch);
341 if (outlen < 0) {
344 if (outlen < 0) {
342 result = NULL;
345 result = NULL;
343 goto cleanup;
346 goto cleanup;
344 }
347 }
345 result = PyString_FromStringAndSize(NULL, outlen);
348 result = PyString_FromStringAndSize(NULL, outlen);
346 if (!result) {
349 if (!result) {
347 result = NULL;
350 result = NULL;
348 goto cleanup;
351 goto cleanup;
349 }
352 }
350 in = PyString_AsString(text);
353 in = PyString_AsString(text);
351 out = PyString_AsString(result);
354 out = PyString_AsString(result);
352 if (!apply(out, in, PyString_Size(text), patch)) {
355 if (!apply(out, in, PyString_Size(text), patch)) {
353 Py_DECREF(result);
356 Py_DECREF(result);
354 result = NULL;
357 result = NULL;
355 }
358 }
356 cleanup:
359 cleanup:
357 lfree(patch);
360 lfree(patch);
358 return result;
361 return result;
359 }
362 }
360
363
361 /* calculate size of a patched file directly */
364 /* calculate size of a patched file directly */
362 static PyObject *
365 static PyObject *
363 patchedsize(PyObject *self, PyObject *args)
366 patchedsize(PyObject *self, PyObject *args)
364 {
367 {
365 long orig, start, end, len, outlen = 0, last = 0;
368 long orig, start, end, len, outlen = 0, last = 0;
366 int patchlen;
369 int patchlen;
367 char *bin, *binend;
370 char *bin, *binend;
368 char decode[12]; /* for dealing with alignment issues */
371 char decode[12]; /* for dealing with alignment issues */
369
372
370 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
373 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
371 return NULL;
374 return NULL;
372
375
373 binend = bin + patchlen;
376 binend = bin + patchlen;
374
377
375 while (bin < binend) {
378 while (bin < binend) {
376 memcpy(decode, bin, 12);
379 memcpy(decode, bin, 12);
377 start = ntohl(*(uint32_t *)decode);
380 start = ntohl(*(uint32_t *)decode);
378 end = ntohl(*(uint32_t *)(decode + 4));
381 end = ntohl(*(uint32_t *)(decode + 4));
379 len = ntohl(*(uint32_t *)(decode + 8));
382 len = ntohl(*(uint32_t *)(decode + 8));
380 bin += 12 + len;
383 bin += 12 + len;
381 outlen += start - last;
384 outlen += start - last;
382 last = end;
385 last = end;
383 outlen += len;
386 outlen += len;
384 }
387 }
385
388
386 if (bin != binend) {
389 if (bin != binend) {
387 if (!PyErr_Occurred())
390 if (!PyErr_Occurred())
388 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
391 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
389 return NULL;
392 return NULL;
390 }
393 }
391
394
392 outlen += orig - last;
395 outlen += orig - last;
393 return Py_BuildValue("l", outlen);
396 return Py_BuildValue("l", outlen);
394 }
397 }
395
398
396 static PyMethodDef methods[] = {
399 static PyMethodDef methods[] = {
397 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
400 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
398 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
401 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
399 {NULL, NULL}
402 {NULL, NULL}
400 };
403 };
401
404
402 PyMODINIT_FUNC
405 PyMODINIT_FUNC
403 initmpatch(void)
406 initmpatch(void)
404 {
407 {
405 Py_InitModule3("mpatch", methods, mpatch_doc);
408 Py_InitModule3("mpatch", methods, mpatch_doc);
406 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
409 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
407 }
410 }
408
411
General Comments 0
You need to be logged in to leave comments. Login now