##// END OF EJS Templates
mpatch: Fix for malloc corner case on AIX
Matt Mackall -
r3138:cc856c4d default
parent child Browse files
Show More
@@ -1,408 +1,411 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 27 #ifdef _WIN32
28 28 # ifdef _MSC_VER
29 29 /* msvc 6.0 has problems */
30 30 # define inline __inline
31 31 typedef unsigned long uint32_t;
32 32 # else
33 33 # include <stdint.h>
34 34 # endif
35 35 static uint32_t ntohl(uint32_t x)
36 36 {
37 37 return ((x & 0x000000ffUL) << 24) |
38 38 ((x & 0x0000ff00UL) << 8) |
39 39 ((x & 0x00ff0000UL) >> 8) |
40 40 ((x & 0xff000000UL) >> 24);
41 41 }
42 42 #else
43 43 /* not windows */
44 44 # include <sys/types.h>
45 45 # include <arpa/inet.h>
46 46 # include <inttypes.h>
47 47 #endif
48 48
49 49 static char mpatch_doc[] = "Efficient binary patching.";
50 50 static PyObject *mpatch_Error;
51 51
52 52 struct frag {
53 53 int start, end, len;
54 54 char *data;
55 55 };
56 56
57 57 struct flist {
58 58 struct frag *base, *head, *tail;
59 59 };
60 60
61 61 static struct flist *lalloc(int size)
62 62 {
63 63 struct flist *a = NULL;
64 64
65 if (size < 1)
66 size = 1;
67
65 68 a = (struct flist *)malloc(sizeof(struct flist));
66 69 if (a) {
67 70 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
68 71 if (a->base) {
69 72 a->head = a->tail = a->base;
70 73 return a;
71 74 }
72 75 free(a);
73 76 a = NULL;
74 77 }
75 78 if (!PyErr_Occurred())
76 79 PyErr_NoMemory();
77 80 return NULL;
78 81 }
79 82
80 83 static void lfree(struct flist *a)
81 84 {
82 85 if (a) {
83 86 free(a->base);
84 87 free(a);
85 88 }
86 89 }
87 90
88 91 static int lsize(struct flist *a)
89 92 {
90 93 return a->tail - a->head;
91 94 }
92 95
93 96 /* move hunks in source that are less cut to dest, compensating
94 97 for changes in offset. the last hunk may be split if necessary.
95 98 */
96 99 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
97 100 {
98 101 struct frag *d = dest->tail, *s = src->head;
99 102 int postend, c, l;
100 103
101 104 while (s != src->tail) {
102 105 if (s->start + offset >= cut)
103 106 break; /* we've gone far enough */
104 107
105 108 postend = offset + s->start + s->len;
106 109 if (postend <= cut) {
107 110 /* save this hunk */
108 111 offset += s->start + s->len - s->end;
109 112 *d++ = *s++;
110 113 }
111 114 else {
112 115 /* break up this hunk */
113 116 c = cut - offset;
114 117 if (s->end < c)
115 118 c = s->end;
116 119 l = cut - offset - s->start;
117 120 if (s->len < l)
118 121 l = s->len;
119 122
120 123 offset += s->start + l - c;
121 124
122 125 d->start = s->start;
123 126 d->end = c;
124 127 d->len = l;
125 128 d->data = s->data;
126 129 d++;
127 130 s->start = c;
128 131 s->len = s->len - l;
129 132 s->data = s->data + l;
130 133
131 134 break;
132 135 }
133 136 }
134 137
135 138 dest->tail = d;
136 139 src->head = s;
137 140 return offset;
138 141 }
139 142
140 143 /* like gather, but with no output list */
141 144 static int discard(struct flist *src, int cut, int offset)
142 145 {
143 146 struct frag *s = src->head;
144 147 int postend, c, l;
145 148
146 149 while (s != src->tail) {
147 150 if (s->start + offset >= cut)
148 151 break;
149 152
150 153 postend = offset + s->start + s->len;
151 154 if (postend <= cut) {
152 155 offset += s->start + s->len - s->end;
153 156 s++;
154 157 }
155 158 else {
156 159 c = cut - offset;
157 160 if (s->end < c)
158 161 c = s->end;
159 162 l = cut - offset - s->start;
160 163 if (s->len < l)
161 164 l = s->len;
162 165
163 166 offset += s->start + l - c;
164 167 s->start = c;
165 168 s->len = s->len - l;
166 169 s->data = s->data + l;
167 170
168 171 break;
169 172 }
170 173 }
171 174
172 175 src->head = s;
173 176 return offset;
174 177 }
175 178
176 179 /* combine hunk lists a and b, while adjusting b for offset changes in a/
177 180 this deletes a and b and returns the resultant list. */
178 181 static struct flist *combine(struct flist *a, struct flist *b)
179 182 {
180 183 struct flist *c = NULL;
181 184 struct frag *bh, *ct;
182 185 int offset = 0, post;
183 186
184 187 if (a && b)
185 188 c = lalloc((lsize(a) + lsize(b)) * 2);
186 189
187 190 if (c) {
188 191
189 192 for (bh = b->head; bh != b->tail; bh++) {
190 193 /* save old hunks */
191 194 offset = gather(c, a, bh->start, offset);
192 195
193 196 /* discard replaced hunks */
194 197 post = discard(a, bh->end, offset);
195 198
196 199 /* insert new hunk */
197 200 ct = c->tail;
198 201 ct->start = bh->start - offset;
199 202 ct->end = bh->end - post;
200 203 ct->len = bh->len;
201 204 ct->data = bh->data;
202 205 c->tail++;
203 206 offset = post;
204 207 }
205 208
206 209 /* hold on to tail from a */
207 210 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
208 211 c->tail += lsize(a);
209 212 }
210 213
211 214 lfree(a);
212 215 lfree(b);
213 216 return c;
214 217 }
215 218
216 219 /* decode a binary patch into a hunk list */
217 220 static struct flist *decode(char *bin, int len)
218 221 {
219 222 struct flist *l;
220 223 struct frag *lt;
221 224 char *end = bin + len;
222 225 char decode[12]; /* for dealing with alignment issues */
223 226
224 227 /* assume worst case size, we won't have many of these lists */
225 228 l = lalloc(len / 12);
226 229 if (!l)
227 230 return NULL;
228 231
229 232 lt = l->tail;
230 233
231 234 while (bin < end) {
232 235 memcpy(decode, bin, 12);
233 236 lt->start = ntohl(*(uint32_t *)decode);
234 237 lt->end = ntohl(*(uint32_t *)(decode + 4));
235 238 lt->len = ntohl(*(uint32_t *)(decode + 8));
236 239 lt->data = bin + 12;
237 240 bin += 12 + lt->len;
238 241 lt++;
239 242 }
240 243
241 244 if (bin != end) {
242 245 if (!PyErr_Occurred())
243 246 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
244 247 lfree(l);
245 248 return NULL;
246 249 }
247 250
248 251 l->tail = lt;
249 252 return l;
250 253 }
251 254
252 255 /* calculate the size of resultant text */
253 256 static int calcsize(int len, struct flist *l)
254 257 {
255 258 int outlen = 0, last = 0;
256 259 struct frag *f = l->head;
257 260
258 261 while (f != l->tail) {
259 262 if (f->start < last || f->end > len) {
260 263 if (!PyErr_Occurred())
261 264 PyErr_SetString(mpatch_Error,
262 265 "invalid patch");
263 266 return -1;
264 267 }
265 268 outlen += f->start - last;
266 269 last = f->end;
267 270 outlen += f->len;
268 271 f++;
269 272 }
270 273
271 274 outlen += len - last;
272 275 return outlen;
273 276 }
274 277
275 278 static int apply(char *buf, char *orig, int len, struct flist *l)
276 279 {
277 280 struct frag *f = l->head;
278 281 int last = 0;
279 282 char *p = buf;
280 283
281 284 while (f != l->tail) {
282 285 if (f->start < last || f->end > len) {
283 286 if (!PyErr_Occurred())
284 287 PyErr_SetString(mpatch_Error,
285 288 "invalid patch");
286 289 return 0;
287 290 }
288 291 memcpy(p, orig + last, f->start - last);
289 292 p += f->start - last;
290 293 memcpy(p, f->data, f->len);
291 294 last = f->end;
292 295 p += f->len;
293 296 f++;
294 297 }
295 298 memcpy(p, orig + last, len - last);
296 299 return 1;
297 300 }
298 301
299 302 /* recursively generate a patch of all bins between start and end */
300 303 static struct flist *fold(PyObject *bins, int start, int end)
301 304 {
302 305 int len;
303 306
304 307 if (start + 1 == end) {
305 308 /* trivial case, output a decoded list */
306 309 PyObject *tmp = PyList_GetItem(bins, start);
307 310 if (!tmp)
308 311 return NULL;
309 312 return decode(PyString_AsString(tmp), PyString_Size(tmp));
310 313 }
311 314
312 315 /* divide and conquer, memory management is elsewhere */
313 316 len = (end - start) / 2;
314 317 return combine(fold(bins, start, start + len),
315 318 fold(bins, start + len, end));
316 319 }
317 320
318 321 static PyObject *
319 322 patches(PyObject *self, PyObject *args)
320 323 {
321 324 PyObject *text, *bins, *result;
322 325 struct flist *patch;
323 326 char *in, *out;
324 327 int len, outlen;
325 328
326 329 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
327 330 return NULL;
328 331
329 332 len = PyList_Size(bins);
330 333 if (!len) {
331 334 /* nothing to do */
332 335 Py_INCREF(text);
333 336 return text;
334 337 }
335 338
336 339 patch = fold(bins, 0, len);
337 340 if (!patch)
338 341 return NULL;
339 342
340 343 outlen = calcsize(PyString_Size(text), patch);
341 344 if (outlen < 0) {
342 345 result = NULL;
343 346 goto cleanup;
344 347 }
345 348 result = PyString_FromStringAndSize(NULL, outlen);
346 349 if (!result) {
347 350 result = NULL;
348 351 goto cleanup;
349 352 }
350 353 in = PyString_AsString(text);
351 354 out = PyString_AsString(result);
352 355 if (!apply(out, in, PyString_Size(text), patch)) {
353 356 Py_DECREF(result);
354 357 result = NULL;
355 358 }
356 359 cleanup:
357 360 lfree(patch);
358 361 return result;
359 362 }
360 363
361 364 /* calculate size of a patched file directly */
362 365 static PyObject *
363 366 patchedsize(PyObject *self, PyObject *args)
364 367 {
365 368 long orig, start, end, len, outlen = 0, last = 0;
366 369 int patchlen;
367 370 char *bin, *binend;
368 371 char decode[12]; /* for dealing with alignment issues */
369 372
370 373 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
371 374 return NULL;
372 375
373 376 binend = bin + patchlen;
374 377
375 378 while (bin < binend) {
376 379 memcpy(decode, bin, 12);
377 380 start = ntohl(*(uint32_t *)decode);
378 381 end = ntohl(*(uint32_t *)(decode + 4));
379 382 len = ntohl(*(uint32_t *)(decode + 8));
380 383 bin += 12 + len;
381 384 outlen += start - last;
382 385 last = end;
383 386 outlen += len;
384 387 }
385 388
386 389 if (bin != binend) {
387 390 if (!PyErr_Occurred())
388 391 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
389 392 return NULL;
390 393 }
391 394
392 395 outlen += orig - last;
393 396 return Py_BuildValue("l", outlen);
394 397 }
395 398
396 399 static PyMethodDef methods[] = {
397 400 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
398 401 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
399 402 {NULL, NULL}
400 403 };
401 404
402 405 PyMODINIT_FUNC
403 406 initmpatch(void)
404 407 {
405 408 Py_InitModule3("mpatch", methods, mpatch_doc);
406 409 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
407 410 }
408 411
General Comments 0
You need to be logged in to leave comments. Login now