##// END OF EJS Templates
merge with stable
Thomas Arendsen Hein -
r4377:4759da3e merge default
parent child Browse files
Show More
@@ -1,415 +1,426 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # ifdef __BEOS__
45 # ifdef __BEOS__
46 # include <ByteOrder.h>
46 # include <ByteOrder.h>
47 # else
47 # else
48 # include <arpa/inet.h>
48 # include <arpa/inet.h>
49 # endif
49 # endif
50 # include <inttypes.h>
50 # include <inttypes.h>
51 #endif
51 #endif
52
52
53 static char mpatch_doc[] = "Efficient binary patching.";
53 static char mpatch_doc[] = "Efficient binary patching.";
54 static PyObject *mpatch_Error;
54 static PyObject *mpatch_Error;
55
55
56 struct frag {
56 struct frag {
57 int start, end, len;
57 int start, end, len;
58 char *data;
58 char *data;
59 };
59 };
60
60
61 struct flist {
61 struct flist {
62 struct frag *base, *head, *tail;
62 struct frag *base, *head, *tail;
63 };
63 };
64
64
65 static struct flist *lalloc(int size)
65 static struct flist *lalloc(int size)
66 {
66 {
67 struct flist *a = NULL;
67 struct flist *a = NULL;
68
68
69 if (size < 1)
69 if (size < 1)
70 size = 1;
70 size = 1;
71
71
72 a = (struct flist *)malloc(sizeof(struct flist));
72 a = (struct flist *)malloc(sizeof(struct flist));
73 if (a) {
73 if (a) {
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
75 if (a->base) {
75 if (a->base) {
76 a->head = a->tail = a->base;
76 a->head = a->tail = a->base;
77 return a;
77 return a;
78 }
78 }
79 free(a);
79 free(a);
80 a = NULL;
80 a = NULL;
81 }
81 }
82 if (!PyErr_Occurred())
82 if (!PyErr_Occurred())
83 PyErr_NoMemory();
83 PyErr_NoMemory();
84 return NULL;
84 return NULL;
85 }
85 }
86
86
87 static void lfree(struct flist *a)
87 static void lfree(struct flist *a)
88 {
88 {
89 if (a) {
89 if (a) {
90 free(a->base);
90 free(a->base);
91 free(a);
91 free(a);
92 }
92 }
93 }
93 }
94
94
95 static int lsize(struct flist *a)
95 static int lsize(struct flist *a)
96 {
96 {
97 return a->tail - a->head;
97 return a->tail - a->head;
98 }
98 }
99
99
100 /* move hunks in source that are less cut to dest, compensating
100 /* move hunks in source that are less cut to dest, compensating
101 for changes in offset. the last hunk may be split if necessary.
101 for changes in offset. the last hunk may be split if necessary.
102 */
102 */
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
104 {
104 {
105 struct frag *d = dest->tail, *s = src->head;
105 struct frag *d = dest->tail, *s = src->head;
106 int postend, c, l;
106 int postend, c, l;
107
107
108 while (s != src->tail) {
108 while (s != src->tail) {
109 if (s->start + offset >= cut)
109 if (s->start + offset >= cut)
110 break; /* we've gone far enough */
110 break; /* we've gone far enough */
111
111
112 postend = offset + s->start + s->len;
112 postend = offset + s->start + s->len;
113 if (postend <= cut) {
113 if (postend <= cut) {
114 /* save this hunk */
114 /* save this hunk */
115 offset += s->start + s->len - s->end;
115 offset += s->start + s->len - s->end;
116 *d++ = *s++;
116 *d++ = *s++;
117 }
117 }
118 else {
118 else {
119 /* break up this hunk */
119 /* break up this hunk */
120 c = cut - offset;
120 c = cut - offset;
121 if (s->end < c)
121 if (s->end < c)
122 c = s->end;
122 c = s->end;
123 l = cut - offset - s->start;
123 l = cut - offset - s->start;
124 if (s->len < l)
124 if (s->len < l)
125 l = s->len;
125 l = s->len;
126
126
127 offset += s->start + l - c;
127 offset += s->start + l - c;
128
128
129 d->start = s->start;
129 d->start = s->start;
130 d->end = c;
130 d->end = c;
131 d->len = l;
131 d->len = l;
132 d->data = s->data;
132 d->data = s->data;
133 d++;
133 d++;
134 s->start = c;
134 s->start = c;
135 s->len = s->len - l;
135 s->len = s->len - l;
136 s->data = s->data + l;
136 s->data = s->data + l;
137
137
138 break;
138 break;
139 }
139 }
140 }
140 }
141
141
142 dest->tail = d;
142 dest->tail = d;
143 src->head = s;
143 src->head = s;
144 return offset;
144 return offset;
145 }
145 }
146
146
147 /* like gather, but with no output list */
147 /* like gather, but with no output list */
148 static int discard(struct flist *src, int cut, int offset)
148 static int discard(struct flist *src, int cut, int offset)
149 {
149 {
150 struct frag *s = src->head;
150 struct frag *s = src->head;
151 int postend, c, l;
151 int postend, c, l;
152
152
153 while (s != src->tail) {
153 while (s != src->tail) {
154 if (s->start + offset >= cut)
154 if (s->start + offset >= cut)
155 break;
155 break;
156
156
157 postend = offset + s->start + s->len;
157 postend = offset + s->start + s->len;
158 if (postend <= cut) {
158 if (postend <= cut) {
159 offset += s->start + s->len - s->end;
159 offset += s->start + s->len - s->end;
160 s++;
160 s++;
161 }
161 }
162 else {
162 else {
163 c = cut - offset;
163 c = cut - offset;
164 if (s->end < c)
164 if (s->end < c)
165 c = s->end;
165 c = s->end;
166 l = cut - offset - s->start;
166 l = cut - offset - s->start;
167 if (s->len < l)
167 if (s->len < l)
168 l = s->len;
168 l = s->len;
169
169
170 offset += s->start + l - c;
170 offset += s->start + l - c;
171 s->start = c;
171 s->start = c;
172 s->len = s->len - l;
172 s->len = s->len - l;
173 s->data = s->data + l;
173 s->data = s->data + l;
174
174
175 break;
175 break;
176 }
176 }
177 }
177 }
178
178
179 src->head = s;
179 src->head = s;
180 return offset;
180 return offset;
181 }
181 }
182
182
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
184 this deletes a and b and returns the resultant list. */
184 this deletes a and b and returns the resultant list. */
185 static struct flist *combine(struct flist *a, struct flist *b)
185 static struct flist *combine(struct flist *a, struct flist *b)
186 {
186 {
187 struct flist *c = NULL;
187 struct flist *c = NULL;
188 struct frag *bh, *ct;
188 struct frag *bh, *ct;
189 int offset = 0, post;
189 int offset = 0, post;
190
190
191 if (a && b)
191 if (a && b)
192 c = lalloc((lsize(a) + lsize(b)) * 2);
192 c = lalloc((lsize(a) + lsize(b)) * 2);
193
193
194 if (c) {
194 if (c) {
195
195
196 for (bh = b->head; bh != b->tail; bh++) {
196 for (bh = b->head; bh != b->tail; bh++) {
197 /* save old hunks */
197 /* save old hunks */
198 offset = gather(c, a, bh->start, offset);
198 offset = gather(c, a, bh->start, offset);
199
199
200 /* discard replaced hunks */
200 /* discard replaced hunks */
201 post = discard(a, bh->end, offset);
201 post = discard(a, bh->end, offset);
202
202
203 /* insert new hunk */
203 /* insert new hunk */
204 ct = c->tail;
204 ct = c->tail;
205 ct->start = bh->start - offset;
205 ct->start = bh->start - offset;
206 ct->end = bh->end - post;
206 ct->end = bh->end - post;
207 ct->len = bh->len;
207 ct->len = bh->len;
208 ct->data = bh->data;
208 ct->data = bh->data;
209 c->tail++;
209 c->tail++;
210 offset = post;
210 offset = post;
211 }
211 }
212
212
213 /* hold on to tail from a */
213 /* hold on to tail from a */
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
215 c->tail += lsize(a);
215 c->tail += lsize(a);
216 }
216 }
217
217
218 lfree(a);
218 lfree(a);
219 lfree(b);
219 lfree(b);
220 return c;
220 return c;
221 }
221 }
222
222
223 /* decode a binary patch into a hunk list */
223 /* decode a binary patch into a hunk list */
224 static struct flist *decode(char *bin, int len)
224 static struct flist *decode(char *bin, int len)
225 {
225 {
226 struct flist *l;
226 struct flist *l;
227 struct frag *lt;
227 struct frag *lt;
228 char *end = bin + len;
228 char *data = bin + 12, *end = bin + len;
229 char decode[12]; /* for dealing with alignment issues */
229 char decode[12]; /* for dealing with alignment issues */
230
230
231 /* assume worst case size, we won't have many of these lists */
231 /* assume worst case size, we won't have many of these lists */
232 l = lalloc(len / 12);
232 l = lalloc(len / 12);
233 if (!l)
233 if (!l)
234 return NULL;
234 return NULL;
235
235
236 lt = l->tail;
236 lt = l->tail;
237
237
238 while (bin < end) {
238 while (data <= end) {
239 memcpy(decode, bin, 12);
239 memcpy(decode, bin, 12);
240 lt->start = ntohl(*(uint32_t *)decode);
240 lt->start = ntohl(*(uint32_t *)decode);
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
243 lt->data = bin + 12;
243 if (lt->start > lt->end)
244 bin += 12 + lt->len;
244 break; /* sanity check */
245 bin = data + lt->len;
246 if (bin < data)
247 break; /* big data + big (bogus) len can wrap around */
248 lt->data = data;
249 data = bin + 12;
245 lt++;
250 lt++;
246 }
251 }
247
252
248 if (bin != end) {
253 if (bin != end) {
249 if (!PyErr_Occurred())
254 if (!PyErr_Occurred())
250 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
255 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
251 lfree(l);
256 lfree(l);
252 return NULL;
257 return NULL;
253 }
258 }
254
259
255 l->tail = lt;
260 l->tail = lt;
256 return l;
261 return l;
257 }
262 }
258
263
259 /* calculate the size of resultant text */
264 /* calculate the size of resultant text */
260 static int calcsize(int len, struct flist *l)
265 static int calcsize(int len, struct flist *l)
261 {
266 {
262 int outlen = 0, last = 0;
267 int outlen = 0, last = 0;
263 struct frag *f = l->head;
268 struct frag *f = l->head;
264
269
265 while (f != l->tail) {
270 while (f != l->tail) {
266 if (f->start < last || f->end > len) {
271 if (f->start < last || f->end > len) {
267 if (!PyErr_Occurred())
272 if (!PyErr_Occurred())
268 PyErr_SetString(mpatch_Error,
273 PyErr_SetString(mpatch_Error,
269 "invalid patch");
274 "invalid patch");
270 return -1;
275 return -1;
271 }
276 }
272 outlen += f->start - last;
277 outlen += f->start - last;
273 last = f->end;
278 last = f->end;
274 outlen += f->len;
279 outlen += f->len;
275 f++;
280 f++;
276 }
281 }
277
282
278 outlen += len - last;
283 outlen += len - last;
279 return outlen;
284 return outlen;
280 }
285 }
281
286
282 static int apply(char *buf, char *orig, int len, struct flist *l)
287 static int apply(char *buf, char *orig, int len, struct flist *l)
283 {
288 {
284 struct frag *f = l->head;
289 struct frag *f = l->head;
285 int last = 0;
290 int last = 0;
286 char *p = buf;
291 char *p = buf;
287
292
288 while (f != l->tail) {
293 while (f != l->tail) {
289 if (f->start < last || f->end > len) {
294 if (f->start < last || f->end > len) {
290 if (!PyErr_Occurred())
295 if (!PyErr_Occurred())
291 PyErr_SetString(mpatch_Error,
296 PyErr_SetString(mpatch_Error,
292 "invalid patch");
297 "invalid patch");
293 return 0;
298 return 0;
294 }
299 }
295 memcpy(p, orig + last, f->start - last);
300 memcpy(p, orig + last, f->start - last);
296 p += f->start - last;
301 p += f->start - last;
297 memcpy(p, f->data, f->len);
302 memcpy(p, f->data, f->len);
298 last = f->end;
303 last = f->end;
299 p += f->len;
304 p += f->len;
300 f++;
305 f++;
301 }
306 }
302 memcpy(p, orig + last, len - last);
307 memcpy(p, orig + last, len - last);
303 return 1;
308 return 1;
304 }
309 }
305
310
306 /* recursively generate a patch of all bins between start and end */
311 /* recursively generate a patch of all bins between start and end */
307 static struct flist *fold(PyObject *bins, int start, int end)
312 static struct flist *fold(PyObject *bins, int start, int end)
308 {
313 {
309 int len;
314 int len;
310
315
311 if (start + 1 == end) {
316 if (start + 1 == end) {
312 /* trivial case, output a decoded list */
317 /* trivial case, output a decoded list */
313 PyObject *tmp = PyList_GetItem(bins, start);
318 PyObject *tmp = PyList_GetItem(bins, start);
314 if (!tmp)
319 if (!tmp)
315 return NULL;
320 return NULL;
316 return decode(PyString_AsString(tmp), PyString_Size(tmp));
321 return decode(PyString_AsString(tmp), PyString_Size(tmp));
317 }
322 }
318
323
319 /* divide and conquer, memory management is elsewhere */
324 /* divide and conquer, memory management is elsewhere */
320 len = (end - start) / 2;
325 len = (end - start) / 2;
321 return combine(fold(bins, start, start + len),
326 return combine(fold(bins, start, start + len),
322 fold(bins, start + len, end));
327 fold(bins, start + len, end));
323 }
328 }
324
329
325 static PyObject *
330 static PyObject *
326 patches(PyObject *self, PyObject *args)
331 patches(PyObject *self, PyObject *args)
327 {
332 {
328 PyObject *text, *bins, *result;
333 PyObject *text, *bins, *result;
329 struct flist *patch;
334 struct flist *patch;
330 char *in, *out;
335 char *in, *out;
331 int len, outlen;
336 int len, outlen;
332
337
333 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
338 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
334 return NULL;
339 return NULL;
335
340
336 len = PyList_Size(bins);
341 len = PyList_Size(bins);
337 if (!len) {
342 if (!len) {
338 /* nothing to do */
343 /* nothing to do */
339 Py_INCREF(text);
344 Py_INCREF(text);
340 return text;
345 return text;
341 }
346 }
342
347
343 patch = fold(bins, 0, len);
348 patch = fold(bins, 0, len);
344 if (!patch)
349 if (!patch)
345 return NULL;
350 return NULL;
346
351
347 outlen = calcsize(PyString_Size(text), patch);
352 outlen = calcsize(PyString_Size(text), patch);
348 if (outlen < 0) {
353 if (outlen < 0) {
349 result = NULL;
354 result = NULL;
350 goto cleanup;
355 goto cleanup;
351 }
356 }
352 result = PyString_FromStringAndSize(NULL, outlen);
357 result = PyString_FromStringAndSize(NULL, outlen);
353 if (!result) {
358 if (!result) {
354 result = NULL;
359 result = NULL;
355 goto cleanup;
360 goto cleanup;
356 }
361 }
357 in = PyString_AsString(text);
362 in = PyString_AsString(text);
358 out = PyString_AsString(result);
363 out = PyString_AsString(result);
359 if (!apply(out, in, PyString_Size(text), patch)) {
364 if (!apply(out, in, PyString_Size(text), patch)) {
360 Py_DECREF(result);
365 Py_DECREF(result);
361 result = NULL;
366 result = NULL;
362 }
367 }
363 cleanup:
368 cleanup:
364 lfree(patch);
369 lfree(patch);
365 return result;
370 return result;
366 }
371 }
367
372
368 /* calculate size of a patched file directly */
373 /* calculate size of a patched file directly */
369 static PyObject *
374 static PyObject *
370 patchedsize(PyObject *self, PyObject *args)
375 patchedsize(PyObject *self, PyObject *args)
371 {
376 {
372 long orig, start, end, len, outlen = 0, last = 0;
377 long orig, start, end, len, outlen = 0, last = 0;
373 int patchlen;
378 int patchlen;
374 char *bin, *binend;
379 char *bin, *binend, *data;
375 char decode[12]; /* for dealing with alignment issues */
380 char decode[12]; /* for dealing with alignment issues */
376
381
377 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
382 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
378 return NULL;
383 return NULL;
379
384
380 binend = bin + patchlen;
385 binend = bin + patchlen;
386 data = bin + 12;
381
387
382 while (bin < binend) {
388 while (data <= binend) {
383 memcpy(decode, bin, 12);
389 memcpy(decode, bin, 12);
384 start = ntohl(*(uint32_t *)decode);
390 start = ntohl(*(uint32_t *)decode);
385 end = ntohl(*(uint32_t *)(decode + 4));
391 end = ntohl(*(uint32_t *)(decode + 4));
386 len = ntohl(*(uint32_t *)(decode + 8));
392 len = ntohl(*(uint32_t *)(decode + 8));
387 bin += 12 + len;
393 if (start > end)
394 break; /* sanity check */
395 bin = data + len;
396 if (bin < data)
397 break; /* big data + big (bogus) len can wrap around */
398 data = bin + 12;
388 outlen += start - last;
399 outlen += start - last;
389 last = end;
400 last = end;
390 outlen += len;
401 outlen += len;
391 }
402 }
392
403
393 if (bin != binend) {
404 if (bin != binend) {
394 if (!PyErr_Occurred())
405 if (!PyErr_Occurred())
395 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
406 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
396 return NULL;
407 return NULL;
397 }
408 }
398
409
399 outlen += orig - last;
410 outlen += orig - last;
400 return Py_BuildValue("l", outlen);
411 return Py_BuildValue("l", outlen);
401 }
412 }
402
413
403 static PyMethodDef methods[] = {
414 static PyMethodDef methods[] = {
404 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
415 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
405 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
416 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
406 {NULL, NULL}
417 {NULL, NULL}
407 };
418 };
408
419
409 PyMODINIT_FUNC
420 PyMODINIT_FUNC
410 initmpatch(void)
421 initmpatch(void)
411 {
422 {
412 Py_InitModule3("mpatch", methods, mpatch_doc);
423 Py_InitModule3("mpatch", methods, mpatch_doc);
413 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
424 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
414 }
425 }
415
426
General Comments 0
You need to be logged in to leave comments. Login now