##// END OF EJS Templates
mpatch: allow buffer objects for input
Matt Mackall -
r5444:a0952e4e default
parent child Browse files
Show More
@@ -1,426 +1,434
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # ifdef __BEOS__
45 # ifdef __BEOS__
46 # include <ByteOrder.h>
46 # include <ByteOrder.h>
47 # else
47 # else
48 # include <arpa/inet.h>
48 # include <arpa/inet.h>
49 # endif
49 # endif
50 # include <inttypes.h>
50 # include <inttypes.h>
51 #endif
51 #endif
52
52
53 static char mpatch_doc[] = "Efficient binary patching.";
53 static char mpatch_doc[] = "Efficient binary patching.";
54 static PyObject *mpatch_Error;
54 static PyObject *mpatch_Error;
55
55
56 struct frag {
56 struct frag {
57 int start, end, len;
57 int start, end, len;
58 char *data;
58 const char *data;
59 };
59 };
60
60
61 struct flist {
61 struct flist {
62 struct frag *base, *head, *tail;
62 struct frag *base, *head, *tail;
63 };
63 };
64
64
65 static struct flist *lalloc(int size)
65 static struct flist *lalloc(int size)
66 {
66 {
67 struct flist *a = NULL;
67 struct flist *a = NULL;
68
68
69 if (size < 1)
69 if (size < 1)
70 size = 1;
70 size = 1;
71
71
72 a = (struct flist *)malloc(sizeof(struct flist));
72 a = (struct flist *)malloc(sizeof(struct flist));
73 if (a) {
73 if (a) {
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
75 if (a->base) {
75 if (a->base) {
76 a->head = a->tail = a->base;
76 a->head = a->tail = a->base;
77 return a;
77 return a;
78 }
78 }
79 free(a);
79 free(a);
80 a = NULL;
80 a = NULL;
81 }
81 }
82 if (!PyErr_Occurred())
82 if (!PyErr_Occurred())
83 PyErr_NoMemory();
83 PyErr_NoMemory();
84 return NULL;
84 return NULL;
85 }
85 }
86
86
87 static void lfree(struct flist *a)
87 static void lfree(struct flist *a)
88 {
88 {
89 if (a) {
89 if (a) {
90 free(a->base);
90 free(a->base);
91 free(a);
91 free(a);
92 }
92 }
93 }
93 }
94
94
95 static int lsize(struct flist *a)
95 static int lsize(struct flist *a)
96 {
96 {
97 return a->tail - a->head;
97 return a->tail - a->head;
98 }
98 }
99
99
100 /* move hunks in source that are less cut to dest, compensating
100 /* move hunks in source that are less cut to dest, compensating
101 for changes in offset. the last hunk may be split if necessary.
101 for changes in offset. the last hunk may be split if necessary.
102 */
102 */
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
104 {
104 {
105 struct frag *d = dest->tail, *s = src->head;
105 struct frag *d = dest->tail, *s = src->head;
106 int postend, c, l;
106 int postend, c, l;
107
107
108 while (s != src->tail) {
108 while (s != src->tail) {
109 if (s->start + offset >= cut)
109 if (s->start + offset >= cut)
110 break; /* we've gone far enough */
110 break; /* we've gone far enough */
111
111
112 postend = offset + s->start + s->len;
112 postend = offset + s->start + s->len;
113 if (postend <= cut) {
113 if (postend <= cut) {
114 /* save this hunk */
114 /* save this hunk */
115 offset += s->start + s->len - s->end;
115 offset += s->start + s->len - s->end;
116 *d++ = *s++;
116 *d++ = *s++;
117 }
117 }
118 else {
118 else {
119 /* break up this hunk */
119 /* break up this hunk */
120 c = cut - offset;
120 c = cut - offset;
121 if (s->end < c)
121 if (s->end < c)
122 c = s->end;
122 c = s->end;
123 l = cut - offset - s->start;
123 l = cut - offset - s->start;
124 if (s->len < l)
124 if (s->len < l)
125 l = s->len;
125 l = s->len;
126
126
127 offset += s->start + l - c;
127 offset += s->start + l - c;
128
128
129 d->start = s->start;
129 d->start = s->start;
130 d->end = c;
130 d->end = c;
131 d->len = l;
131 d->len = l;
132 d->data = s->data;
132 d->data = s->data;
133 d++;
133 d++;
134 s->start = c;
134 s->start = c;
135 s->len = s->len - l;
135 s->len = s->len - l;
136 s->data = s->data + l;
136 s->data = s->data + l;
137
137
138 break;
138 break;
139 }
139 }
140 }
140 }
141
141
142 dest->tail = d;
142 dest->tail = d;
143 src->head = s;
143 src->head = s;
144 return offset;
144 return offset;
145 }
145 }
146
146
147 /* like gather, but with no output list */
147 /* like gather, but with no output list */
148 static int discard(struct flist *src, int cut, int offset)
148 static int discard(struct flist *src, int cut, int offset)
149 {
149 {
150 struct frag *s = src->head;
150 struct frag *s = src->head;
151 int postend, c, l;
151 int postend, c, l;
152
152
153 while (s != src->tail) {
153 while (s != src->tail) {
154 if (s->start + offset >= cut)
154 if (s->start + offset >= cut)
155 break;
155 break;
156
156
157 postend = offset + s->start + s->len;
157 postend = offset + s->start + s->len;
158 if (postend <= cut) {
158 if (postend <= cut) {
159 offset += s->start + s->len - s->end;
159 offset += s->start + s->len - s->end;
160 s++;
160 s++;
161 }
161 }
162 else {
162 else {
163 c = cut - offset;
163 c = cut - offset;
164 if (s->end < c)
164 if (s->end < c)
165 c = s->end;
165 c = s->end;
166 l = cut - offset - s->start;
166 l = cut - offset - s->start;
167 if (s->len < l)
167 if (s->len < l)
168 l = s->len;
168 l = s->len;
169
169
170 offset += s->start + l - c;
170 offset += s->start + l - c;
171 s->start = c;
171 s->start = c;
172 s->len = s->len - l;
172 s->len = s->len - l;
173 s->data = s->data + l;
173 s->data = s->data + l;
174
174
175 break;
175 break;
176 }
176 }
177 }
177 }
178
178
179 src->head = s;
179 src->head = s;
180 return offset;
180 return offset;
181 }
181 }
182
182
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
184 this deletes a and b and returns the resultant list. */
184 this deletes a and b and returns the resultant list. */
185 static struct flist *combine(struct flist *a, struct flist *b)
185 static struct flist *combine(struct flist *a, struct flist *b)
186 {
186 {
187 struct flist *c = NULL;
187 struct flist *c = NULL;
188 struct frag *bh, *ct;
188 struct frag *bh, *ct;
189 int offset = 0, post;
189 int offset = 0, post;
190
190
191 if (a && b)
191 if (a && b)
192 c = lalloc((lsize(a) + lsize(b)) * 2);
192 c = lalloc((lsize(a) + lsize(b)) * 2);
193
193
194 if (c) {
194 if (c) {
195
195
196 for (bh = b->head; bh != b->tail; bh++) {
196 for (bh = b->head; bh != b->tail; bh++) {
197 /* save old hunks */
197 /* save old hunks */
198 offset = gather(c, a, bh->start, offset);
198 offset = gather(c, a, bh->start, offset);
199
199
200 /* discard replaced hunks */
200 /* discard replaced hunks */
201 post = discard(a, bh->end, offset);
201 post = discard(a, bh->end, offset);
202
202
203 /* insert new hunk */
203 /* insert new hunk */
204 ct = c->tail;
204 ct = c->tail;
205 ct->start = bh->start - offset;
205 ct->start = bh->start - offset;
206 ct->end = bh->end - post;
206 ct->end = bh->end - post;
207 ct->len = bh->len;
207 ct->len = bh->len;
208 ct->data = bh->data;
208 ct->data = bh->data;
209 c->tail++;
209 c->tail++;
210 offset = post;
210 offset = post;
211 }
211 }
212
212
213 /* hold on to tail from a */
213 /* hold on to tail from a */
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
215 c->tail += lsize(a);
215 c->tail += lsize(a);
216 }
216 }
217
217
218 lfree(a);
218 lfree(a);
219 lfree(b);
219 lfree(b);
220 return c;
220 return c;
221 }
221 }
222
222
223 /* decode a binary patch into a hunk list */
223 /* decode a binary patch into a hunk list */
224 static struct flist *decode(char *bin, int len)
224 static struct flist *decode(const char *bin, int len)
225 {
225 {
226 struct flist *l;
226 struct flist *l;
227 struct frag *lt;
227 struct frag *lt;
228 char *data = bin + 12, *end = bin + len;
228 const char *data = bin + 12, *end = bin + len;
229 char decode[12]; /* for dealing with alignment issues */
229 char decode[12]; /* for dealing with alignment issues */
230
230
231 /* assume worst case size, we won't have many of these lists */
231 /* assume worst case size, we won't have many of these lists */
232 l = lalloc(len / 12);
232 l = lalloc(len / 12);
233 if (!l)
233 if (!l)
234 return NULL;
234 return NULL;
235
235
236 lt = l->tail;
236 lt = l->tail;
237
237
238 while (data <= end) {
238 while (data <= end) {
239 memcpy(decode, bin, 12);
239 memcpy(decode, bin, 12);
240 lt->start = ntohl(*(uint32_t *)decode);
240 lt->start = ntohl(*(uint32_t *)decode);
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
243 if (lt->start > lt->end)
243 if (lt->start > lt->end)
244 break; /* sanity check */
244 break; /* sanity check */
245 bin = data + lt->len;
245 bin = data + lt->len;
246 if (bin < data)
246 if (bin < data)
247 break; /* big data + big (bogus) len can wrap around */
247 break; /* big data + big (bogus) len can wrap around */
248 lt->data = data;
248 lt->data = data;
249 data = bin + 12;
249 data = bin + 12;
250 lt++;
250 lt++;
251 }
251 }
252
252
253 if (bin != end) {
253 if (bin != end) {
254 if (!PyErr_Occurred())
254 if (!PyErr_Occurred())
255 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
255 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
256 lfree(l);
256 lfree(l);
257 return NULL;
257 return NULL;
258 }
258 }
259
259
260 l->tail = lt;
260 l->tail = lt;
261 return l;
261 return l;
262 }
262 }
263
263
264 /* calculate the size of resultant text */
264 /* calculate the size of resultant text */
265 static int calcsize(int len, struct flist *l)
265 static int calcsize(int len, struct flist *l)
266 {
266 {
267 int outlen = 0, last = 0;
267 int outlen = 0, last = 0;
268 struct frag *f = l->head;
268 struct frag *f = l->head;
269
269
270 while (f != l->tail) {
270 while (f != l->tail) {
271 if (f->start < last || f->end > len) {
271 if (f->start < last || f->end > len) {
272 if (!PyErr_Occurred())
272 if (!PyErr_Occurred())
273 PyErr_SetString(mpatch_Error,
273 PyErr_SetString(mpatch_Error,
274 "invalid patch");
274 "invalid patch");
275 return -1;
275 return -1;
276 }
276 }
277 outlen += f->start - last;
277 outlen += f->start - last;
278 last = f->end;
278 last = f->end;
279 outlen += f->len;
279 outlen += f->len;
280 f++;
280 f++;
281 }
281 }
282
282
283 outlen += len - last;
283 outlen += len - last;
284 return outlen;
284 return outlen;
285 }
285 }
286
286
287 static int apply(char *buf, char *orig, int len, struct flist *l)
287 static int apply(char *buf, const char *orig, int len, struct flist *l)
288 {
288 {
289 struct frag *f = l->head;
289 struct frag *f = l->head;
290 int last = 0;
290 int last = 0;
291 char *p = buf;
291 char *p = buf;
292
292
293 while (f != l->tail) {
293 while (f != l->tail) {
294 if (f->start < last || f->end > len) {
294 if (f->start < last || f->end > len) {
295 if (!PyErr_Occurred())
295 if (!PyErr_Occurred())
296 PyErr_SetString(mpatch_Error,
296 PyErr_SetString(mpatch_Error,
297 "invalid patch");
297 "invalid patch");
298 return 0;
298 return 0;
299 }
299 }
300 memcpy(p, orig + last, f->start - last);
300 memcpy(p, orig + last, f->start - last);
301 p += f->start - last;
301 p += f->start - last;
302 memcpy(p, f->data, f->len);
302 memcpy(p, f->data, f->len);
303 last = f->end;
303 last = f->end;
304 p += f->len;
304 p += f->len;
305 f++;
305 f++;
306 }
306 }
307 memcpy(p, orig + last, len - last);
307 memcpy(p, orig + last, len - last);
308 return 1;
308 return 1;
309 }
309 }
310
310
311 /* recursively generate a patch of all bins between start and end */
311 /* recursively generate a patch of all bins between start and end */
312 static struct flist *fold(PyObject *bins, int start, int end)
312 static struct flist *fold(PyObject *bins, int start, int end)
313 {
313 {
314 int len;
314 int len;
315 ssize_t blen;
316 const char *buffer;
315
317
316 if (start + 1 == end) {
318 if (start + 1 == end) {
317 /* trivial case, output a decoded list */
319 /* trivial case, output a decoded list */
318 PyObject *tmp = PyList_GetItem(bins, start);
320 PyObject *tmp = PyList_GetItem(bins, start);
319 if (!tmp)
321 if (!tmp)
320 return NULL;
322 return NULL;
321 return decode(PyString_AsString(tmp), PyString_Size(tmp));
323 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
324 return NULL;
325 return decode(buffer, blen);
322 }
326 }
323
327
324 /* divide and conquer, memory management is elsewhere */
328 /* divide and conquer, memory management is elsewhere */
325 len = (end - start) / 2;
329 len = (end - start) / 2;
326 return combine(fold(bins, start, start + len),
330 return combine(fold(bins, start, start + len),
327 fold(bins, start + len, end));
331 fold(bins, start + len, end));
328 }
332 }
329
333
330 static PyObject *
334 static PyObject *
331 patches(PyObject *self, PyObject *args)
335 patches(PyObject *self, PyObject *args)
332 {
336 {
333 PyObject *text, *bins, *result;
337 PyObject *text, *bins, *result;
334 struct flist *patch;
338 struct flist *patch;
335 char *in, *out;
339 const char *in;
340 char *out;
336 int len, outlen;
341 int len, outlen;
342 ssize_t inlen;
337
343
338 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
344 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
339 return NULL;
345 return NULL;
340
346
341 len = PyList_Size(bins);
347 len = PyList_Size(bins);
342 if (!len) {
348 if (!len) {
343 /* nothing to do */
349 /* nothing to do */
344 Py_INCREF(text);
350 Py_INCREF(text);
345 return text;
351 return text;
346 }
352 }
347
353
354 if (PyObject_AsCharBuffer(text, &in, &inlen))
355 return NULL;
356
348 patch = fold(bins, 0, len);
357 patch = fold(bins, 0, len);
349 if (!patch)
358 if (!patch)
350 return NULL;
359 return NULL;
351
360
352 outlen = calcsize(PyString_Size(text), patch);
361 outlen = calcsize(inlen, patch);
353 if (outlen < 0) {
362 if (outlen < 0) {
354 result = NULL;
363 result = NULL;
355 goto cleanup;
364 goto cleanup;
356 }
365 }
357 result = PyString_FromStringAndSize(NULL, outlen);
366 result = PyString_FromStringAndSize(NULL, outlen);
358 if (!result) {
367 if (!result) {
359 result = NULL;
368 result = NULL;
360 goto cleanup;
369 goto cleanup;
361 }
370 }
362 in = PyString_AsString(text);
363 out = PyString_AsString(result);
371 out = PyString_AsString(result);
364 if (!apply(out, in, PyString_Size(text), patch)) {
372 if (!apply(out, in, inlen, patch)) {
365 Py_DECREF(result);
373 Py_DECREF(result);
366 result = NULL;
374 result = NULL;
367 }
375 }
368 cleanup:
376 cleanup:
369 lfree(patch);
377 lfree(patch);
370 return result;
378 return result;
371 }
379 }
372
380
373 /* calculate size of a patched file directly */
381 /* calculate size of a patched file directly */
374 static PyObject *
382 static PyObject *
375 patchedsize(PyObject *self, PyObject *args)
383 patchedsize(PyObject *self, PyObject *args)
376 {
384 {
377 long orig, start, end, len, outlen = 0, last = 0;
385 long orig, start, end, len, outlen = 0, last = 0;
378 int patchlen;
386 int patchlen;
379 char *bin, *binend, *data;
387 char *bin, *binend, *data;
380 char decode[12]; /* for dealing with alignment issues */
388 char decode[12]; /* for dealing with alignment issues */
381
389
382 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
390 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
383 return NULL;
391 return NULL;
384
392
385 binend = bin + patchlen;
393 binend = bin + patchlen;
386 data = bin + 12;
394 data = bin + 12;
387
395
388 while (data <= binend) {
396 while (data <= binend) {
389 memcpy(decode, bin, 12);
397 memcpy(decode, bin, 12);
390 start = ntohl(*(uint32_t *)decode);
398 start = ntohl(*(uint32_t *)decode);
391 end = ntohl(*(uint32_t *)(decode + 4));
399 end = ntohl(*(uint32_t *)(decode + 4));
392 len = ntohl(*(uint32_t *)(decode + 8));
400 len = ntohl(*(uint32_t *)(decode + 8));
393 if (start > end)
401 if (start > end)
394 break; /* sanity check */
402 break; /* sanity check */
395 bin = data + len;
403 bin = data + len;
396 if (bin < data)
404 if (bin < data)
397 break; /* big data + big (bogus) len can wrap around */
405 break; /* big data + big (bogus) len can wrap around */
398 data = bin + 12;
406 data = bin + 12;
399 outlen += start - last;
407 outlen += start - last;
400 last = end;
408 last = end;
401 outlen += len;
409 outlen += len;
402 }
410 }
403
411
404 if (bin != binend) {
412 if (bin != binend) {
405 if (!PyErr_Occurred())
413 if (!PyErr_Occurred())
406 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
414 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
407 return NULL;
415 return NULL;
408 }
416 }
409
417
410 outlen += orig - last;
418 outlen += orig - last;
411 return Py_BuildValue("l", outlen);
419 return Py_BuildValue("l", outlen);
412 }
420 }
413
421
414 static PyMethodDef methods[] = {
422 static PyMethodDef methods[] = {
415 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
423 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
416 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
424 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
417 {NULL, NULL}
425 {NULL, NULL}
418 };
426 };
419
427
420 PyMODINIT_FUNC
428 PyMODINIT_FUNC
421 initmpatch(void)
429 initmpatch(void)
422 {
430 {
423 Py_InitModule3("mpatch", methods, mpatch_doc);
431 Py_InitModule3("mpatch", methods, mpatch_doc);
424 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
432 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
425 }
433 }
426
434
General Comments 0
You need to be logged in to leave comments. Login now