##// END OF EJS Templates
mpatch: use Py_ssize_t...
Adrian Buehlmann -
r16757:923bd97b default
parent child Browse files
Show More
@@ -1,430 +1,428 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #include "util.h"
27 #include "util.h"
28
28
29 static char mpatch_doc[] = "Efficient binary patching.";
29 static char mpatch_doc[] = "Efficient binary patching.";
30 static PyObject *mpatch_Error;
30 static PyObject *mpatch_Error;
31
31
32 struct frag {
32 struct frag {
33 int start, end, len;
33 int start, end, len;
34 const char *data;
34 const char *data;
35 };
35 };
36
36
37 struct flist {
37 struct flist {
38 struct frag *base, *head, *tail;
38 struct frag *base, *head, *tail;
39 };
39 };
40
40
41 static struct flist *lalloc(int size)
41 static struct flist *lalloc(Py_ssize_t size)
42 {
42 {
43 struct flist *a = NULL;
43 struct flist *a = NULL;
44
44
45 if (size < 1)
45 if (size < 1)
46 size = 1;
46 size = 1;
47
47
48 a = (struct flist *)malloc(sizeof(struct flist));
48 a = (struct flist *)malloc(sizeof(struct flist));
49 if (a) {
49 if (a) {
50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 if (a->base) {
51 if (a->base) {
52 a->head = a->tail = a->base;
52 a->head = a->tail = a->base;
53 return a;
53 return a;
54 }
54 }
55 free(a);
55 free(a);
56 a = NULL;
56 a = NULL;
57 }
57 }
58 if (!PyErr_Occurred())
58 if (!PyErr_Occurred())
59 PyErr_NoMemory();
59 PyErr_NoMemory();
60 return NULL;
60 return NULL;
61 }
61 }
62
62
63 static void lfree(struct flist *a)
63 static void lfree(struct flist *a)
64 {
64 {
65 if (a) {
65 if (a) {
66 free(a->base);
66 free(a->base);
67 free(a);
67 free(a);
68 }
68 }
69 }
69 }
70
70
71 static int lsize(struct flist *a)
71 static Py_ssize_t lsize(struct flist *a)
72 {
72 {
73 return a->tail - a->head;
73 return a->tail - a->head;
74 }
74 }
75
75
76 /* move hunks in source that are less cut to dest, compensating
76 /* move hunks in source that are less cut to dest, compensating
77 for changes in offset. the last hunk may be split if necessary.
77 for changes in offset. the last hunk may be split if necessary.
78 */
78 */
79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 {
80 {
81 struct frag *d = dest->tail, *s = src->head;
81 struct frag *d = dest->tail, *s = src->head;
82 int postend, c, l;
82 int postend, c, l;
83
83
84 while (s != src->tail) {
84 while (s != src->tail) {
85 if (s->start + offset >= cut)
85 if (s->start + offset >= cut)
86 break; /* we've gone far enough */
86 break; /* we've gone far enough */
87
87
88 postend = offset + s->start + s->len;
88 postend = offset + s->start + s->len;
89 if (postend <= cut) {
89 if (postend <= cut) {
90 /* save this hunk */
90 /* save this hunk */
91 offset += s->start + s->len - s->end;
91 offset += s->start + s->len - s->end;
92 *d++ = *s++;
92 *d++ = *s++;
93 }
93 }
94 else {
94 else {
95 /* break up this hunk */
95 /* break up this hunk */
96 c = cut - offset;
96 c = cut - offset;
97 if (s->end < c)
97 if (s->end < c)
98 c = s->end;
98 c = s->end;
99 l = cut - offset - s->start;
99 l = cut - offset - s->start;
100 if (s->len < l)
100 if (s->len < l)
101 l = s->len;
101 l = s->len;
102
102
103 offset += s->start + l - c;
103 offset += s->start + l - c;
104
104
105 d->start = s->start;
105 d->start = s->start;
106 d->end = c;
106 d->end = c;
107 d->len = l;
107 d->len = l;
108 d->data = s->data;
108 d->data = s->data;
109 d++;
109 d++;
110 s->start = c;
110 s->start = c;
111 s->len = s->len - l;
111 s->len = s->len - l;
112 s->data = s->data + l;
112 s->data = s->data + l;
113
113
114 break;
114 break;
115 }
115 }
116 }
116 }
117
117
118 dest->tail = d;
118 dest->tail = d;
119 src->head = s;
119 src->head = s;
120 return offset;
120 return offset;
121 }
121 }
122
122
123 /* like gather, but with no output list */
123 /* like gather, but with no output list */
124 static int discard(struct flist *src, int cut, int offset)
124 static int discard(struct flist *src, int cut, int offset)
125 {
125 {
126 struct frag *s = src->head;
126 struct frag *s = src->head;
127 int postend, c, l;
127 int postend, c, l;
128
128
129 while (s != src->tail) {
129 while (s != src->tail) {
130 if (s->start + offset >= cut)
130 if (s->start + offset >= cut)
131 break;
131 break;
132
132
133 postend = offset + s->start + s->len;
133 postend = offset + s->start + s->len;
134 if (postend <= cut) {
134 if (postend <= cut) {
135 offset += s->start + s->len - s->end;
135 offset += s->start + s->len - s->end;
136 s++;
136 s++;
137 }
137 }
138 else {
138 else {
139 c = cut - offset;
139 c = cut - offset;
140 if (s->end < c)
140 if (s->end < c)
141 c = s->end;
141 c = s->end;
142 l = cut - offset - s->start;
142 l = cut - offset - s->start;
143 if (s->len < l)
143 if (s->len < l)
144 l = s->len;
144 l = s->len;
145
145
146 offset += s->start + l - c;
146 offset += s->start + l - c;
147 s->start = c;
147 s->start = c;
148 s->len = s->len - l;
148 s->len = s->len - l;
149 s->data = s->data + l;
149 s->data = s->data + l;
150
150
151 break;
151 break;
152 }
152 }
153 }
153 }
154
154
155 src->head = s;
155 src->head = s;
156 return offset;
156 return offset;
157 }
157 }
158
158
159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
160 this deletes a and b and returns the resultant list. */
160 this deletes a and b and returns the resultant list. */
161 static struct flist *combine(struct flist *a, struct flist *b)
161 static struct flist *combine(struct flist *a, struct flist *b)
162 {
162 {
163 struct flist *c = NULL;
163 struct flist *c = NULL;
164 struct frag *bh, *ct;
164 struct frag *bh, *ct;
165 int offset = 0, post;
165 int offset = 0, post;
166
166
167 if (a && b)
167 if (a && b)
168 c = lalloc((lsize(a) + lsize(b)) * 2);
168 c = lalloc((lsize(a) + lsize(b)) * 2);
169
169
170 if (c) {
170 if (c) {
171
171
172 for (bh = b->head; bh != b->tail; bh++) {
172 for (bh = b->head; bh != b->tail; bh++) {
173 /* save old hunks */
173 /* save old hunks */
174 offset = gather(c, a, bh->start, offset);
174 offset = gather(c, a, bh->start, offset);
175
175
176 /* discard replaced hunks */
176 /* discard replaced hunks */
177 post = discard(a, bh->end, offset);
177 post = discard(a, bh->end, offset);
178
178
179 /* insert new hunk */
179 /* insert new hunk */
180 ct = c->tail;
180 ct = c->tail;
181 ct->start = bh->start - offset;
181 ct->start = bh->start - offset;
182 ct->end = bh->end - post;
182 ct->end = bh->end - post;
183 ct->len = bh->len;
183 ct->len = bh->len;
184 ct->data = bh->data;
184 ct->data = bh->data;
185 c->tail++;
185 c->tail++;
186 offset = post;
186 offset = post;
187 }
187 }
188
188
189 /* hold on to tail from a */
189 /* hold on to tail from a */
190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 c->tail += lsize(a);
191 c->tail += lsize(a);
192 }
192 }
193
193
194 lfree(a);
194 lfree(a);
195 lfree(b);
195 lfree(b);
196 return c;
196 return c;
197 }
197 }
198
198
199 /* decode a binary patch into a hunk list */
199 /* decode a binary patch into a hunk list */
200 static struct flist *decode(const char *bin, int len)
200 static struct flist *decode(const char *bin, Py_ssize_t len)
201 {
201 {
202 struct flist *l;
202 struct flist *l;
203 struct frag *lt;
203 struct frag *lt;
204 const char *data = bin + 12, *end = bin + len;
204 const char *data = bin + 12, *end = bin + len;
205
205
206 /* assume worst case size, we won't have many of these lists */
206 /* assume worst case size, we won't have many of these lists */
207 l = lalloc(len / 12);
207 l = lalloc(len / 12);
208 if (!l)
208 if (!l)
209 return NULL;
209 return NULL;
210
210
211 lt = l->tail;
211 lt = l->tail;
212
212
213 while (data <= end) {
213 while (data <= end) {
214 lt->start = getbe32(bin);
214 lt->start = getbe32(bin);
215 lt->end = getbe32(bin + 4);
215 lt->end = getbe32(bin + 4);
216 lt->len = getbe32(bin + 8);
216 lt->len = getbe32(bin + 8);
217 if (lt->start > lt->end)
217 if (lt->start > lt->end)
218 break; /* sanity check */
218 break; /* sanity check */
219 bin = data + lt->len;
219 bin = data + lt->len;
220 if (bin < data)
220 if (bin < data)
221 break; /* big data + big (bogus) len can wrap around */
221 break; /* big data + big (bogus) len can wrap around */
222 lt->data = data;
222 lt->data = data;
223 data = bin + 12;
223 data = bin + 12;
224 lt++;
224 lt++;
225 }
225 }
226
226
227 if (bin != end) {
227 if (bin != end) {
228 if (!PyErr_Occurred())
228 if (!PyErr_Occurred())
229 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
229 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
230 lfree(l);
230 lfree(l);
231 return NULL;
231 return NULL;
232 }
232 }
233
233
234 l->tail = lt;
234 l->tail = lt;
235 return l;
235 return l;
236 }
236 }
237
237
238 /* calculate the size of resultant text */
238 /* calculate the size of resultant text */
239 static int calcsize(int len, struct flist *l)
239 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
240 {
240 {
241 int outlen = 0, last = 0;
241 Py_ssize_t outlen = 0, last = 0;
242 struct frag *f = l->head;
242 struct frag *f = l->head;
243
243
244 while (f != l->tail) {
244 while (f != l->tail) {
245 if (f->start < last || f->end > len) {
245 if (f->start < last || f->end > len) {
246 if (!PyErr_Occurred())
246 if (!PyErr_Occurred())
247 PyErr_SetString(mpatch_Error,
247 PyErr_SetString(mpatch_Error,
248 "invalid patch");
248 "invalid patch");
249 return -1;
249 return -1;
250 }
250 }
251 outlen += f->start - last;
251 outlen += f->start - last;
252 last = f->end;
252 last = f->end;
253 outlen += f->len;
253 outlen += f->len;
254 f++;
254 f++;
255 }
255 }
256
256
257 outlen += len - last;
257 outlen += len - last;
258 return outlen;
258 return outlen;
259 }
259 }
260
260
261 static int apply(char *buf, const char *orig, int len, struct flist *l)
261 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
262 {
262 {
263 struct frag *f = l->head;
263 struct frag *f = l->head;
264 int last = 0;
264 int last = 0;
265 char *p = buf;
265 char *p = buf;
266
266
267 while (f != l->tail) {
267 while (f != l->tail) {
268 if (f->start < last || f->end > len) {
268 if (f->start < last || f->end > len) {
269 if (!PyErr_Occurred())
269 if (!PyErr_Occurred())
270 PyErr_SetString(mpatch_Error,
270 PyErr_SetString(mpatch_Error,
271 "invalid patch");
271 "invalid patch");
272 return 0;
272 return 0;
273 }
273 }
274 memcpy(p, orig + last, f->start - last);
274 memcpy(p, orig + last, f->start - last);
275 p += f->start - last;
275 p += f->start - last;
276 memcpy(p, f->data, f->len);
276 memcpy(p, f->data, f->len);
277 last = f->end;
277 last = f->end;
278 p += f->len;
278 p += f->len;
279 f++;
279 f++;
280 }
280 }
281 memcpy(p, orig + last, len - last);
281 memcpy(p, orig + last, len - last);
282 return 1;
282 return 1;
283 }
283 }
284
284
285 /* recursively generate a patch of all bins between start and end */
285 /* recursively generate a patch of all bins between start and end */
286 static struct flist *fold(PyObject *bins, int start, int end)
286 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
287 {
287 {
288 int len;
288 Py_ssize_t len, blen;
289 Py_ssize_t blen;
290 const char *buffer;
289 const char *buffer;
291
290
292 if (start + 1 == end) {
291 if (start + 1 == end) {
293 /* trivial case, output a decoded list */
292 /* trivial case, output a decoded list */
294 PyObject *tmp = PyList_GetItem(bins, start);
293 PyObject *tmp = PyList_GetItem(bins, start);
295 if (!tmp)
294 if (!tmp)
296 return NULL;
295 return NULL;
297 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
296 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
298 return NULL;
297 return NULL;
299 return decode(buffer, blen);
298 return decode(buffer, blen);
300 }
299 }
301
300
302 /* divide and conquer, memory management is elsewhere */
301 /* divide and conquer, memory management is elsewhere */
303 len = (end - start) / 2;
302 len = (end - start) / 2;
304 return combine(fold(bins, start, start + len),
303 return combine(fold(bins, start, start + len),
305 fold(bins, start + len, end));
304 fold(bins, start + len, end));
306 }
305 }
307
306
308 static PyObject *
307 static PyObject *
309 patches(PyObject *self, PyObject *args)
308 patches(PyObject *self, PyObject *args)
310 {
309 {
311 PyObject *text, *bins, *result;
310 PyObject *text, *bins, *result;
312 struct flist *patch;
311 struct flist *patch;
313 const char *in;
312 const char *in;
314 char *out;
313 char *out;
315 int len, outlen;
314 Py_ssize_t len, outlen, inlen;
316 Py_ssize_t inlen;
317
315
318 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
316 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
319 return NULL;
317 return NULL;
320
318
321 len = PyList_Size(bins);
319 len = PyList_Size(bins);
322 if (!len) {
320 if (!len) {
323 /* nothing to do */
321 /* nothing to do */
324 Py_INCREF(text);
322 Py_INCREF(text);
325 return text;
323 return text;
326 }
324 }
327
325
328 if (PyObject_AsCharBuffer(text, &in, &inlen))
326 if (PyObject_AsCharBuffer(text, &in, &inlen))
329 return NULL;
327 return NULL;
330
328
331 patch = fold(bins, 0, len);
329 patch = fold(bins, 0, len);
332 if (!patch)
330 if (!patch)
333 return NULL;
331 return NULL;
334
332
335 outlen = calcsize(inlen, patch);
333 outlen = calcsize(inlen, patch);
336 if (outlen < 0) {
334 if (outlen < 0) {
337 result = NULL;
335 result = NULL;
338 goto cleanup;
336 goto cleanup;
339 }
337 }
340 result = PyBytes_FromStringAndSize(NULL, outlen);
338 result = PyBytes_FromStringAndSize(NULL, outlen);
341 if (!result) {
339 if (!result) {
342 result = NULL;
340 result = NULL;
343 goto cleanup;
341 goto cleanup;
344 }
342 }
345 out = PyBytes_AsString(result);
343 out = PyBytes_AsString(result);
346 if (!apply(out, in, inlen, patch)) {
344 if (!apply(out, in, inlen, patch)) {
347 Py_DECREF(result);
345 Py_DECREF(result);
348 result = NULL;
346 result = NULL;
349 }
347 }
350 cleanup:
348 cleanup:
351 lfree(patch);
349 lfree(patch);
352 return result;
350 return result;
353 }
351 }
354
352
355 /* calculate size of a patched file directly */
353 /* calculate size of a patched file directly */
356 static PyObject *
354 static PyObject *
357 patchedsize(PyObject *self, PyObject *args)
355 patchedsize(PyObject *self, PyObject *args)
358 {
356 {
359 long orig, start, end, len, outlen = 0, last = 0;
357 long orig, start, end, len, outlen = 0, last = 0;
360 int patchlen;
358 int patchlen;
361 char *bin, *binend, *data;
359 char *bin, *binend, *data;
362
360
363 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
361 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
364 return NULL;
362 return NULL;
365
363
366 binend = bin + patchlen;
364 binend = bin + patchlen;
367 data = bin + 12;
365 data = bin + 12;
368
366
369 while (data <= binend) {
367 while (data <= binend) {
370 start = getbe32(bin);
368 start = getbe32(bin);
371 end = getbe32(bin + 4);
369 end = getbe32(bin + 4);
372 len = getbe32(bin + 8);
370 len = getbe32(bin + 8);
373 if (start > end)
371 if (start > end)
374 break; /* sanity check */
372 break; /* sanity check */
375 bin = data + len;
373 bin = data + len;
376 if (bin < data)
374 if (bin < data)
377 break; /* big data + big (bogus) len can wrap around */
375 break; /* big data + big (bogus) len can wrap around */
378 data = bin + 12;
376 data = bin + 12;
379 outlen += start - last;
377 outlen += start - last;
380 last = end;
378 last = end;
381 outlen += len;
379 outlen += len;
382 }
380 }
383
381
384 if (bin != binend) {
382 if (bin != binend) {
385 if (!PyErr_Occurred())
383 if (!PyErr_Occurred())
386 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
384 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
387 return NULL;
385 return NULL;
388 }
386 }
389
387
390 outlen += orig - last;
388 outlen += orig - last;
391 return Py_BuildValue("l", outlen);
389 return Py_BuildValue("l", outlen);
392 }
390 }
393
391
394 static PyMethodDef methods[] = {
392 static PyMethodDef methods[] = {
395 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
393 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
396 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
394 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
397 {NULL, NULL}
395 {NULL, NULL}
398 };
396 };
399
397
400 #ifdef IS_PY3K
398 #ifdef IS_PY3K
401 static struct PyModuleDef mpatch_module = {
399 static struct PyModuleDef mpatch_module = {
402 PyModuleDef_HEAD_INIT,
400 PyModuleDef_HEAD_INIT,
403 "mpatch",
401 "mpatch",
404 mpatch_doc,
402 mpatch_doc,
405 -1,
403 -1,
406 methods
404 methods
407 };
405 };
408
406
409 PyMODINIT_FUNC PyInit_mpatch(void)
407 PyMODINIT_FUNC PyInit_mpatch(void)
410 {
408 {
411 PyObject *m;
409 PyObject *m;
412
410
413 m = PyModule_Create(&mpatch_module);
411 m = PyModule_Create(&mpatch_module);
414 if (m == NULL)
412 if (m == NULL)
415 return NULL;
413 return NULL;
416
414
417 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
415 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
418 Py_INCREF(mpatch_Error);
416 Py_INCREF(mpatch_Error);
419 PyModule_AddObject(m, "mpatchError", mpatch_Error);
417 PyModule_AddObject(m, "mpatchError", mpatch_Error);
420
418
421 return m;
419 return m;
422 }
420 }
423 #else
421 #else
424 PyMODINIT_FUNC
422 PyMODINIT_FUNC
425 initmpatch(void)
423 initmpatch(void)
426 {
424 {
427 Py_InitModule3("mpatch", methods, mpatch_doc);
425 Py_InitModule3("mpatch", methods, mpatch_doc);
428 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
426 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
429 }
427 }
430 #endif
428 #endif
General Comments 0
You need to be logged in to leave comments. Login now