##// END OF EJS Templates
mpatch: use Py_ssize_t for string length
Adrian Buehlmann -
r16758:9a8ab5c4 default
parent child Browse files
Show More
@@ -1,428 +1,429
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #define PY_SSIZE_T_CLEAN
23 #include <Python.h>
24 #include <Python.h>
24 #include <stdlib.h>
25 #include <stdlib.h>
25 #include <string.h>
26 #include <string.h>
26
27
27 #include "util.h"
28 #include "util.h"
28
29
29 static char mpatch_doc[] = "Efficient binary patching.";
30 static char mpatch_doc[] = "Efficient binary patching.";
30 static PyObject *mpatch_Error;
31 static PyObject *mpatch_Error;
31
32
32 struct frag {
33 struct frag {
33 int start, end, len;
34 int start, end, len;
34 const char *data;
35 const char *data;
35 };
36 };
36
37
37 struct flist {
38 struct flist {
38 struct frag *base, *head, *tail;
39 struct frag *base, *head, *tail;
39 };
40 };
40
41
41 static struct flist *lalloc(Py_ssize_t size)
42 static struct flist *lalloc(Py_ssize_t size)
42 {
43 {
43 struct flist *a = NULL;
44 struct flist *a = NULL;
44
45
45 if (size < 1)
46 if (size < 1)
46 size = 1;
47 size = 1;
47
48
48 a = (struct flist *)malloc(sizeof(struct flist));
49 a = (struct flist *)malloc(sizeof(struct flist));
49 if (a) {
50 if (a) {
50 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 if (a->base) {
52 if (a->base) {
52 a->head = a->tail = a->base;
53 a->head = a->tail = a->base;
53 return a;
54 return a;
54 }
55 }
55 free(a);
56 free(a);
56 a = NULL;
57 a = NULL;
57 }
58 }
58 if (!PyErr_Occurred())
59 if (!PyErr_Occurred())
59 PyErr_NoMemory();
60 PyErr_NoMemory();
60 return NULL;
61 return NULL;
61 }
62 }
62
63
63 static void lfree(struct flist *a)
64 static void lfree(struct flist *a)
64 {
65 {
65 if (a) {
66 if (a) {
66 free(a->base);
67 free(a->base);
67 free(a);
68 free(a);
68 }
69 }
69 }
70 }
70
71
71 static Py_ssize_t lsize(struct flist *a)
72 static Py_ssize_t lsize(struct flist *a)
72 {
73 {
73 return a->tail - a->head;
74 return a->tail - a->head;
74 }
75 }
75
76
76 /* move hunks in source that are less cut to dest, compensating
77 /* move hunks in source that are less cut to dest, compensating
77 for changes in offset. the last hunk may be split if necessary.
78 for changes in offset. the last hunk may be split if necessary.
78 */
79 */
79 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 {
81 {
81 struct frag *d = dest->tail, *s = src->head;
82 struct frag *d = dest->tail, *s = src->head;
82 int postend, c, l;
83 int postend, c, l;
83
84
84 while (s != src->tail) {
85 while (s != src->tail) {
85 if (s->start + offset >= cut)
86 if (s->start + offset >= cut)
86 break; /* we've gone far enough */
87 break; /* we've gone far enough */
87
88
88 postend = offset + s->start + s->len;
89 postend = offset + s->start + s->len;
89 if (postend <= cut) {
90 if (postend <= cut) {
90 /* save this hunk */
91 /* save this hunk */
91 offset += s->start + s->len - s->end;
92 offset += s->start + s->len - s->end;
92 *d++ = *s++;
93 *d++ = *s++;
93 }
94 }
94 else {
95 else {
95 /* break up this hunk */
96 /* break up this hunk */
96 c = cut - offset;
97 c = cut - offset;
97 if (s->end < c)
98 if (s->end < c)
98 c = s->end;
99 c = s->end;
99 l = cut - offset - s->start;
100 l = cut - offset - s->start;
100 if (s->len < l)
101 if (s->len < l)
101 l = s->len;
102 l = s->len;
102
103
103 offset += s->start + l - c;
104 offset += s->start + l - c;
104
105
105 d->start = s->start;
106 d->start = s->start;
106 d->end = c;
107 d->end = c;
107 d->len = l;
108 d->len = l;
108 d->data = s->data;
109 d->data = s->data;
109 d++;
110 d++;
110 s->start = c;
111 s->start = c;
111 s->len = s->len - l;
112 s->len = s->len - l;
112 s->data = s->data + l;
113 s->data = s->data + l;
113
114
114 break;
115 break;
115 }
116 }
116 }
117 }
117
118
118 dest->tail = d;
119 dest->tail = d;
119 src->head = s;
120 src->head = s;
120 return offset;
121 return offset;
121 }
122 }
122
123
123 /* like gather, but with no output list */
124 /* like gather, but with no output list */
124 static int discard(struct flist *src, int cut, int offset)
125 static int discard(struct flist *src, int cut, int offset)
125 {
126 {
126 struct frag *s = src->head;
127 struct frag *s = src->head;
127 int postend, c, l;
128 int postend, c, l;
128
129
129 while (s != src->tail) {
130 while (s != src->tail) {
130 if (s->start + offset >= cut)
131 if (s->start + offset >= cut)
131 break;
132 break;
132
133
133 postend = offset + s->start + s->len;
134 postend = offset + s->start + s->len;
134 if (postend <= cut) {
135 if (postend <= cut) {
135 offset += s->start + s->len - s->end;
136 offset += s->start + s->len - s->end;
136 s++;
137 s++;
137 }
138 }
138 else {
139 else {
139 c = cut - offset;
140 c = cut - offset;
140 if (s->end < c)
141 if (s->end < c)
141 c = s->end;
142 c = s->end;
142 l = cut - offset - s->start;
143 l = cut - offset - s->start;
143 if (s->len < l)
144 if (s->len < l)
144 l = s->len;
145 l = s->len;
145
146
146 offset += s->start + l - c;
147 offset += s->start + l - c;
147 s->start = c;
148 s->start = c;
148 s->len = s->len - l;
149 s->len = s->len - l;
149 s->data = s->data + l;
150 s->data = s->data + l;
150
151
151 break;
152 break;
152 }
153 }
153 }
154 }
154
155
155 src->head = s;
156 src->head = s;
156 return offset;
157 return offset;
157 }
158 }
158
159
159 /* combine hunk lists a and b, while adjusting b for offset changes in a/
160 /* combine hunk lists a and b, while adjusting b for offset changes in a/
160 this deletes a and b and returns the resultant list. */
161 this deletes a and b and returns the resultant list. */
161 static struct flist *combine(struct flist *a, struct flist *b)
162 static struct flist *combine(struct flist *a, struct flist *b)
162 {
163 {
163 struct flist *c = NULL;
164 struct flist *c = NULL;
164 struct frag *bh, *ct;
165 struct frag *bh, *ct;
165 int offset = 0, post;
166 int offset = 0, post;
166
167
167 if (a && b)
168 if (a && b)
168 c = lalloc((lsize(a) + lsize(b)) * 2);
169 c = lalloc((lsize(a) + lsize(b)) * 2);
169
170
170 if (c) {
171 if (c) {
171
172
172 for (bh = b->head; bh != b->tail; bh++) {
173 for (bh = b->head; bh != b->tail; bh++) {
173 /* save old hunks */
174 /* save old hunks */
174 offset = gather(c, a, bh->start, offset);
175 offset = gather(c, a, bh->start, offset);
175
176
176 /* discard replaced hunks */
177 /* discard replaced hunks */
177 post = discard(a, bh->end, offset);
178 post = discard(a, bh->end, offset);
178
179
179 /* insert new hunk */
180 /* insert new hunk */
180 ct = c->tail;
181 ct = c->tail;
181 ct->start = bh->start - offset;
182 ct->start = bh->start - offset;
182 ct->end = bh->end - post;
183 ct->end = bh->end - post;
183 ct->len = bh->len;
184 ct->len = bh->len;
184 ct->data = bh->data;
185 ct->data = bh->data;
185 c->tail++;
186 c->tail++;
186 offset = post;
187 offset = post;
187 }
188 }
188
189
189 /* hold on to tail from a */
190 /* hold on to tail from a */
190 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 c->tail += lsize(a);
192 c->tail += lsize(a);
192 }
193 }
193
194
194 lfree(a);
195 lfree(a);
195 lfree(b);
196 lfree(b);
196 return c;
197 return c;
197 }
198 }
198
199
199 /* decode a binary patch into a hunk list */
200 /* decode a binary patch into a hunk list */
200 static struct flist *decode(const char *bin, Py_ssize_t len)
201 static struct flist *decode(const char *bin, Py_ssize_t len)
201 {
202 {
202 struct flist *l;
203 struct flist *l;
203 struct frag *lt;
204 struct frag *lt;
204 const char *data = bin + 12, *end = bin + len;
205 const char *data = bin + 12, *end = bin + len;
205
206
206 /* assume worst case size, we won't have many of these lists */
207 /* assume worst case size, we won't have many of these lists */
207 l = lalloc(len / 12);
208 l = lalloc(len / 12);
208 if (!l)
209 if (!l)
209 return NULL;
210 return NULL;
210
211
211 lt = l->tail;
212 lt = l->tail;
212
213
213 while (data <= end) {
214 while (data <= end) {
214 lt->start = getbe32(bin);
215 lt->start = getbe32(bin);
215 lt->end = getbe32(bin + 4);
216 lt->end = getbe32(bin + 4);
216 lt->len = getbe32(bin + 8);
217 lt->len = getbe32(bin + 8);
217 if (lt->start > lt->end)
218 if (lt->start > lt->end)
218 break; /* sanity check */
219 break; /* sanity check */
219 bin = data + lt->len;
220 bin = data + lt->len;
220 if (bin < data)
221 if (bin < data)
221 break; /* big data + big (bogus) len can wrap around */
222 break; /* big data + big (bogus) len can wrap around */
222 lt->data = data;
223 lt->data = data;
223 data = bin + 12;
224 data = bin + 12;
224 lt++;
225 lt++;
225 }
226 }
226
227
227 if (bin != end) {
228 if (bin != end) {
228 if (!PyErr_Occurred())
229 if (!PyErr_Occurred())
229 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
230 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
230 lfree(l);
231 lfree(l);
231 return NULL;
232 return NULL;
232 }
233 }
233
234
234 l->tail = lt;
235 l->tail = lt;
235 return l;
236 return l;
236 }
237 }
237
238
238 /* calculate the size of resultant text */
239 /* calculate the size of resultant text */
239 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
240 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
240 {
241 {
241 Py_ssize_t outlen = 0, last = 0;
242 Py_ssize_t outlen = 0, last = 0;
242 struct frag *f = l->head;
243 struct frag *f = l->head;
243
244
244 while (f != l->tail) {
245 while (f != l->tail) {
245 if (f->start < last || f->end > len) {
246 if (f->start < last || f->end > len) {
246 if (!PyErr_Occurred())
247 if (!PyErr_Occurred())
247 PyErr_SetString(mpatch_Error,
248 PyErr_SetString(mpatch_Error,
248 "invalid patch");
249 "invalid patch");
249 return -1;
250 return -1;
250 }
251 }
251 outlen += f->start - last;
252 outlen += f->start - last;
252 last = f->end;
253 last = f->end;
253 outlen += f->len;
254 outlen += f->len;
254 f++;
255 f++;
255 }
256 }
256
257
257 outlen += len - last;
258 outlen += len - last;
258 return outlen;
259 return outlen;
259 }
260 }
260
261
261 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
262 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
262 {
263 {
263 struct frag *f = l->head;
264 struct frag *f = l->head;
264 int last = 0;
265 int last = 0;
265 char *p = buf;
266 char *p = buf;
266
267
267 while (f != l->tail) {
268 while (f != l->tail) {
268 if (f->start < last || f->end > len) {
269 if (f->start < last || f->end > len) {
269 if (!PyErr_Occurred())
270 if (!PyErr_Occurred())
270 PyErr_SetString(mpatch_Error,
271 PyErr_SetString(mpatch_Error,
271 "invalid patch");
272 "invalid patch");
272 return 0;
273 return 0;
273 }
274 }
274 memcpy(p, orig + last, f->start - last);
275 memcpy(p, orig + last, f->start - last);
275 p += f->start - last;
276 p += f->start - last;
276 memcpy(p, f->data, f->len);
277 memcpy(p, f->data, f->len);
277 last = f->end;
278 last = f->end;
278 p += f->len;
279 p += f->len;
279 f++;
280 f++;
280 }
281 }
281 memcpy(p, orig + last, len - last);
282 memcpy(p, orig + last, len - last);
282 return 1;
283 return 1;
283 }
284 }
284
285
285 /* recursively generate a patch of all bins between start and end */
286 /* recursively generate a patch of all bins between start and end */
286 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
287 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
287 {
288 {
288 Py_ssize_t len, blen;
289 Py_ssize_t len, blen;
289 const char *buffer;
290 const char *buffer;
290
291
291 if (start + 1 == end) {
292 if (start + 1 == end) {
292 /* trivial case, output a decoded list */
293 /* trivial case, output a decoded list */
293 PyObject *tmp = PyList_GetItem(bins, start);
294 PyObject *tmp = PyList_GetItem(bins, start);
294 if (!tmp)
295 if (!tmp)
295 return NULL;
296 return NULL;
296 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
297 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
297 return NULL;
298 return NULL;
298 return decode(buffer, blen);
299 return decode(buffer, blen);
299 }
300 }
300
301
301 /* divide and conquer, memory management is elsewhere */
302 /* divide and conquer, memory management is elsewhere */
302 len = (end - start) / 2;
303 len = (end - start) / 2;
303 return combine(fold(bins, start, start + len),
304 return combine(fold(bins, start, start + len),
304 fold(bins, start + len, end));
305 fold(bins, start + len, end));
305 }
306 }
306
307
307 static PyObject *
308 static PyObject *
308 patches(PyObject *self, PyObject *args)
309 patches(PyObject *self, PyObject *args)
309 {
310 {
310 PyObject *text, *bins, *result;
311 PyObject *text, *bins, *result;
311 struct flist *patch;
312 struct flist *patch;
312 const char *in;
313 const char *in;
313 char *out;
314 char *out;
314 Py_ssize_t len, outlen, inlen;
315 Py_ssize_t len, outlen, inlen;
315
316
316 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
317 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
317 return NULL;
318 return NULL;
318
319
319 len = PyList_Size(bins);
320 len = PyList_Size(bins);
320 if (!len) {
321 if (!len) {
321 /* nothing to do */
322 /* nothing to do */
322 Py_INCREF(text);
323 Py_INCREF(text);
323 return text;
324 return text;
324 }
325 }
325
326
326 if (PyObject_AsCharBuffer(text, &in, &inlen))
327 if (PyObject_AsCharBuffer(text, &in, &inlen))
327 return NULL;
328 return NULL;
328
329
329 patch = fold(bins, 0, len);
330 patch = fold(bins, 0, len);
330 if (!patch)
331 if (!patch)
331 return NULL;
332 return NULL;
332
333
333 outlen = calcsize(inlen, patch);
334 outlen = calcsize(inlen, patch);
334 if (outlen < 0) {
335 if (outlen < 0) {
335 result = NULL;
336 result = NULL;
336 goto cleanup;
337 goto cleanup;
337 }
338 }
338 result = PyBytes_FromStringAndSize(NULL, outlen);
339 result = PyBytes_FromStringAndSize(NULL, outlen);
339 if (!result) {
340 if (!result) {
340 result = NULL;
341 result = NULL;
341 goto cleanup;
342 goto cleanup;
342 }
343 }
343 out = PyBytes_AsString(result);
344 out = PyBytes_AsString(result);
344 if (!apply(out, in, inlen, patch)) {
345 if (!apply(out, in, inlen, patch)) {
345 Py_DECREF(result);
346 Py_DECREF(result);
346 result = NULL;
347 result = NULL;
347 }
348 }
348 cleanup:
349 cleanup:
349 lfree(patch);
350 lfree(patch);
350 return result;
351 return result;
351 }
352 }
352
353
353 /* calculate size of a patched file directly */
354 /* calculate size of a patched file directly */
354 static PyObject *
355 static PyObject *
355 patchedsize(PyObject *self, PyObject *args)
356 patchedsize(PyObject *self, PyObject *args)
356 {
357 {
357 long orig, start, end, len, outlen = 0, last = 0;
358 long orig, start, end, len, outlen = 0, last = 0;
358 int patchlen;
359 Py_ssize_t patchlen;
359 char *bin, *binend, *data;
360 char *bin, *binend, *data;
360
361
361 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
362 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
362 return NULL;
363 return NULL;
363
364
364 binend = bin + patchlen;
365 binend = bin + patchlen;
365 data = bin + 12;
366 data = bin + 12;
366
367
367 while (data <= binend) {
368 while (data <= binend) {
368 start = getbe32(bin);
369 start = getbe32(bin);
369 end = getbe32(bin + 4);
370 end = getbe32(bin + 4);
370 len = getbe32(bin + 8);
371 len = getbe32(bin + 8);
371 if (start > end)
372 if (start > end)
372 break; /* sanity check */
373 break; /* sanity check */
373 bin = data + len;
374 bin = data + len;
374 if (bin < data)
375 if (bin < data)
375 break; /* big data + big (bogus) len can wrap around */
376 break; /* big data + big (bogus) len can wrap around */
376 data = bin + 12;
377 data = bin + 12;
377 outlen += start - last;
378 outlen += start - last;
378 last = end;
379 last = end;
379 outlen += len;
380 outlen += len;
380 }
381 }
381
382
382 if (bin != binend) {
383 if (bin != binend) {
383 if (!PyErr_Occurred())
384 if (!PyErr_Occurred())
384 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
385 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
385 return NULL;
386 return NULL;
386 }
387 }
387
388
388 outlen += orig - last;
389 outlen += orig - last;
389 return Py_BuildValue("l", outlen);
390 return Py_BuildValue("l", outlen);
390 }
391 }
391
392
392 static PyMethodDef methods[] = {
393 static PyMethodDef methods[] = {
393 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
394 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
394 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
395 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
395 {NULL, NULL}
396 {NULL, NULL}
396 };
397 };
397
398
398 #ifdef IS_PY3K
399 #ifdef IS_PY3K
399 static struct PyModuleDef mpatch_module = {
400 static struct PyModuleDef mpatch_module = {
400 PyModuleDef_HEAD_INIT,
401 PyModuleDef_HEAD_INIT,
401 "mpatch",
402 "mpatch",
402 mpatch_doc,
403 mpatch_doc,
403 -1,
404 -1,
404 methods
405 methods
405 };
406 };
406
407
407 PyMODINIT_FUNC PyInit_mpatch(void)
408 PyMODINIT_FUNC PyInit_mpatch(void)
408 {
409 {
409 PyObject *m;
410 PyObject *m;
410
411
411 m = PyModule_Create(&mpatch_module);
412 m = PyModule_Create(&mpatch_module);
412 if (m == NULL)
413 if (m == NULL)
413 return NULL;
414 return NULL;
414
415
415 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
416 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
416 Py_INCREF(mpatch_Error);
417 Py_INCREF(mpatch_Error);
417 PyModule_AddObject(m, "mpatchError", mpatch_Error);
418 PyModule_AddObject(m, "mpatchError", mpatch_Error);
418
419
419 return m;
420 return m;
420 }
421 }
421 #else
422 #else
422 PyMODINIT_FUNC
423 PyMODINIT_FUNC
423 initmpatch(void)
424 initmpatch(void)
424 {
425 {
425 Py_InitModule3("mpatch", methods, mpatch_doc);
426 Py_InitModule3("mpatch", methods, mpatch_doc);
426 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
427 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
427 }
428 }
428 #endif
429 #endif
General Comments 0
You need to be logged in to leave comments. Login now