##// END OF EJS Templates
mpatch: Define Py_ssize_t for old pythons and use it instead of ssize_t....
Shun-ichi GOTO -
r5459:b0e5f44f default
parent child Browse files
Show More
@@ -1,434 +1,444
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 /* Definitions to get compatibility with python 2.4 and earlier which
28 does not have Py_ssize_t. See also PEP 353.
29 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
30 */
31 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
32 typedef int Py_ssize_t;
33 #define PY_SSIZE_T_MAX INT_MAX
34 #define PY_SSIZE_T_MIN INT_MIN
35 #endif
36
27 #ifdef _WIN32
37 #ifdef _WIN32
28 # ifdef _MSC_VER
38 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
39 /* msvc 6.0 has problems */
30 # define inline __inline
40 # define inline __inline
31 typedef unsigned long uint32_t;
41 typedef unsigned long uint32_t;
32 # else
42 # else
33 # include <stdint.h>
43 # include <stdint.h>
34 # endif
44 # endif
35 static uint32_t ntohl(uint32_t x)
45 static uint32_t ntohl(uint32_t x)
36 {
46 {
37 return ((x & 0x000000ffUL) << 24) |
47 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
48 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
49 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
50 ((x & 0xff000000UL) >> 24);
41 }
51 }
42 #else
52 #else
43 /* not windows */
53 /* not windows */
44 # include <sys/types.h>
54 # include <sys/types.h>
45 # ifdef __BEOS__
55 # ifdef __BEOS__
46 # include <ByteOrder.h>
56 # include <ByteOrder.h>
47 # else
57 # else
48 # include <arpa/inet.h>
58 # include <arpa/inet.h>
49 # endif
59 # endif
50 # include <inttypes.h>
60 # include <inttypes.h>
51 #endif
61 #endif
52
62
53 static char mpatch_doc[] = "Efficient binary patching.";
63 static char mpatch_doc[] = "Efficient binary patching.";
54 static PyObject *mpatch_Error;
64 static PyObject *mpatch_Error;
55
65
56 struct frag {
66 struct frag {
57 int start, end, len;
67 int start, end, len;
58 const char *data;
68 const char *data;
59 };
69 };
60
70
61 struct flist {
71 struct flist {
62 struct frag *base, *head, *tail;
72 struct frag *base, *head, *tail;
63 };
73 };
64
74
65 static struct flist *lalloc(int size)
75 static struct flist *lalloc(int size)
66 {
76 {
67 struct flist *a = NULL;
77 struct flist *a = NULL;
68
78
69 if (size < 1)
79 if (size < 1)
70 size = 1;
80 size = 1;
71
81
72 a = (struct flist *)malloc(sizeof(struct flist));
82 a = (struct flist *)malloc(sizeof(struct flist));
73 if (a) {
83 if (a) {
74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
84 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
75 if (a->base) {
85 if (a->base) {
76 a->head = a->tail = a->base;
86 a->head = a->tail = a->base;
77 return a;
87 return a;
78 }
88 }
79 free(a);
89 free(a);
80 a = NULL;
90 a = NULL;
81 }
91 }
82 if (!PyErr_Occurred())
92 if (!PyErr_Occurred())
83 PyErr_NoMemory();
93 PyErr_NoMemory();
84 return NULL;
94 return NULL;
85 }
95 }
86
96
87 static void lfree(struct flist *a)
97 static void lfree(struct flist *a)
88 {
98 {
89 if (a) {
99 if (a) {
90 free(a->base);
100 free(a->base);
91 free(a);
101 free(a);
92 }
102 }
93 }
103 }
94
104
95 static int lsize(struct flist *a)
105 static int lsize(struct flist *a)
96 {
106 {
97 return a->tail - a->head;
107 return a->tail - a->head;
98 }
108 }
99
109
100 /* move hunks in source that are less cut to dest, compensating
110 /* move hunks in source that are less cut to dest, compensating
101 for changes in offset. the last hunk may be split if necessary.
111 for changes in offset. the last hunk may be split if necessary.
102 */
112 */
103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
113 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
104 {
114 {
105 struct frag *d = dest->tail, *s = src->head;
115 struct frag *d = dest->tail, *s = src->head;
106 int postend, c, l;
116 int postend, c, l;
107
117
108 while (s != src->tail) {
118 while (s != src->tail) {
109 if (s->start + offset >= cut)
119 if (s->start + offset >= cut)
110 break; /* we've gone far enough */
120 break; /* we've gone far enough */
111
121
112 postend = offset + s->start + s->len;
122 postend = offset + s->start + s->len;
113 if (postend <= cut) {
123 if (postend <= cut) {
114 /* save this hunk */
124 /* save this hunk */
115 offset += s->start + s->len - s->end;
125 offset += s->start + s->len - s->end;
116 *d++ = *s++;
126 *d++ = *s++;
117 }
127 }
118 else {
128 else {
119 /* break up this hunk */
129 /* break up this hunk */
120 c = cut - offset;
130 c = cut - offset;
121 if (s->end < c)
131 if (s->end < c)
122 c = s->end;
132 c = s->end;
123 l = cut - offset - s->start;
133 l = cut - offset - s->start;
124 if (s->len < l)
134 if (s->len < l)
125 l = s->len;
135 l = s->len;
126
136
127 offset += s->start + l - c;
137 offset += s->start + l - c;
128
138
129 d->start = s->start;
139 d->start = s->start;
130 d->end = c;
140 d->end = c;
131 d->len = l;
141 d->len = l;
132 d->data = s->data;
142 d->data = s->data;
133 d++;
143 d++;
134 s->start = c;
144 s->start = c;
135 s->len = s->len - l;
145 s->len = s->len - l;
136 s->data = s->data + l;
146 s->data = s->data + l;
137
147
138 break;
148 break;
139 }
149 }
140 }
150 }
141
151
142 dest->tail = d;
152 dest->tail = d;
143 src->head = s;
153 src->head = s;
144 return offset;
154 return offset;
145 }
155 }
146
156
147 /* like gather, but with no output list */
157 /* like gather, but with no output list */
148 static int discard(struct flist *src, int cut, int offset)
158 static int discard(struct flist *src, int cut, int offset)
149 {
159 {
150 struct frag *s = src->head;
160 struct frag *s = src->head;
151 int postend, c, l;
161 int postend, c, l;
152
162
153 while (s != src->tail) {
163 while (s != src->tail) {
154 if (s->start + offset >= cut)
164 if (s->start + offset >= cut)
155 break;
165 break;
156
166
157 postend = offset + s->start + s->len;
167 postend = offset + s->start + s->len;
158 if (postend <= cut) {
168 if (postend <= cut) {
159 offset += s->start + s->len - s->end;
169 offset += s->start + s->len - s->end;
160 s++;
170 s++;
161 }
171 }
162 else {
172 else {
163 c = cut - offset;
173 c = cut - offset;
164 if (s->end < c)
174 if (s->end < c)
165 c = s->end;
175 c = s->end;
166 l = cut - offset - s->start;
176 l = cut - offset - s->start;
167 if (s->len < l)
177 if (s->len < l)
168 l = s->len;
178 l = s->len;
169
179
170 offset += s->start + l - c;
180 offset += s->start + l - c;
171 s->start = c;
181 s->start = c;
172 s->len = s->len - l;
182 s->len = s->len - l;
173 s->data = s->data + l;
183 s->data = s->data + l;
174
184
175 break;
185 break;
176 }
186 }
177 }
187 }
178
188
179 src->head = s;
189 src->head = s;
180 return offset;
190 return offset;
181 }
191 }
182
192
183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
193 /* combine hunk lists a and b, while adjusting b for offset changes in a/
184 this deletes a and b and returns the resultant list. */
194 this deletes a and b and returns the resultant list. */
185 static struct flist *combine(struct flist *a, struct flist *b)
195 static struct flist *combine(struct flist *a, struct flist *b)
186 {
196 {
187 struct flist *c = NULL;
197 struct flist *c = NULL;
188 struct frag *bh, *ct;
198 struct frag *bh, *ct;
189 int offset = 0, post;
199 int offset = 0, post;
190
200
191 if (a && b)
201 if (a && b)
192 c = lalloc((lsize(a) + lsize(b)) * 2);
202 c = lalloc((lsize(a) + lsize(b)) * 2);
193
203
194 if (c) {
204 if (c) {
195
205
196 for (bh = b->head; bh != b->tail; bh++) {
206 for (bh = b->head; bh != b->tail; bh++) {
197 /* save old hunks */
207 /* save old hunks */
198 offset = gather(c, a, bh->start, offset);
208 offset = gather(c, a, bh->start, offset);
199
209
200 /* discard replaced hunks */
210 /* discard replaced hunks */
201 post = discard(a, bh->end, offset);
211 post = discard(a, bh->end, offset);
202
212
203 /* insert new hunk */
213 /* insert new hunk */
204 ct = c->tail;
214 ct = c->tail;
205 ct->start = bh->start - offset;
215 ct->start = bh->start - offset;
206 ct->end = bh->end - post;
216 ct->end = bh->end - post;
207 ct->len = bh->len;
217 ct->len = bh->len;
208 ct->data = bh->data;
218 ct->data = bh->data;
209 c->tail++;
219 c->tail++;
210 offset = post;
220 offset = post;
211 }
221 }
212
222
213 /* hold on to tail from a */
223 /* hold on to tail from a */
214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
224 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
215 c->tail += lsize(a);
225 c->tail += lsize(a);
216 }
226 }
217
227
218 lfree(a);
228 lfree(a);
219 lfree(b);
229 lfree(b);
220 return c;
230 return c;
221 }
231 }
222
232
223 /* decode a binary patch into a hunk list */
233 /* decode a binary patch into a hunk list */
224 static struct flist *decode(const char *bin, int len)
234 static struct flist *decode(const char *bin, int len)
225 {
235 {
226 struct flist *l;
236 struct flist *l;
227 struct frag *lt;
237 struct frag *lt;
228 const char *data = bin + 12, *end = bin + len;
238 const char *data = bin + 12, *end = bin + len;
229 char decode[12]; /* for dealing with alignment issues */
239 char decode[12]; /* for dealing with alignment issues */
230
240
231 /* assume worst case size, we won't have many of these lists */
241 /* assume worst case size, we won't have many of these lists */
232 l = lalloc(len / 12);
242 l = lalloc(len / 12);
233 if (!l)
243 if (!l)
234 return NULL;
244 return NULL;
235
245
236 lt = l->tail;
246 lt = l->tail;
237
247
238 while (data <= end) {
248 while (data <= end) {
239 memcpy(decode, bin, 12);
249 memcpy(decode, bin, 12);
240 lt->start = ntohl(*(uint32_t *)decode);
250 lt->start = ntohl(*(uint32_t *)decode);
241 lt->end = ntohl(*(uint32_t *)(decode + 4));
251 lt->end = ntohl(*(uint32_t *)(decode + 4));
242 lt->len = ntohl(*(uint32_t *)(decode + 8));
252 lt->len = ntohl(*(uint32_t *)(decode + 8));
243 if (lt->start > lt->end)
253 if (lt->start > lt->end)
244 break; /* sanity check */
254 break; /* sanity check */
245 bin = data + lt->len;
255 bin = data + lt->len;
246 if (bin < data)
256 if (bin < data)
247 break; /* big data + big (bogus) len can wrap around */
257 break; /* big data + big (bogus) len can wrap around */
248 lt->data = data;
258 lt->data = data;
249 data = bin + 12;
259 data = bin + 12;
250 lt++;
260 lt++;
251 }
261 }
252
262
253 if (bin != end) {
263 if (bin != end) {
254 if (!PyErr_Occurred())
264 if (!PyErr_Occurred())
255 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
265 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
256 lfree(l);
266 lfree(l);
257 return NULL;
267 return NULL;
258 }
268 }
259
269
260 l->tail = lt;
270 l->tail = lt;
261 return l;
271 return l;
262 }
272 }
263
273
264 /* calculate the size of resultant text */
274 /* calculate the size of resultant text */
265 static int calcsize(int len, struct flist *l)
275 static int calcsize(int len, struct flist *l)
266 {
276 {
267 int outlen = 0, last = 0;
277 int outlen = 0, last = 0;
268 struct frag *f = l->head;
278 struct frag *f = l->head;
269
279
270 while (f != l->tail) {
280 while (f != l->tail) {
271 if (f->start < last || f->end > len) {
281 if (f->start < last || f->end > len) {
272 if (!PyErr_Occurred())
282 if (!PyErr_Occurred())
273 PyErr_SetString(mpatch_Error,
283 PyErr_SetString(mpatch_Error,
274 "invalid patch");
284 "invalid patch");
275 return -1;
285 return -1;
276 }
286 }
277 outlen += f->start - last;
287 outlen += f->start - last;
278 last = f->end;
288 last = f->end;
279 outlen += f->len;
289 outlen += f->len;
280 f++;
290 f++;
281 }
291 }
282
292
283 outlen += len - last;
293 outlen += len - last;
284 return outlen;
294 return outlen;
285 }
295 }
286
296
287 static int apply(char *buf, const char *orig, int len, struct flist *l)
297 static int apply(char *buf, const char *orig, int len, struct flist *l)
288 {
298 {
289 struct frag *f = l->head;
299 struct frag *f = l->head;
290 int last = 0;
300 int last = 0;
291 char *p = buf;
301 char *p = buf;
292
302
293 while (f != l->tail) {
303 while (f != l->tail) {
294 if (f->start < last || f->end > len) {
304 if (f->start < last || f->end > len) {
295 if (!PyErr_Occurred())
305 if (!PyErr_Occurred())
296 PyErr_SetString(mpatch_Error,
306 PyErr_SetString(mpatch_Error,
297 "invalid patch");
307 "invalid patch");
298 return 0;
308 return 0;
299 }
309 }
300 memcpy(p, orig + last, f->start - last);
310 memcpy(p, orig + last, f->start - last);
301 p += f->start - last;
311 p += f->start - last;
302 memcpy(p, f->data, f->len);
312 memcpy(p, f->data, f->len);
303 last = f->end;
313 last = f->end;
304 p += f->len;
314 p += f->len;
305 f++;
315 f++;
306 }
316 }
307 memcpy(p, orig + last, len - last);
317 memcpy(p, orig + last, len - last);
308 return 1;
318 return 1;
309 }
319 }
310
320
311 /* recursively generate a patch of all bins between start and end */
321 /* recursively generate a patch of all bins between start and end */
312 static struct flist *fold(PyObject *bins, int start, int end)
322 static struct flist *fold(PyObject *bins, int start, int end)
313 {
323 {
314 int len;
324 int len;
315 ssize_t blen;
325 Py_ssize_t blen;
316 const char *buffer;
326 const char *buffer;
317
327
318 if (start + 1 == end) {
328 if (start + 1 == end) {
319 /* trivial case, output a decoded list */
329 /* trivial case, output a decoded list */
320 PyObject *tmp = PyList_GetItem(bins, start);
330 PyObject *tmp = PyList_GetItem(bins, start);
321 if (!tmp)
331 if (!tmp)
322 return NULL;
332 return NULL;
323 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
333 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
324 return NULL;
334 return NULL;
325 return decode(buffer, blen);
335 return decode(buffer, blen);
326 }
336 }
327
337
328 /* divide and conquer, memory management is elsewhere */
338 /* divide and conquer, memory management is elsewhere */
329 len = (end - start) / 2;
339 len = (end - start) / 2;
330 return combine(fold(bins, start, start + len),
340 return combine(fold(bins, start, start + len),
331 fold(bins, start + len, end));
341 fold(bins, start + len, end));
332 }
342 }
333
343
334 static PyObject *
344 static PyObject *
335 patches(PyObject *self, PyObject *args)
345 patches(PyObject *self, PyObject *args)
336 {
346 {
337 PyObject *text, *bins, *result;
347 PyObject *text, *bins, *result;
338 struct flist *patch;
348 struct flist *patch;
339 const char *in;
349 const char *in;
340 char *out;
350 char *out;
341 int len, outlen;
351 int len, outlen;
342 ssize_t inlen;
352 Py_ssize_t inlen;
343
353
344 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
354 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
345 return NULL;
355 return NULL;
346
356
347 len = PyList_Size(bins);
357 len = PyList_Size(bins);
348 if (!len) {
358 if (!len) {
349 /* nothing to do */
359 /* nothing to do */
350 Py_INCREF(text);
360 Py_INCREF(text);
351 return text;
361 return text;
352 }
362 }
353
363
354 if (PyObject_AsCharBuffer(text, &in, &inlen))
364 if (PyObject_AsCharBuffer(text, &in, &inlen))
355 return NULL;
365 return NULL;
356
366
357 patch = fold(bins, 0, len);
367 patch = fold(bins, 0, len);
358 if (!patch)
368 if (!patch)
359 return NULL;
369 return NULL;
360
370
361 outlen = calcsize(inlen, patch);
371 outlen = calcsize(inlen, patch);
362 if (outlen < 0) {
372 if (outlen < 0) {
363 result = NULL;
373 result = NULL;
364 goto cleanup;
374 goto cleanup;
365 }
375 }
366 result = PyString_FromStringAndSize(NULL, outlen);
376 result = PyString_FromStringAndSize(NULL, outlen);
367 if (!result) {
377 if (!result) {
368 result = NULL;
378 result = NULL;
369 goto cleanup;
379 goto cleanup;
370 }
380 }
371 out = PyString_AsString(result);
381 out = PyString_AsString(result);
372 if (!apply(out, in, inlen, patch)) {
382 if (!apply(out, in, inlen, patch)) {
373 Py_DECREF(result);
383 Py_DECREF(result);
374 result = NULL;
384 result = NULL;
375 }
385 }
376 cleanup:
386 cleanup:
377 lfree(patch);
387 lfree(patch);
378 return result;
388 return result;
379 }
389 }
380
390
381 /* calculate size of a patched file directly */
391 /* calculate size of a patched file directly */
382 static PyObject *
392 static PyObject *
383 patchedsize(PyObject *self, PyObject *args)
393 patchedsize(PyObject *self, PyObject *args)
384 {
394 {
385 long orig, start, end, len, outlen = 0, last = 0;
395 long orig, start, end, len, outlen = 0, last = 0;
386 int patchlen;
396 int patchlen;
387 char *bin, *binend, *data;
397 char *bin, *binend, *data;
388 char decode[12]; /* for dealing with alignment issues */
398 char decode[12]; /* for dealing with alignment issues */
389
399
390 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
400 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
391 return NULL;
401 return NULL;
392
402
393 binend = bin + patchlen;
403 binend = bin + patchlen;
394 data = bin + 12;
404 data = bin + 12;
395
405
396 while (data <= binend) {
406 while (data <= binend) {
397 memcpy(decode, bin, 12);
407 memcpy(decode, bin, 12);
398 start = ntohl(*(uint32_t *)decode);
408 start = ntohl(*(uint32_t *)decode);
399 end = ntohl(*(uint32_t *)(decode + 4));
409 end = ntohl(*(uint32_t *)(decode + 4));
400 len = ntohl(*(uint32_t *)(decode + 8));
410 len = ntohl(*(uint32_t *)(decode + 8));
401 if (start > end)
411 if (start > end)
402 break; /* sanity check */
412 break; /* sanity check */
403 bin = data + len;
413 bin = data + len;
404 if (bin < data)
414 if (bin < data)
405 break; /* big data + big (bogus) len can wrap around */
415 break; /* big data + big (bogus) len can wrap around */
406 data = bin + 12;
416 data = bin + 12;
407 outlen += start - last;
417 outlen += start - last;
408 last = end;
418 last = end;
409 outlen += len;
419 outlen += len;
410 }
420 }
411
421
412 if (bin != binend) {
422 if (bin != binend) {
413 if (!PyErr_Occurred())
423 if (!PyErr_Occurred())
414 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
424 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
415 return NULL;
425 return NULL;
416 }
426 }
417
427
418 outlen += orig - last;
428 outlen += orig - last;
419 return Py_BuildValue("l", outlen);
429 return Py_BuildValue("l", outlen);
420 }
430 }
421
431
422 static PyMethodDef methods[] = {
432 static PyMethodDef methods[] = {
423 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
433 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
424 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
434 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
425 {NULL, NULL}
435 {NULL, NULL}
426 };
436 };
427
437
428 PyMODINIT_FUNC
438 PyMODINIT_FUNC
429 initmpatch(void)
439 initmpatch(void)
430 {
440 {
431 Py_InitModule3("mpatch", methods, mpatch_doc);
441 Py_InitModule3("mpatch", methods, mpatch_doc);
432 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
442 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
433 }
443 }
434
444
General Comments 0
You need to be logged in to leave comments. Login now