##// END OF EJS Templates
mpatch: Define Py_ssize_t for old pythons and use it instead of ssize_t....
Shun-ichi GOTO -
r5459:b0e5f44f default
parent child Browse files
Show More
@@ -1,434 +1,444
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 /* Definitions to get compatibility with python 2.4 and earlier which
28 does not have Py_ssize_t. See also PEP 353.
29 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
30 */
31 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
32 typedef int Py_ssize_t;
33 #define PY_SSIZE_T_MAX INT_MAX
34 #define PY_SSIZE_T_MIN INT_MIN
35 #endif
36
27 37 #ifdef _WIN32
28 38 # ifdef _MSC_VER
29 39 /* msvc 6.0 has problems */
30 40 # define inline __inline
31 41 typedef unsigned long uint32_t;
32 42 # else
33 43 # include <stdint.h>
34 44 # endif
35 45 static uint32_t ntohl(uint32_t x)
36 46 {
37 47 return ((x & 0x000000ffUL) << 24) |
38 48 ((x & 0x0000ff00UL) << 8) |
39 49 ((x & 0x00ff0000UL) >> 8) |
40 50 ((x & 0xff000000UL) >> 24);
41 51 }
42 52 #else
43 53 /* not windows */
44 54 # include <sys/types.h>
45 55 # ifdef __BEOS__
46 56 # include <ByteOrder.h>
47 57 # else
48 58 # include <arpa/inet.h>
49 59 # endif
50 60 # include <inttypes.h>
51 61 #endif
52 62
53 63 static char mpatch_doc[] = "Efficient binary patching.";
54 64 static PyObject *mpatch_Error;
55 65
56 66 struct frag {
57 67 int start, end, len;
58 68 const char *data;
59 69 };
60 70
61 71 struct flist {
62 72 struct frag *base, *head, *tail;
63 73 };
64 74
65 75 static struct flist *lalloc(int size)
66 76 {
67 77 struct flist *a = NULL;
68 78
69 79 if (size < 1)
70 80 size = 1;
71 81
72 82 a = (struct flist *)malloc(sizeof(struct flist));
73 83 if (a) {
74 84 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
75 85 if (a->base) {
76 86 a->head = a->tail = a->base;
77 87 return a;
78 88 }
79 89 free(a);
80 90 a = NULL;
81 91 }
82 92 if (!PyErr_Occurred())
83 93 PyErr_NoMemory();
84 94 return NULL;
85 95 }
86 96
87 97 static void lfree(struct flist *a)
88 98 {
89 99 if (a) {
90 100 free(a->base);
91 101 free(a);
92 102 }
93 103 }
94 104
95 105 static int lsize(struct flist *a)
96 106 {
97 107 return a->tail - a->head;
98 108 }
99 109
100 110 /* move hunks in source that are less cut to dest, compensating
101 111 for changes in offset. the last hunk may be split if necessary.
102 112 */
103 113 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
104 114 {
105 115 struct frag *d = dest->tail, *s = src->head;
106 116 int postend, c, l;
107 117
108 118 while (s != src->tail) {
109 119 if (s->start + offset >= cut)
110 120 break; /* we've gone far enough */
111 121
112 122 postend = offset + s->start + s->len;
113 123 if (postend <= cut) {
114 124 /* save this hunk */
115 125 offset += s->start + s->len - s->end;
116 126 *d++ = *s++;
117 127 }
118 128 else {
119 129 /* break up this hunk */
120 130 c = cut - offset;
121 131 if (s->end < c)
122 132 c = s->end;
123 133 l = cut - offset - s->start;
124 134 if (s->len < l)
125 135 l = s->len;
126 136
127 137 offset += s->start + l - c;
128 138
129 139 d->start = s->start;
130 140 d->end = c;
131 141 d->len = l;
132 142 d->data = s->data;
133 143 d++;
134 144 s->start = c;
135 145 s->len = s->len - l;
136 146 s->data = s->data + l;
137 147
138 148 break;
139 149 }
140 150 }
141 151
142 152 dest->tail = d;
143 153 src->head = s;
144 154 return offset;
145 155 }
146 156
147 157 /* like gather, but with no output list */
148 158 static int discard(struct flist *src, int cut, int offset)
149 159 {
150 160 struct frag *s = src->head;
151 161 int postend, c, l;
152 162
153 163 while (s != src->tail) {
154 164 if (s->start + offset >= cut)
155 165 break;
156 166
157 167 postend = offset + s->start + s->len;
158 168 if (postend <= cut) {
159 169 offset += s->start + s->len - s->end;
160 170 s++;
161 171 }
162 172 else {
163 173 c = cut - offset;
164 174 if (s->end < c)
165 175 c = s->end;
166 176 l = cut - offset - s->start;
167 177 if (s->len < l)
168 178 l = s->len;
169 179
170 180 offset += s->start + l - c;
171 181 s->start = c;
172 182 s->len = s->len - l;
173 183 s->data = s->data + l;
174 184
175 185 break;
176 186 }
177 187 }
178 188
179 189 src->head = s;
180 190 return offset;
181 191 }
182 192
183 193 /* combine hunk lists a and b, while adjusting b for offset changes in a/
184 194 this deletes a and b and returns the resultant list. */
185 195 static struct flist *combine(struct flist *a, struct flist *b)
186 196 {
187 197 struct flist *c = NULL;
188 198 struct frag *bh, *ct;
189 199 int offset = 0, post;
190 200
191 201 if (a && b)
192 202 c = lalloc((lsize(a) + lsize(b)) * 2);
193 203
194 204 if (c) {
195 205
196 206 for (bh = b->head; bh != b->tail; bh++) {
197 207 /* save old hunks */
198 208 offset = gather(c, a, bh->start, offset);
199 209
200 210 /* discard replaced hunks */
201 211 post = discard(a, bh->end, offset);
202 212
203 213 /* insert new hunk */
204 214 ct = c->tail;
205 215 ct->start = bh->start - offset;
206 216 ct->end = bh->end - post;
207 217 ct->len = bh->len;
208 218 ct->data = bh->data;
209 219 c->tail++;
210 220 offset = post;
211 221 }
212 222
213 223 /* hold on to tail from a */
214 224 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
215 225 c->tail += lsize(a);
216 226 }
217 227
218 228 lfree(a);
219 229 lfree(b);
220 230 return c;
221 231 }
222 232
223 233 /* decode a binary patch into a hunk list */
224 234 static struct flist *decode(const char *bin, int len)
225 235 {
226 236 struct flist *l;
227 237 struct frag *lt;
228 238 const char *data = bin + 12, *end = bin + len;
229 239 char decode[12]; /* for dealing with alignment issues */
230 240
231 241 /* assume worst case size, we won't have many of these lists */
232 242 l = lalloc(len / 12);
233 243 if (!l)
234 244 return NULL;
235 245
236 246 lt = l->tail;
237 247
238 248 while (data <= end) {
239 249 memcpy(decode, bin, 12);
240 250 lt->start = ntohl(*(uint32_t *)decode);
241 251 lt->end = ntohl(*(uint32_t *)(decode + 4));
242 252 lt->len = ntohl(*(uint32_t *)(decode + 8));
243 253 if (lt->start > lt->end)
244 254 break; /* sanity check */
245 255 bin = data + lt->len;
246 256 if (bin < data)
247 257 break; /* big data + big (bogus) len can wrap around */
248 258 lt->data = data;
249 259 data = bin + 12;
250 260 lt++;
251 261 }
252 262
253 263 if (bin != end) {
254 264 if (!PyErr_Occurred())
255 265 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
256 266 lfree(l);
257 267 return NULL;
258 268 }
259 269
260 270 l->tail = lt;
261 271 return l;
262 272 }
263 273
264 274 /* calculate the size of resultant text */
265 275 static int calcsize(int len, struct flist *l)
266 276 {
267 277 int outlen = 0, last = 0;
268 278 struct frag *f = l->head;
269 279
270 280 while (f != l->tail) {
271 281 if (f->start < last || f->end > len) {
272 282 if (!PyErr_Occurred())
273 283 PyErr_SetString(mpatch_Error,
274 284 "invalid patch");
275 285 return -1;
276 286 }
277 287 outlen += f->start - last;
278 288 last = f->end;
279 289 outlen += f->len;
280 290 f++;
281 291 }
282 292
283 293 outlen += len - last;
284 294 return outlen;
285 295 }
286 296
287 297 static int apply(char *buf, const char *orig, int len, struct flist *l)
288 298 {
289 299 struct frag *f = l->head;
290 300 int last = 0;
291 301 char *p = buf;
292 302
293 303 while (f != l->tail) {
294 304 if (f->start < last || f->end > len) {
295 305 if (!PyErr_Occurred())
296 306 PyErr_SetString(mpatch_Error,
297 307 "invalid patch");
298 308 return 0;
299 309 }
300 310 memcpy(p, orig + last, f->start - last);
301 311 p += f->start - last;
302 312 memcpy(p, f->data, f->len);
303 313 last = f->end;
304 314 p += f->len;
305 315 f++;
306 316 }
307 317 memcpy(p, orig + last, len - last);
308 318 return 1;
309 319 }
310 320
311 321 /* recursively generate a patch of all bins between start and end */
312 322 static struct flist *fold(PyObject *bins, int start, int end)
313 323 {
314 324 int len;
315 ssize_t blen;
325 Py_ssize_t blen;
316 326 const char *buffer;
317 327
318 328 if (start + 1 == end) {
319 329 /* trivial case, output a decoded list */
320 330 PyObject *tmp = PyList_GetItem(bins, start);
321 331 if (!tmp)
322 332 return NULL;
323 333 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
324 334 return NULL;
325 335 return decode(buffer, blen);
326 336 }
327 337
328 338 /* divide and conquer, memory management is elsewhere */
329 339 len = (end - start) / 2;
330 340 return combine(fold(bins, start, start + len),
331 341 fold(bins, start + len, end));
332 342 }
333 343
334 344 static PyObject *
335 345 patches(PyObject *self, PyObject *args)
336 346 {
337 347 PyObject *text, *bins, *result;
338 348 struct flist *patch;
339 349 const char *in;
340 350 char *out;
341 351 int len, outlen;
342 ssize_t inlen;
352 Py_ssize_t inlen;
343 353
344 354 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
345 355 return NULL;
346 356
347 357 len = PyList_Size(bins);
348 358 if (!len) {
349 359 /* nothing to do */
350 360 Py_INCREF(text);
351 361 return text;
352 362 }
353 363
354 364 if (PyObject_AsCharBuffer(text, &in, &inlen))
355 365 return NULL;
356 366
357 367 patch = fold(bins, 0, len);
358 368 if (!patch)
359 369 return NULL;
360 370
361 371 outlen = calcsize(inlen, patch);
362 372 if (outlen < 0) {
363 373 result = NULL;
364 374 goto cleanup;
365 375 }
366 376 result = PyString_FromStringAndSize(NULL, outlen);
367 377 if (!result) {
368 378 result = NULL;
369 379 goto cleanup;
370 380 }
371 381 out = PyString_AsString(result);
372 382 if (!apply(out, in, inlen, patch)) {
373 383 Py_DECREF(result);
374 384 result = NULL;
375 385 }
376 386 cleanup:
377 387 lfree(patch);
378 388 return result;
379 389 }
380 390
381 391 /* calculate size of a patched file directly */
382 392 static PyObject *
383 393 patchedsize(PyObject *self, PyObject *args)
384 394 {
385 395 long orig, start, end, len, outlen = 0, last = 0;
386 396 int patchlen;
387 397 char *bin, *binend, *data;
388 398 char decode[12]; /* for dealing with alignment issues */
389 399
390 400 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
391 401 return NULL;
392 402
393 403 binend = bin + patchlen;
394 404 data = bin + 12;
395 405
396 406 while (data <= binend) {
397 407 memcpy(decode, bin, 12);
398 408 start = ntohl(*(uint32_t *)decode);
399 409 end = ntohl(*(uint32_t *)(decode + 4));
400 410 len = ntohl(*(uint32_t *)(decode + 8));
401 411 if (start > end)
402 412 break; /* sanity check */
403 413 bin = data + len;
404 414 if (bin < data)
405 415 break; /* big data + big (bogus) len can wrap around */
406 416 data = bin + 12;
407 417 outlen += start - last;
408 418 last = end;
409 419 outlen += len;
410 420 }
411 421
412 422 if (bin != binend) {
413 423 if (!PyErr_Occurred())
414 424 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
415 425 return NULL;
416 426 }
417 427
418 428 outlen += orig - last;
419 429 return Py_BuildValue("l", outlen);
420 430 }
421 431
422 432 static PyMethodDef methods[] = {
423 433 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
424 434 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
425 435 {NULL, NULL}
426 436 };
427 437
428 438 PyMODINIT_FUNC
429 439 initmpatch(void)
430 440 {
431 441 Py_InitModule3("mpatch", methods, mpatch_doc);
432 442 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
433 443 }
434 444
General Comments 0
You need to be logged in to leave comments. Login now