##// END OF EJS Templates
mpatch.c: Added preliminary support for py3k....
Renato Cunha -
r11360:2ac98313 default
parent child Browse files
Show More
@@ -1,444 +1,470
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 #include "util.h"
28
27 29 /* Definitions to get compatibility with python 2.4 and earlier which
28 30 does not have Py_ssize_t. See also PEP 353.
29 31 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
30 32 */
31 33 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
32 34 typedef int Py_ssize_t;
33 35 #define PY_SSIZE_T_MAX INT_MAX
34 36 #define PY_SSIZE_T_MIN INT_MIN
35 37 #endif
36 38
37 39 #ifdef _WIN32
38 40 #ifdef _MSC_VER
39 41 /* msvc 6.0 has problems */
40 42 #define inline __inline
41 43 typedef unsigned long uint32_t;
42 44 #else
43 45 #include <stdint.h>
44 46 #endif
45 47 static uint32_t ntohl(uint32_t x)
46 48 {
47 49 return ((x & 0x000000ffUL) << 24) |
48 50 ((x & 0x0000ff00UL) << 8) |
49 51 ((x & 0x00ff0000UL) >> 8) |
50 52 ((x & 0xff000000UL) >> 24);
51 53 }
52 54 #else
53 55 /* not windows */
54 56 #include <sys/types.h>
55 57 #if defined __BEOS__ && !defined __HAIKU__
56 58 #include <ByteOrder.h>
57 59 #else
58 60 #include <arpa/inet.h>
59 61 #endif
60 62 #include <inttypes.h>
61 63 #endif
62 64
63 65 static char mpatch_doc[] = "Efficient binary patching.";
64 66 static PyObject *mpatch_Error;
65 67
66 68 struct frag {
67 69 int start, end, len;
68 70 const char *data;
69 71 };
70 72
71 73 struct flist {
72 74 struct frag *base, *head, *tail;
73 75 };
74 76
75 77 static struct flist *lalloc(int size)
76 78 {
77 79 struct flist *a = NULL;
78 80
79 81 if (size < 1)
80 82 size = 1;
81 83
82 84 a = (struct flist *)malloc(sizeof(struct flist));
83 85 if (a) {
84 86 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
85 87 if (a->base) {
86 88 a->head = a->tail = a->base;
87 89 return a;
88 90 }
89 91 free(a);
90 92 a = NULL;
91 93 }
92 94 if (!PyErr_Occurred())
93 95 PyErr_NoMemory();
94 96 return NULL;
95 97 }
96 98
97 99 static void lfree(struct flist *a)
98 100 {
99 101 if (a) {
100 102 free(a->base);
101 103 free(a);
102 104 }
103 105 }
104 106
105 107 static int lsize(struct flist *a)
106 108 {
107 109 return a->tail - a->head;
108 110 }
109 111
110 112 /* move hunks in source that are less cut to dest, compensating
111 113 for changes in offset. the last hunk may be split if necessary.
112 114 */
113 115 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
114 116 {
115 117 struct frag *d = dest->tail, *s = src->head;
116 118 int postend, c, l;
117 119
118 120 while (s != src->tail) {
119 121 if (s->start + offset >= cut)
120 122 break; /* we've gone far enough */
121 123
122 124 postend = offset + s->start + s->len;
123 125 if (postend <= cut) {
124 126 /* save this hunk */
125 127 offset += s->start + s->len - s->end;
126 128 *d++ = *s++;
127 129 }
128 130 else {
129 131 /* break up this hunk */
130 132 c = cut - offset;
131 133 if (s->end < c)
132 134 c = s->end;
133 135 l = cut - offset - s->start;
134 136 if (s->len < l)
135 137 l = s->len;
136 138
137 139 offset += s->start + l - c;
138 140
139 141 d->start = s->start;
140 142 d->end = c;
141 143 d->len = l;
142 144 d->data = s->data;
143 145 d++;
144 146 s->start = c;
145 147 s->len = s->len - l;
146 148 s->data = s->data + l;
147 149
148 150 break;
149 151 }
150 152 }
151 153
152 154 dest->tail = d;
153 155 src->head = s;
154 156 return offset;
155 157 }
156 158
157 159 /* like gather, but with no output list */
158 160 static int discard(struct flist *src, int cut, int offset)
159 161 {
160 162 struct frag *s = src->head;
161 163 int postend, c, l;
162 164
163 165 while (s != src->tail) {
164 166 if (s->start + offset >= cut)
165 167 break;
166 168
167 169 postend = offset + s->start + s->len;
168 170 if (postend <= cut) {
169 171 offset += s->start + s->len - s->end;
170 172 s++;
171 173 }
172 174 else {
173 175 c = cut - offset;
174 176 if (s->end < c)
175 177 c = s->end;
176 178 l = cut - offset - s->start;
177 179 if (s->len < l)
178 180 l = s->len;
179 181
180 182 offset += s->start + l - c;
181 183 s->start = c;
182 184 s->len = s->len - l;
183 185 s->data = s->data + l;
184 186
185 187 break;
186 188 }
187 189 }
188 190
189 191 src->head = s;
190 192 return offset;
191 193 }
192 194
193 195 /* combine hunk lists a and b, while adjusting b for offset changes in a/
194 196 this deletes a and b and returns the resultant list. */
195 197 static struct flist *combine(struct flist *a, struct flist *b)
196 198 {
197 199 struct flist *c = NULL;
198 200 struct frag *bh, *ct;
199 201 int offset = 0, post;
200 202
201 203 if (a && b)
202 204 c = lalloc((lsize(a) + lsize(b)) * 2);
203 205
204 206 if (c) {
205 207
206 208 for (bh = b->head; bh != b->tail; bh++) {
207 209 /* save old hunks */
208 210 offset = gather(c, a, bh->start, offset);
209 211
210 212 /* discard replaced hunks */
211 213 post = discard(a, bh->end, offset);
212 214
213 215 /* insert new hunk */
214 216 ct = c->tail;
215 217 ct->start = bh->start - offset;
216 218 ct->end = bh->end - post;
217 219 ct->len = bh->len;
218 220 ct->data = bh->data;
219 221 c->tail++;
220 222 offset = post;
221 223 }
222 224
223 225 /* hold on to tail from a */
224 226 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
225 227 c->tail += lsize(a);
226 228 }
227 229
228 230 lfree(a);
229 231 lfree(b);
230 232 return c;
231 233 }
232 234
233 235 /* decode a binary patch into a hunk list */
234 236 static struct flist *decode(const char *bin, int len)
235 237 {
236 238 struct flist *l;
237 239 struct frag *lt;
238 240 const char *data = bin + 12, *end = bin + len;
239 241 char decode[12]; /* for dealing with alignment issues */
240 242
241 243 /* assume worst case size, we won't have many of these lists */
242 244 l = lalloc(len / 12);
243 245 if (!l)
244 246 return NULL;
245 247
246 248 lt = l->tail;
247 249
248 250 while (data <= end) {
249 251 memcpy(decode, bin, 12);
250 252 lt->start = ntohl(*(uint32_t *)decode);
251 253 lt->end = ntohl(*(uint32_t *)(decode + 4));
252 254 lt->len = ntohl(*(uint32_t *)(decode + 8));
253 255 if (lt->start > lt->end)
254 256 break; /* sanity check */
255 257 bin = data + lt->len;
256 258 if (bin < data)
257 259 break; /* big data + big (bogus) len can wrap around */
258 260 lt->data = data;
259 261 data = bin + 12;
260 262 lt++;
261 263 }
262 264
263 265 if (bin != end) {
264 266 if (!PyErr_Occurred())
265 267 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
266 268 lfree(l);
267 269 return NULL;
268 270 }
269 271
270 272 l->tail = lt;
271 273 return l;
272 274 }
273 275
274 276 /* calculate the size of resultant text */
275 277 static int calcsize(int len, struct flist *l)
276 278 {
277 279 int outlen = 0, last = 0;
278 280 struct frag *f = l->head;
279 281
280 282 while (f != l->tail) {
281 283 if (f->start < last || f->end > len) {
282 284 if (!PyErr_Occurred())
283 285 PyErr_SetString(mpatch_Error,
284 286 "invalid patch");
285 287 return -1;
286 288 }
287 289 outlen += f->start - last;
288 290 last = f->end;
289 291 outlen += f->len;
290 292 f++;
291 293 }
292 294
293 295 outlen += len - last;
294 296 return outlen;
295 297 }
296 298
297 299 static int apply(char *buf, const char *orig, int len, struct flist *l)
298 300 {
299 301 struct frag *f = l->head;
300 302 int last = 0;
301 303 char *p = buf;
302 304
303 305 while (f != l->tail) {
304 306 if (f->start < last || f->end > len) {
305 307 if (!PyErr_Occurred())
306 308 PyErr_SetString(mpatch_Error,
307 309 "invalid patch");
308 310 return 0;
309 311 }
310 312 memcpy(p, orig + last, f->start - last);
311 313 p += f->start - last;
312 314 memcpy(p, f->data, f->len);
313 315 last = f->end;
314 316 p += f->len;
315 317 f++;
316 318 }
317 319 memcpy(p, orig + last, len - last);
318 320 return 1;
319 321 }
320 322
321 323 /* recursively generate a patch of all bins between start and end */
322 324 static struct flist *fold(PyObject *bins, int start, int end)
323 325 {
324 326 int len;
325 327 Py_ssize_t blen;
326 328 const char *buffer;
327 329
328 330 if (start + 1 == end) {
329 331 /* trivial case, output a decoded list */
330 332 PyObject *tmp = PyList_GetItem(bins, start);
331 333 if (!tmp)
332 334 return NULL;
333 335 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
334 336 return NULL;
335 337 return decode(buffer, blen);
336 338 }
337 339
338 340 /* divide and conquer, memory management is elsewhere */
339 341 len = (end - start) / 2;
340 342 return combine(fold(bins, start, start + len),
341 343 fold(bins, start + len, end));
342 344 }
343 345
344 346 static PyObject *
345 347 patches(PyObject *self, PyObject *args)
346 348 {
347 349 PyObject *text, *bins, *result;
348 350 struct flist *patch;
349 351 const char *in;
350 352 char *out;
351 353 int len, outlen;
352 354 Py_ssize_t inlen;
353 355
354 356 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
355 357 return NULL;
356 358
357 359 len = PyList_Size(bins);
358 360 if (!len) {
359 361 /* nothing to do */
360 362 Py_INCREF(text);
361 363 return text;
362 364 }
363 365
364 366 if (PyObject_AsCharBuffer(text, &in, &inlen))
365 367 return NULL;
366 368
367 369 patch = fold(bins, 0, len);
368 370 if (!patch)
369 371 return NULL;
370 372
371 373 outlen = calcsize(inlen, patch);
372 374 if (outlen < 0) {
373 375 result = NULL;
374 376 goto cleanup;
375 377 }
376 result = PyString_FromStringAndSize(NULL, outlen);
378 result = PyBytes_FromStringAndSize(NULL, outlen);
377 379 if (!result) {
378 380 result = NULL;
379 381 goto cleanup;
380 382 }
381 out = PyString_AsString(result);
383 out = PyBytes_AsString(result);
382 384 if (!apply(out, in, inlen, patch)) {
383 385 Py_DECREF(result);
384 386 result = NULL;
385 387 }
386 388 cleanup:
387 389 lfree(patch);
388 390 return result;
389 391 }
390 392
391 393 /* calculate size of a patched file directly */
392 394 static PyObject *
393 395 patchedsize(PyObject *self, PyObject *args)
394 396 {
395 397 long orig, start, end, len, outlen = 0, last = 0;
396 398 int patchlen;
397 399 char *bin, *binend, *data;
398 400 char decode[12]; /* for dealing with alignment issues */
399 401
400 402 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
401 403 return NULL;
402 404
403 405 binend = bin + patchlen;
404 406 data = bin + 12;
405 407
406 408 while (data <= binend) {
407 409 memcpy(decode, bin, 12);
408 410 start = ntohl(*(uint32_t *)decode);
409 411 end = ntohl(*(uint32_t *)(decode + 4));
410 412 len = ntohl(*(uint32_t *)(decode + 8));
411 413 if (start > end)
412 414 break; /* sanity check */
413 415 bin = data + len;
414 416 if (bin < data)
415 417 break; /* big data + big (bogus) len can wrap around */
416 418 data = bin + 12;
417 419 outlen += start - last;
418 420 last = end;
419 421 outlen += len;
420 422 }
421 423
422 424 if (bin != binend) {
423 425 if (!PyErr_Occurred())
424 426 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
425 427 return NULL;
426 428 }
427 429
428 430 outlen += orig - last;
429 431 return Py_BuildValue("l", outlen);
430 432 }
431 433
432 434 static PyMethodDef methods[] = {
433 435 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
434 436 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
435 437 {NULL, NULL}
436 438 };
437 439
440 #ifdef IS_PY3K
441 static struct PyModuleDef mpatch_module = {
442 PyModuleDef_HEAD_INIT,
443 "mpatch",
444 mpatch_doc,
445 -1,
446 methods
447 };
448
449 PyMODINIT_FUNC PyInit_mpatch(void)
450 {
451 PyObject *m;
452
453 m = PyModule_Create(&mpatch_module);
454 if (m == NULL)
455 return NULL;
456
457 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
458 Py_INCREF(mpatch_Error);
459 PyModule_AddObject(m, "mpatchError", mpatch_Error);
460
461 return m;
462 }
463 #else
438 464 PyMODINIT_FUNC
439 465 initmpatch(void)
440 466 {
441 467 Py_InitModule3("mpatch", methods, mpatch_doc);
442 468 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
443 469 }
444
470 #endif
General Comments 0
You need to be logged in to leave comments. Login now