##// END OF EJS Templates
merge with stable
Thomas Arendsen Hein -
r4377:4759da3e merge default
parent child Browse files
Show More
@@ -1,415 +1,426 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26
27 27 #ifdef _WIN32
28 28 # ifdef _MSC_VER
29 29 /* msvc 6.0 has problems */
30 30 # define inline __inline
31 31 typedef unsigned long uint32_t;
32 32 # else
33 33 # include <stdint.h>
34 34 # endif
35 35 static uint32_t ntohl(uint32_t x)
36 36 {
37 37 return ((x & 0x000000ffUL) << 24) |
38 38 ((x & 0x0000ff00UL) << 8) |
39 39 ((x & 0x00ff0000UL) >> 8) |
40 40 ((x & 0xff000000UL) >> 24);
41 41 }
42 42 #else
43 43 /* not windows */
44 44 # include <sys/types.h>
45 45 # ifdef __BEOS__
46 46 # include <ByteOrder.h>
47 47 # else
48 48 # include <arpa/inet.h>
49 49 # endif
50 50 # include <inttypes.h>
51 51 #endif
52 52
53 53 static char mpatch_doc[] = "Efficient binary patching.";
54 54 static PyObject *mpatch_Error;
55 55
56 56 struct frag {
57 57 int start, end, len;
58 58 char *data;
59 59 };
60 60
61 61 struct flist {
62 62 struct frag *base, *head, *tail;
63 63 };
64 64
65 65 static struct flist *lalloc(int size)
66 66 {
67 67 struct flist *a = NULL;
68 68
69 69 if (size < 1)
70 70 size = 1;
71 71
72 72 a = (struct flist *)malloc(sizeof(struct flist));
73 73 if (a) {
74 74 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
75 75 if (a->base) {
76 76 a->head = a->tail = a->base;
77 77 return a;
78 78 }
79 79 free(a);
80 80 a = NULL;
81 81 }
82 82 if (!PyErr_Occurred())
83 83 PyErr_NoMemory();
84 84 return NULL;
85 85 }
86 86
87 87 static void lfree(struct flist *a)
88 88 {
89 89 if (a) {
90 90 free(a->base);
91 91 free(a);
92 92 }
93 93 }
94 94
95 95 static int lsize(struct flist *a)
96 96 {
97 97 return a->tail - a->head;
98 98 }
99 99
100 100 /* move hunks in source that are less cut to dest, compensating
101 101 for changes in offset. the last hunk may be split if necessary.
102 102 */
103 103 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
104 104 {
105 105 struct frag *d = dest->tail, *s = src->head;
106 106 int postend, c, l;
107 107
108 108 while (s != src->tail) {
109 109 if (s->start + offset >= cut)
110 110 break; /* we've gone far enough */
111 111
112 112 postend = offset + s->start + s->len;
113 113 if (postend <= cut) {
114 114 /* save this hunk */
115 115 offset += s->start + s->len - s->end;
116 116 *d++ = *s++;
117 117 }
118 118 else {
119 119 /* break up this hunk */
120 120 c = cut - offset;
121 121 if (s->end < c)
122 122 c = s->end;
123 123 l = cut - offset - s->start;
124 124 if (s->len < l)
125 125 l = s->len;
126 126
127 127 offset += s->start + l - c;
128 128
129 129 d->start = s->start;
130 130 d->end = c;
131 131 d->len = l;
132 132 d->data = s->data;
133 133 d++;
134 134 s->start = c;
135 135 s->len = s->len - l;
136 136 s->data = s->data + l;
137 137
138 138 break;
139 139 }
140 140 }
141 141
142 142 dest->tail = d;
143 143 src->head = s;
144 144 return offset;
145 145 }
146 146
147 147 /* like gather, but with no output list */
148 148 static int discard(struct flist *src, int cut, int offset)
149 149 {
150 150 struct frag *s = src->head;
151 151 int postend, c, l;
152 152
153 153 while (s != src->tail) {
154 154 if (s->start + offset >= cut)
155 155 break;
156 156
157 157 postend = offset + s->start + s->len;
158 158 if (postend <= cut) {
159 159 offset += s->start + s->len - s->end;
160 160 s++;
161 161 }
162 162 else {
163 163 c = cut - offset;
164 164 if (s->end < c)
165 165 c = s->end;
166 166 l = cut - offset - s->start;
167 167 if (s->len < l)
168 168 l = s->len;
169 169
170 170 offset += s->start + l - c;
171 171 s->start = c;
172 172 s->len = s->len - l;
173 173 s->data = s->data + l;
174 174
175 175 break;
176 176 }
177 177 }
178 178
179 179 src->head = s;
180 180 return offset;
181 181 }
182 182
183 183 /* combine hunk lists a and b, while adjusting b for offset changes in a/
184 184 this deletes a and b and returns the resultant list. */
185 185 static struct flist *combine(struct flist *a, struct flist *b)
186 186 {
187 187 struct flist *c = NULL;
188 188 struct frag *bh, *ct;
189 189 int offset = 0, post;
190 190
191 191 if (a && b)
192 192 c = lalloc((lsize(a) + lsize(b)) * 2);
193 193
194 194 if (c) {
195 195
196 196 for (bh = b->head; bh != b->tail; bh++) {
197 197 /* save old hunks */
198 198 offset = gather(c, a, bh->start, offset);
199 199
200 200 /* discard replaced hunks */
201 201 post = discard(a, bh->end, offset);
202 202
203 203 /* insert new hunk */
204 204 ct = c->tail;
205 205 ct->start = bh->start - offset;
206 206 ct->end = bh->end - post;
207 207 ct->len = bh->len;
208 208 ct->data = bh->data;
209 209 c->tail++;
210 210 offset = post;
211 211 }
212 212
213 213 /* hold on to tail from a */
214 214 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
215 215 c->tail += lsize(a);
216 216 }
217 217
218 218 lfree(a);
219 219 lfree(b);
220 220 return c;
221 221 }
222 222
223 223 /* decode a binary patch into a hunk list */
224 224 static struct flist *decode(char *bin, int len)
225 225 {
226 226 struct flist *l;
227 227 struct frag *lt;
228 char *end = bin + len;
228 char *data = bin + 12, *end = bin + len;
229 229 char decode[12]; /* for dealing with alignment issues */
230 230
231 231 /* assume worst case size, we won't have many of these lists */
232 232 l = lalloc(len / 12);
233 233 if (!l)
234 234 return NULL;
235 235
236 236 lt = l->tail;
237 237
238 while (bin < end) {
238 while (data <= end) {
239 239 memcpy(decode, bin, 12);
240 240 lt->start = ntohl(*(uint32_t *)decode);
241 241 lt->end = ntohl(*(uint32_t *)(decode + 4));
242 242 lt->len = ntohl(*(uint32_t *)(decode + 8));
243 lt->data = bin + 12;
244 bin += 12 + lt->len;
243 if (lt->start > lt->end)
244 break; /* sanity check */
245 bin = data + lt->len;
246 if (bin < data)
247 break; /* big data + big (bogus) len can wrap around */
248 lt->data = data;
249 data = bin + 12;
245 250 lt++;
246 251 }
247 252
248 253 if (bin != end) {
249 254 if (!PyErr_Occurred())
250 255 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
251 256 lfree(l);
252 257 return NULL;
253 258 }
254 259
255 260 l->tail = lt;
256 261 return l;
257 262 }
258 263
259 264 /* calculate the size of resultant text */
260 265 static int calcsize(int len, struct flist *l)
261 266 {
262 267 int outlen = 0, last = 0;
263 268 struct frag *f = l->head;
264 269
265 270 while (f != l->tail) {
266 271 if (f->start < last || f->end > len) {
267 272 if (!PyErr_Occurred())
268 273 PyErr_SetString(mpatch_Error,
269 274 "invalid patch");
270 275 return -1;
271 276 }
272 277 outlen += f->start - last;
273 278 last = f->end;
274 279 outlen += f->len;
275 280 f++;
276 281 }
277 282
278 283 outlen += len - last;
279 284 return outlen;
280 285 }
281 286
282 287 static int apply(char *buf, char *orig, int len, struct flist *l)
283 288 {
284 289 struct frag *f = l->head;
285 290 int last = 0;
286 291 char *p = buf;
287 292
288 293 while (f != l->tail) {
289 294 if (f->start < last || f->end > len) {
290 295 if (!PyErr_Occurred())
291 296 PyErr_SetString(mpatch_Error,
292 297 "invalid patch");
293 298 return 0;
294 299 }
295 300 memcpy(p, orig + last, f->start - last);
296 301 p += f->start - last;
297 302 memcpy(p, f->data, f->len);
298 303 last = f->end;
299 304 p += f->len;
300 305 f++;
301 306 }
302 307 memcpy(p, orig + last, len - last);
303 308 return 1;
304 309 }
305 310
306 311 /* recursively generate a patch of all bins between start and end */
307 312 static struct flist *fold(PyObject *bins, int start, int end)
308 313 {
309 314 int len;
310 315
311 316 if (start + 1 == end) {
312 317 /* trivial case, output a decoded list */
313 318 PyObject *tmp = PyList_GetItem(bins, start);
314 319 if (!tmp)
315 320 return NULL;
316 321 return decode(PyString_AsString(tmp), PyString_Size(tmp));
317 322 }
318 323
319 324 /* divide and conquer, memory management is elsewhere */
320 325 len = (end - start) / 2;
321 326 return combine(fold(bins, start, start + len),
322 327 fold(bins, start + len, end));
323 328 }
324 329
325 330 static PyObject *
326 331 patches(PyObject *self, PyObject *args)
327 332 {
328 333 PyObject *text, *bins, *result;
329 334 struct flist *patch;
330 335 char *in, *out;
331 336 int len, outlen;
332 337
333 338 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
334 339 return NULL;
335 340
336 341 len = PyList_Size(bins);
337 342 if (!len) {
338 343 /* nothing to do */
339 344 Py_INCREF(text);
340 345 return text;
341 346 }
342 347
343 348 patch = fold(bins, 0, len);
344 349 if (!patch)
345 350 return NULL;
346 351
347 352 outlen = calcsize(PyString_Size(text), patch);
348 353 if (outlen < 0) {
349 354 result = NULL;
350 355 goto cleanup;
351 356 }
352 357 result = PyString_FromStringAndSize(NULL, outlen);
353 358 if (!result) {
354 359 result = NULL;
355 360 goto cleanup;
356 361 }
357 362 in = PyString_AsString(text);
358 363 out = PyString_AsString(result);
359 364 if (!apply(out, in, PyString_Size(text), patch)) {
360 365 Py_DECREF(result);
361 366 result = NULL;
362 367 }
363 368 cleanup:
364 369 lfree(patch);
365 370 return result;
366 371 }
367 372
368 373 /* calculate size of a patched file directly */
369 374 static PyObject *
370 375 patchedsize(PyObject *self, PyObject *args)
371 376 {
372 377 long orig, start, end, len, outlen = 0, last = 0;
373 378 int patchlen;
374 char *bin, *binend;
379 char *bin, *binend, *data;
375 380 char decode[12]; /* for dealing with alignment issues */
376 381
377 382 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
378 383 return NULL;
379 384
380 385 binend = bin + patchlen;
386 data = bin + 12;
381 387
382 while (bin < binend) {
388 while (data <= binend) {
383 389 memcpy(decode, bin, 12);
384 390 start = ntohl(*(uint32_t *)decode);
385 391 end = ntohl(*(uint32_t *)(decode + 4));
386 392 len = ntohl(*(uint32_t *)(decode + 8));
387 bin += 12 + len;
393 if (start > end)
394 break; /* sanity check */
395 bin = data + len;
396 if (bin < data)
397 break; /* big data + big (bogus) len can wrap around */
398 data = bin + 12;
388 399 outlen += start - last;
389 400 last = end;
390 401 outlen += len;
391 402 }
392 403
393 404 if (bin != binend) {
394 405 if (!PyErr_Occurred())
395 406 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
396 407 return NULL;
397 408 }
398 409
399 410 outlen += orig - last;
400 411 return Py_BuildValue("l", outlen);
401 412 }
402 413
403 414 static PyMethodDef methods[] = {
404 415 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
405 416 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
406 417 {NULL, NULL}
407 418 };
408 419
409 420 PyMODINIT_FUNC
410 421 initmpatch(void)
411 422 {
412 423 Py_InitModule3("mpatch", methods, mpatch_doc);
413 424 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
414 425 }
415 426
General Comments 0
You need to be logged in to leave comments. Login now