##// END OF EJS Templates
mpatch: use Py_ssize_t for string length
Adrian Buehlmann -
r16758:9a8ab5c4 default
parent child Browse files
Show More
@@ -1,428 +1,429
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 #define PY_SSIZE_T_CLEAN
23 24 #include <Python.h>
24 25 #include <stdlib.h>
25 26 #include <string.h>
26 27
27 28 #include "util.h"
28 29
29 30 static char mpatch_doc[] = "Efficient binary patching.";
30 31 static PyObject *mpatch_Error;
31 32
32 33 struct frag {
33 34 int start, end, len;
34 35 const char *data;
35 36 };
36 37
37 38 struct flist {
38 39 struct frag *base, *head, *tail;
39 40 };
40 41
41 42 static struct flist *lalloc(Py_ssize_t size)
42 43 {
43 44 struct flist *a = NULL;
44 45
45 46 if (size < 1)
46 47 size = 1;
47 48
48 49 a = (struct flist *)malloc(sizeof(struct flist));
49 50 if (a) {
50 51 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
51 52 if (a->base) {
52 53 a->head = a->tail = a->base;
53 54 return a;
54 55 }
55 56 free(a);
56 57 a = NULL;
57 58 }
58 59 if (!PyErr_Occurred())
59 60 PyErr_NoMemory();
60 61 return NULL;
61 62 }
62 63
63 64 static void lfree(struct flist *a)
64 65 {
65 66 if (a) {
66 67 free(a->base);
67 68 free(a);
68 69 }
69 70 }
70 71
71 72 static Py_ssize_t lsize(struct flist *a)
72 73 {
73 74 return a->tail - a->head;
74 75 }
75 76
76 77 /* move hunks in source that are less cut to dest, compensating
77 78 for changes in offset. the last hunk may be split if necessary.
78 79 */
79 80 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
80 81 {
81 82 struct frag *d = dest->tail, *s = src->head;
82 83 int postend, c, l;
83 84
84 85 while (s != src->tail) {
85 86 if (s->start + offset >= cut)
86 87 break; /* we've gone far enough */
87 88
88 89 postend = offset + s->start + s->len;
89 90 if (postend <= cut) {
90 91 /* save this hunk */
91 92 offset += s->start + s->len - s->end;
92 93 *d++ = *s++;
93 94 }
94 95 else {
95 96 /* break up this hunk */
96 97 c = cut - offset;
97 98 if (s->end < c)
98 99 c = s->end;
99 100 l = cut - offset - s->start;
100 101 if (s->len < l)
101 102 l = s->len;
102 103
103 104 offset += s->start + l - c;
104 105
105 106 d->start = s->start;
106 107 d->end = c;
107 108 d->len = l;
108 109 d->data = s->data;
109 110 d++;
110 111 s->start = c;
111 112 s->len = s->len - l;
112 113 s->data = s->data + l;
113 114
114 115 break;
115 116 }
116 117 }
117 118
118 119 dest->tail = d;
119 120 src->head = s;
120 121 return offset;
121 122 }
122 123
123 124 /* like gather, but with no output list */
124 125 static int discard(struct flist *src, int cut, int offset)
125 126 {
126 127 struct frag *s = src->head;
127 128 int postend, c, l;
128 129
129 130 while (s != src->tail) {
130 131 if (s->start + offset >= cut)
131 132 break;
132 133
133 134 postend = offset + s->start + s->len;
134 135 if (postend <= cut) {
135 136 offset += s->start + s->len - s->end;
136 137 s++;
137 138 }
138 139 else {
139 140 c = cut - offset;
140 141 if (s->end < c)
141 142 c = s->end;
142 143 l = cut - offset - s->start;
143 144 if (s->len < l)
144 145 l = s->len;
145 146
146 147 offset += s->start + l - c;
147 148 s->start = c;
148 149 s->len = s->len - l;
149 150 s->data = s->data + l;
150 151
151 152 break;
152 153 }
153 154 }
154 155
155 156 src->head = s;
156 157 return offset;
157 158 }
158 159
159 160 /* combine hunk lists a and b, while adjusting b for offset changes in a/
160 161 this deletes a and b and returns the resultant list. */
161 162 static struct flist *combine(struct flist *a, struct flist *b)
162 163 {
163 164 struct flist *c = NULL;
164 165 struct frag *bh, *ct;
165 166 int offset = 0, post;
166 167
167 168 if (a && b)
168 169 c = lalloc((lsize(a) + lsize(b)) * 2);
169 170
170 171 if (c) {
171 172
172 173 for (bh = b->head; bh != b->tail; bh++) {
173 174 /* save old hunks */
174 175 offset = gather(c, a, bh->start, offset);
175 176
176 177 /* discard replaced hunks */
177 178 post = discard(a, bh->end, offset);
178 179
179 180 /* insert new hunk */
180 181 ct = c->tail;
181 182 ct->start = bh->start - offset;
182 183 ct->end = bh->end - post;
183 184 ct->len = bh->len;
184 185 ct->data = bh->data;
185 186 c->tail++;
186 187 offset = post;
187 188 }
188 189
189 190 /* hold on to tail from a */
190 191 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
191 192 c->tail += lsize(a);
192 193 }
193 194
194 195 lfree(a);
195 196 lfree(b);
196 197 return c;
197 198 }
198 199
199 200 /* decode a binary patch into a hunk list */
200 201 static struct flist *decode(const char *bin, Py_ssize_t len)
201 202 {
202 203 struct flist *l;
203 204 struct frag *lt;
204 205 const char *data = bin + 12, *end = bin + len;
205 206
206 207 /* assume worst case size, we won't have many of these lists */
207 208 l = lalloc(len / 12);
208 209 if (!l)
209 210 return NULL;
210 211
211 212 lt = l->tail;
212 213
213 214 while (data <= end) {
214 215 lt->start = getbe32(bin);
215 216 lt->end = getbe32(bin + 4);
216 217 lt->len = getbe32(bin + 8);
217 218 if (lt->start > lt->end)
218 219 break; /* sanity check */
219 220 bin = data + lt->len;
220 221 if (bin < data)
221 222 break; /* big data + big (bogus) len can wrap around */
222 223 lt->data = data;
223 224 data = bin + 12;
224 225 lt++;
225 226 }
226 227
227 228 if (bin != end) {
228 229 if (!PyErr_Occurred())
229 230 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
230 231 lfree(l);
231 232 return NULL;
232 233 }
233 234
234 235 l->tail = lt;
235 236 return l;
236 237 }
237 238
238 239 /* calculate the size of resultant text */
239 240 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
240 241 {
241 242 Py_ssize_t outlen = 0, last = 0;
242 243 struct frag *f = l->head;
243 244
244 245 while (f != l->tail) {
245 246 if (f->start < last || f->end > len) {
246 247 if (!PyErr_Occurred())
247 248 PyErr_SetString(mpatch_Error,
248 249 "invalid patch");
249 250 return -1;
250 251 }
251 252 outlen += f->start - last;
252 253 last = f->end;
253 254 outlen += f->len;
254 255 f++;
255 256 }
256 257
257 258 outlen += len - last;
258 259 return outlen;
259 260 }
260 261
261 262 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
262 263 {
263 264 struct frag *f = l->head;
264 265 int last = 0;
265 266 char *p = buf;
266 267
267 268 while (f != l->tail) {
268 269 if (f->start < last || f->end > len) {
269 270 if (!PyErr_Occurred())
270 271 PyErr_SetString(mpatch_Error,
271 272 "invalid patch");
272 273 return 0;
273 274 }
274 275 memcpy(p, orig + last, f->start - last);
275 276 p += f->start - last;
276 277 memcpy(p, f->data, f->len);
277 278 last = f->end;
278 279 p += f->len;
279 280 f++;
280 281 }
281 282 memcpy(p, orig + last, len - last);
282 283 return 1;
283 284 }
284 285
285 286 /* recursively generate a patch of all bins between start and end */
286 287 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
287 288 {
288 289 Py_ssize_t len, blen;
289 290 const char *buffer;
290 291
291 292 if (start + 1 == end) {
292 293 /* trivial case, output a decoded list */
293 294 PyObject *tmp = PyList_GetItem(bins, start);
294 295 if (!tmp)
295 296 return NULL;
296 297 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
297 298 return NULL;
298 299 return decode(buffer, blen);
299 300 }
300 301
301 302 /* divide and conquer, memory management is elsewhere */
302 303 len = (end - start) / 2;
303 304 return combine(fold(bins, start, start + len),
304 305 fold(bins, start + len, end));
305 306 }
306 307
307 308 static PyObject *
308 309 patches(PyObject *self, PyObject *args)
309 310 {
310 311 PyObject *text, *bins, *result;
311 312 struct flist *patch;
312 313 const char *in;
313 314 char *out;
314 315 Py_ssize_t len, outlen, inlen;
315 316
316 317 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
317 318 return NULL;
318 319
319 320 len = PyList_Size(bins);
320 321 if (!len) {
321 322 /* nothing to do */
322 323 Py_INCREF(text);
323 324 return text;
324 325 }
325 326
326 327 if (PyObject_AsCharBuffer(text, &in, &inlen))
327 328 return NULL;
328 329
329 330 patch = fold(bins, 0, len);
330 331 if (!patch)
331 332 return NULL;
332 333
333 334 outlen = calcsize(inlen, patch);
334 335 if (outlen < 0) {
335 336 result = NULL;
336 337 goto cleanup;
337 338 }
338 339 result = PyBytes_FromStringAndSize(NULL, outlen);
339 340 if (!result) {
340 341 result = NULL;
341 342 goto cleanup;
342 343 }
343 344 out = PyBytes_AsString(result);
344 345 if (!apply(out, in, inlen, patch)) {
345 346 Py_DECREF(result);
346 347 result = NULL;
347 348 }
348 349 cleanup:
349 350 lfree(patch);
350 351 return result;
351 352 }
352 353
353 354 /* calculate size of a patched file directly */
354 355 static PyObject *
355 356 patchedsize(PyObject *self, PyObject *args)
356 357 {
357 358 long orig, start, end, len, outlen = 0, last = 0;
358 int patchlen;
359 Py_ssize_t patchlen;
359 360 char *bin, *binend, *data;
360 361
361 362 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
362 363 return NULL;
363 364
364 365 binend = bin + patchlen;
365 366 data = bin + 12;
366 367
367 368 while (data <= binend) {
368 369 start = getbe32(bin);
369 370 end = getbe32(bin + 4);
370 371 len = getbe32(bin + 8);
371 372 if (start > end)
372 373 break; /* sanity check */
373 374 bin = data + len;
374 375 if (bin < data)
375 376 break; /* big data + big (bogus) len can wrap around */
376 377 data = bin + 12;
377 378 outlen += start - last;
378 379 last = end;
379 380 outlen += len;
380 381 }
381 382
382 383 if (bin != binend) {
383 384 if (!PyErr_Occurred())
384 385 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
385 386 return NULL;
386 387 }
387 388
388 389 outlen += orig - last;
389 390 return Py_BuildValue("l", outlen);
390 391 }
391 392
392 393 static PyMethodDef methods[] = {
393 394 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
394 395 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
395 396 {NULL, NULL}
396 397 };
397 398
398 399 #ifdef IS_PY3K
399 400 static struct PyModuleDef mpatch_module = {
400 401 PyModuleDef_HEAD_INIT,
401 402 "mpatch",
402 403 mpatch_doc,
403 404 -1,
404 405 methods
405 406 };
406 407
407 408 PyMODINIT_FUNC PyInit_mpatch(void)
408 409 {
409 410 PyObject *m;
410 411
411 412 m = PyModule_Create(&mpatch_module);
412 413 if (m == NULL)
413 414 return NULL;
414 415
415 416 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
416 417 Py_INCREF(mpatch_Error);
417 418 PyModule_AddObject(m, "mpatchError", mpatch_Error);
418 419
419 420 return m;
420 421 }
421 422 #else
422 423 PyMODINIT_FUNC
423 424 initmpatch(void)
424 425 {
425 426 Py_InitModule3("mpatch", methods, mpatch_doc);
426 427 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
427 428 }
428 429 #endif
General Comments 0
You need to be logged in to leave comments. Login now