##// END OF EJS Templates
catch errors and throw exception with invalid binary patch data
Benoit Boissinot -
r1722:681c5c21 default
parent child Browse files
Show More
@@ -1,331 +1,368 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26 #ifdef _WIN32
27 27 #ifdef _MSC_VER
28 28 #define inline __inline
29 29 typedef unsigned long uint32_t;
30 30 #else
31 31 #include <stdint.h>
32 32 #endif
33 33 static uint32_t ntohl(uint32_t x)
34 34 {
35 35 return ((x & 0x000000ffUL) << 24) |
36 36 ((x & 0x0000ff00UL) << 8) |
37 37 ((x & 0x00ff0000UL) >> 8) |
38 38 ((x & 0xff000000UL) >> 24);
39 39 }
40 40 #else
41 41 #include <sys/types.h>
42 42 #include <arpa/inet.h>
43 43 #endif
44 44
45 45 static char mpatch_doc[] = "Efficient binary patching.";
46 static PyObject *mpatch_Error;
46 47
47 48 struct frag {
48 49 int start, end, len;
49 50 char *data;
50 51 };
51 52
52 53 struct flist {
53 54 struct frag *base, *head, *tail;
54 55 };
55 56
56 57 static struct flist *lalloc(int size)
57 58 {
58 59 struct flist *a = NULL;
59 60
60 61 a = malloc(sizeof(struct flist));
61 62 if (a) {
62 63 a->base = malloc(sizeof(struct frag) * size);
63 64 if (!a->base) {
64 65 free(a);
65 66 a = NULL;
66 67 } else
67 68 a->head = a->tail = a->base;
69 return a;
68 70 }
69 return a;
71 if (!PyErr_Occurred())
72 PyErr_NoMemory();
73 return NULL;
70 74 }
71 75
72 76 static void lfree(struct flist *a)
73 77 {
74 78 if (a) {
75 79 free(a->base);
76 80 free(a);
77 81 }
78 82 }
79 83
80 84 static int lsize(struct flist *a)
81 85 {
82 86 return a->tail - a->head;
83 87 }
84 88
85 89 /* move hunks in source that are less cut to dest, compensating
86 90 for changes in offset. the last hunk may be split if necessary.
87 91 */
88 92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
89 93 {
90 94 struct frag *d = dest->tail, *s = src->head;
91 95 int postend, c, l;
92 96
93 97 while (s != src->tail) {
94 98 if (s->start + offset >= cut)
95 99 break; /* we've gone far enough */
96 100
97 101 postend = offset + s->start + s->len;
98 102 if (postend <= cut) {
99 103 /* save this hunk */
100 104 offset += s->start + s->len - s->end;
101 105 *d++ = *s++;
102 106 }
103 107 else {
104 108 /* break up this hunk */
105 109 c = cut - offset;
106 110 if (s->end < c)
107 111 c = s->end;
108 112 l = cut - offset - s->start;
109 113 if (s->len < l)
110 114 l = s->len;
111 115
112 116 offset += s->start + l - c;
113 117
114 118 d->start = s->start;
115 119 d->end = c;
116 120 d->len = l;
117 121 d->data = s->data;
118 122 d++;
119 123 s->start = c;
120 124 s->len = s->len - l;
121 125 s->data = s->data + l;
122 126
123 127 break;
124 128 }
125 129 }
126 130
127 131 dest->tail = d;
128 132 src->head = s;
129 133 return offset;
130 134 }
131 135
132 136 /* like gather, but with no output list */
133 137 static int discard(struct flist *src, int cut, int offset)
134 138 {
135 139 struct frag *s = src->head;
136 140 int postend, c, l;
137 141
138 142 while (s != src->tail) {
139 143 if (s->start + offset >= cut)
140 144 break;
141 145
142 146 postend = offset + s->start + s->len;
143 147 if (postend <= cut) {
144 148 offset += s->start + s->len - s->end;
145 149 s++;
146 150 }
147 151 else {
148 152 c = cut - offset;
149 153 if (s->end < c)
150 154 c = s->end;
151 155 l = cut - offset - s->start;
152 156 if (s->len < l)
153 157 l = s->len;
154 158
155 159 offset += s->start + l - c;
156 160 s->start = c;
157 161 s->len = s->len - l;
158 162 s->data = s->data + l;
159 163
160 164 break;
161 165 }
162 166 }
163 167
164 168 src->head = s;
165 169 return offset;
166 170 }
167 171
168 172 /* combine hunk lists a and b, while adjusting b for offset changes in a/
169 173 this deletes a and b and returns the resultant list. */
170 174 static struct flist *combine(struct flist *a, struct flist *b)
171 175 {
172 176 struct flist *c = NULL;
173 177 struct frag *bh, *ct;
174 178 int offset = 0, post;
175 179
176 180 if (a && b)
177 181 c = lalloc((lsize(a) + lsize(b)) * 2);
178 182
179 183 if (c) {
180 184
181 185 for (bh = b->head; bh != b->tail; bh++) {
182 186 /* save old hunks */
183 187 offset = gather(c, a, bh->start, offset);
184 188
185 189 /* discard replaced hunks */
186 190 post = discard(a, bh->end, offset);
187 191
188 192 /* insert new hunk */
189 193 ct = c->tail;
190 194 ct->start = bh->start - offset;
191 195 ct->end = bh->end - post;
192 196 ct->len = bh->len;
193 197 ct->data = bh->data;
194 198 c->tail++;
195 199 offset = post;
196 200 }
197 201
198 202 /* hold on to tail from a */
199 203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
200 204 c->tail += lsize(a);
201 205 }
202 206
203 207 lfree(a);
204 208 lfree(b);
205 209 return c;
206 210 }
207 211
208 212 /* decode a binary patch into a hunk list */
209 213 static struct flist *decode(char *bin, int len)
210 214 {
211 215 struct flist *l;
212 216 struct frag *lt;
213 217 char *end = bin + len;
214 218 char decode[12]; /* for dealing with alignment issues */
215 219
216 220 /* assume worst case size, we won't have many of these lists */
217 221 l = lalloc(len / 12);
222 if (!l)
223 return NULL;
224
218 225 lt = l->tail;
219 226
220 227 while (bin < end) {
221 228 memcpy(decode, bin, 12);
222 229 lt->start = ntohl(*(uint32_t *)decode);
223 230 lt->end = ntohl(*(uint32_t *)(decode + 4));
224 231 lt->len = ntohl(*(uint32_t *)(decode + 8));
225 232 lt->data = bin + 12;
226 233 bin += 12 + lt->len;
227 234 lt++;
228 235 }
229 236
237 if (bin != end) {
238 if (!PyErr_Occurred())
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
240 lfree(l);
241 return NULL;
242 }
243
230 244 l->tail = lt;
231 245 return l;
232 246 }
233 247
234 248 /* calculate the size of resultant text */
235 249 static int calcsize(int len, struct flist *l)
236 250 {
237 251 int outlen = 0, last = 0;
238 252 struct frag *f = l->head;
239 253
240 254 while (f != l->tail) {
255 if (f->start < last || f->end > len) {
256 if (!PyErr_Occurred())
257 PyErr_SetString(mpatch_Error,
258 "invalid patch");
259 return -1;
260 }
241 261 outlen += f->start - last;
242 262 last = f->end;
243 263 outlen += f->len;
244 264 f++;
245 265 }
246 266
247 267 outlen += len - last;
248 268 return outlen;
249 269 }
250 270
251 static void apply(char *buf, char *orig, int len, struct flist *l)
271 static int apply(char *buf, char *orig, int len, struct flist *l)
252 272 {
253 273 struct frag *f = l->head;
254 274 int last = 0;
255 275 char *p = buf;
256 276
257 277 while (f != l->tail) {
278 if (f->start < last || f->end > len) {
279 if (!PyErr_Occurred())
280 PyErr_SetString(mpatch_Error,
281 "invalid patch");
282 return 0;
283 }
258 284 memcpy(p, orig + last, f->start - last);
259 285 p += f->start - last;
260 286 memcpy(p, f->data, f->len);
261 287 last = f->end;
262 288 p += f->len;
263 289 f++;
264 290 }
265 291 memcpy(p, orig + last, len - last);
292 return 1;
266 293 }
267 294
268 295 /* recursively generate a patch of all bins between start and end */
269 296 static struct flist *fold(PyObject *bins, int start, int end)
270 297 {
271 298 int len;
272 299
273 300 if (start + 1 == end) {
274 301 /* trivial case, output a decoded list */
275 302 PyObject *tmp = PyList_GetItem(bins, start);
276 303 if (!tmp)
277 304 return NULL;
278 305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
279 306 }
280 307
281 308 /* divide and conquer, memory management is elsewhere */
282 309 len = (end - start) / 2;
283 310 return combine(fold(bins, start, start + len),
284 311 fold(bins, start + len, end));
285 312 }
286 313
287 314 static PyObject *
288 315 patches(PyObject *self, PyObject *args)
289 316 {
290 317 PyObject *text, *bins, *result;
291 318 struct flist *patch;
292 319 char *in, *out;
293 320 int len, outlen;
294 321
295 322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
296 323 return NULL;
297 324
298 325 len = PyList_Size(bins);
299 326 if (!len) {
300 327 /* nothing to do */
301 328 Py_INCREF(text);
302 329 return text;
303 330 }
304 331
305 332 patch = fold(bins, 0, len);
306 333 if (!patch)
307 return PyErr_NoMemory();
334 return NULL;
308 335
309 336 outlen = calcsize(PyString_Size(text), patch);
337 if (outlen < 0) {
338 result = NULL;
339 goto cleanup;
340 }
310 341 result = PyString_FromStringAndSize(NULL, outlen);
311 if (result) {
312 in = PyString_AsString(text);
313 out = PyString_AsString(result);
314 apply(out, in, PyString_Size(text), patch);
342 if (!result) {
343 result = NULL;
344 goto cleanup;
315 345 }
316
346 in = PyString_AsString(text);
347 out = PyString_AsString(result);
348 if (!apply(out, in, PyString_Size(text), patch)) {
349 Py_DECREF(result);
350 result = NULL;
351 }
352 cleanup:
317 353 lfree(patch);
318 354 return result;
319 355 }
320 356
321 357 static PyMethodDef methods[] = {
322 358 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
323 359 {NULL, NULL}
324 360 };
325 361
326 362 PyMODINIT_FUNC
327 363 initmpatch(void)
328 364 {
329 365 Py_InitModule3("mpatch", methods, mpatch_doc);
366 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
330 367 }
331 368
General Comments 0
You need to be logged in to leave comments. Login now