##// END OF EJS Templates
catch errors and throw exception with invalid binary patch data
Benoit Boissinot -
r1722:681c5c21 default
parent child Browse files
Show More
@@ -1,331 +1,368 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #ifdef _WIN32
26 #ifdef _WIN32
27 #ifdef _MSC_VER
27 #ifdef _MSC_VER
28 #define inline __inline
28 #define inline __inline
29 typedef unsigned long uint32_t;
29 typedef unsigned long uint32_t;
30 #else
30 #else
31 #include <stdint.h>
31 #include <stdint.h>
32 #endif
32 #endif
33 static uint32_t ntohl(uint32_t x)
33 static uint32_t ntohl(uint32_t x)
34 {
34 {
35 return ((x & 0x000000ffUL) << 24) |
35 return ((x & 0x000000ffUL) << 24) |
36 ((x & 0x0000ff00UL) << 8) |
36 ((x & 0x0000ff00UL) << 8) |
37 ((x & 0x00ff0000UL) >> 8) |
37 ((x & 0x00ff0000UL) >> 8) |
38 ((x & 0xff000000UL) >> 24);
38 ((x & 0xff000000UL) >> 24);
39 }
39 }
40 #else
40 #else
41 #include <sys/types.h>
41 #include <sys/types.h>
42 #include <arpa/inet.h>
42 #include <arpa/inet.h>
43 #endif
43 #endif
44
44
45 static char mpatch_doc[] = "Efficient binary patching.";
45 static char mpatch_doc[] = "Efficient binary patching.";
46 static PyObject *mpatch_Error;
46
47
47 struct frag {
48 struct frag {
48 int start, end, len;
49 int start, end, len;
49 char *data;
50 char *data;
50 };
51 };
51
52
52 struct flist {
53 struct flist {
53 struct frag *base, *head, *tail;
54 struct frag *base, *head, *tail;
54 };
55 };
55
56
56 static struct flist *lalloc(int size)
57 static struct flist *lalloc(int size)
57 {
58 {
58 struct flist *a = NULL;
59 struct flist *a = NULL;
59
60
60 a = malloc(sizeof(struct flist));
61 a = malloc(sizeof(struct flist));
61 if (a) {
62 if (a) {
62 a->base = malloc(sizeof(struct frag) * size);
63 a->base = malloc(sizeof(struct frag) * size);
63 if (!a->base) {
64 if (!a->base) {
64 free(a);
65 free(a);
65 a = NULL;
66 a = NULL;
66 } else
67 } else
67 a->head = a->tail = a->base;
68 a->head = a->tail = a->base;
69 return a;
68 }
70 }
69 return a;
71 if (!PyErr_Occurred())
72 PyErr_NoMemory();
73 return NULL;
70 }
74 }
71
75
72 static void lfree(struct flist *a)
76 static void lfree(struct flist *a)
73 {
77 {
74 if (a) {
78 if (a) {
75 free(a->base);
79 free(a->base);
76 free(a);
80 free(a);
77 }
81 }
78 }
82 }
79
83
80 static int lsize(struct flist *a)
84 static int lsize(struct flist *a)
81 {
85 {
82 return a->tail - a->head;
86 return a->tail - a->head;
83 }
87 }
84
88
85 /* move hunks in source that are less cut to dest, compensating
89 /* move hunks in source that are less cut to dest, compensating
86 for changes in offset. the last hunk may be split if necessary.
90 for changes in offset. the last hunk may be split if necessary.
87 */
91 */
88 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
89 {
93 {
90 struct frag *d = dest->tail, *s = src->head;
94 struct frag *d = dest->tail, *s = src->head;
91 int postend, c, l;
95 int postend, c, l;
92
96
93 while (s != src->tail) {
97 while (s != src->tail) {
94 if (s->start + offset >= cut)
98 if (s->start + offset >= cut)
95 break; /* we've gone far enough */
99 break; /* we've gone far enough */
96
100
97 postend = offset + s->start + s->len;
101 postend = offset + s->start + s->len;
98 if (postend <= cut) {
102 if (postend <= cut) {
99 /* save this hunk */
103 /* save this hunk */
100 offset += s->start + s->len - s->end;
104 offset += s->start + s->len - s->end;
101 *d++ = *s++;
105 *d++ = *s++;
102 }
106 }
103 else {
107 else {
104 /* break up this hunk */
108 /* break up this hunk */
105 c = cut - offset;
109 c = cut - offset;
106 if (s->end < c)
110 if (s->end < c)
107 c = s->end;
111 c = s->end;
108 l = cut - offset - s->start;
112 l = cut - offset - s->start;
109 if (s->len < l)
113 if (s->len < l)
110 l = s->len;
114 l = s->len;
111
115
112 offset += s->start + l - c;
116 offset += s->start + l - c;
113
117
114 d->start = s->start;
118 d->start = s->start;
115 d->end = c;
119 d->end = c;
116 d->len = l;
120 d->len = l;
117 d->data = s->data;
121 d->data = s->data;
118 d++;
122 d++;
119 s->start = c;
123 s->start = c;
120 s->len = s->len - l;
124 s->len = s->len - l;
121 s->data = s->data + l;
125 s->data = s->data + l;
122
126
123 break;
127 break;
124 }
128 }
125 }
129 }
126
130
127 dest->tail = d;
131 dest->tail = d;
128 src->head = s;
132 src->head = s;
129 return offset;
133 return offset;
130 }
134 }
131
135
132 /* like gather, but with no output list */
136 /* like gather, but with no output list */
133 static int discard(struct flist *src, int cut, int offset)
137 static int discard(struct flist *src, int cut, int offset)
134 {
138 {
135 struct frag *s = src->head;
139 struct frag *s = src->head;
136 int postend, c, l;
140 int postend, c, l;
137
141
138 while (s != src->tail) {
142 while (s != src->tail) {
139 if (s->start + offset >= cut)
143 if (s->start + offset >= cut)
140 break;
144 break;
141
145
142 postend = offset + s->start + s->len;
146 postend = offset + s->start + s->len;
143 if (postend <= cut) {
147 if (postend <= cut) {
144 offset += s->start + s->len - s->end;
148 offset += s->start + s->len - s->end;
145 s++;
149 s++;
146 }
150 }
147 else {
151 else {
148 c = cut - offset;
152 c = cut - offset;
149 if (s->end < c)
153 if (s->end < c)
150 c = s->end;
154 c = s->end;
151 l = cut - offset - s->start;
155 l = cut - offset - s->start;
152 if (s->len < l)
156 if (s->len < l)
153 l = s->len;
157 l = s->len;
154
158
155 offset += s->start + l - c;
159 offset += s->start + l - c;
156 s->start = c;
160 s->start = c;
157 s->len = s->len - l;
161 s->len = s->len - l;
158 s->data = s->data + l;
162 s->data = s->data + l;
159
163
160 break;
164 break;
161 }
165 }
162 }
166 }
163
167
164 src->head = s;
168 src->head = s;
165 return offset;
169 return offset;
166 }
170 }
167
171
168 /* combine hunk lists a and b, while adjusting b for offset changes in a/
172 /* combine hunk lists a and b, while adjusting b for offset changes in a/
169 this deletes a and b and returns the resultant list. */
173 this deletes a and b and returns the resultant list. */
170 static struct flist *combine(struct flist *a, struct flist *b)
174 static struct flist *combine(struct flist *a, struct flist *b)
171 {
175 {
172 struct flist *c = NULL;
176 struct flist *c = NULL;
173 struct frag *bh, *ct;
177 struct frag *bh, *ct;
174 int offset = 0, post;
178 int offset = 0, post;
175
179
176 if (a && b)
180 if (a && b)
177 c = lalloc((lsize(a) + lsize(b)) * 2);
181 c = lalloc((lsize(a) + lsize(b)) * 2);
178
182
179 if (c) {
183 if (c) {
180
184
181 for (bh = b->head; bh != b->tail; bh++) {
185 for (bh = b->head; bh != b->tail; bh++) {
182 /* save old hunks */
186 /* save old hunks */
183 offset = gather(c, a, bh->start, offset);
187 offset = gather(c, a, bh->start, offset);
184
188
185 /* discard replaced hunks */
189 /* discard replaced hunks */
186 post = discard(a, bh->end, offset);
190 post = discard(a, bh->end, offset);
187
191
188 /* insert new hunk */
192 /* insert new hunk */
189 ct = c->tail;
193 ct = c->tail;
190 ct->start = bh->start - offset;
194 ct->start = bh->start - offset;
191 ct->end = bh->end - post;
195 ct->end = bh->end - post;
192 ct->len = bh->len;
196 ct->len = bh->len;
193 ct->data = bh->data;
197 ct->data = bh->data;
194 c->tail++;
198 c->tail++;
195 offset = post;
199 offset = post;
196 }
200 }
197
201
198 /* hold on to tail from a */
202 /* hold on to tail from a */
199 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
200 c->tail += lsize(a);
204 c->tail += lsize(a);
201 }
205 }
202
206
203 lfree(a);
207 lfree(a);
204 lfree(b);
208 lfree(b);
205 return c;
209 return c;
206 }
210 }
207
211
208 /* decode a binary patch into a hunk list */
212 /* decode a binary patch into a hunk list */
209 static struct flist *decode(char *bin, int len)
213 static struct flist *decode(char *bin, int len)
210 {
214 {
211 struct flist *l;
215 struct flist *l;
212 struct frag *lt;
216 struct frag *lt;
213 char *end = bin + len;
217 char *end = bin + len;
214 char decode[12]; /* for dealing with alignment issues */
218 char decode[12]; /* for dealing with alignment issues */
215
219
216 /* assume worst case size, we won't have many of these lists */
220 /* assume worst case size, we won't have many of these lists */
217 l = lalloc(len / 12);
221 l = lalloc(len / 12);
222 if (!l)
223 return NULL;
224
218 lt = l->tail;
225 lt = l->tail;
219
226
220 while (bin < end) {
227 while (bin < end) {
221 memcpy(decode, bin, 12);
228 memcpy(decode, bin, 12);
222 lt->start = ntohl(*(uint32_t *)decode);
229 lt->start = ntohl(*(uint32_t *)decode);
223 lt->end = ntohl(*(uint32_t *)(decode + 4));
230 lt->end = ntohl(*(uint32_t *)(decode + 4));
224 lt->len = ntohl(*(uint32_t *)(decode + 8));
231 lt->len = ntohl(*(uint32_t *)(decode + 8));
225 lt->data = bin + 12;
232 lt->data = bin + 12;
226 bin += 12 + lt->len;
233 bin += 12 + lt->len;
227 lt++;
234 lt++;
228 }
235 }
229
236
237 if (bin != end) {
238 if (!PyErr_Occurred())
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
240 lfree(l);
241 return NULL;
242 }
243
230 l->tail = lt;
244 l->tail = lt;
231 return l;
245 return l;
232 }
246 }
233
247
234 /* calculate the size of resultant text */
248 /* calculate the size of resultant text */
235 static int calcsize(int len, struct flist *l)
249 static int calcsize(int len, struct flist *l)
236 {
250 {
237 int outlen = 0, last = 0;
251 int outlen = 0, last = 0;
238 struct frag *f = l->head;
252 struct frag *f = l->head;
239
253
240 while (f != l->tail) {
254 while (f != l->tail) {
255 if (f->start < last || f->end > len) {
256 if (!PyErr_Occurred())
257 PyErr_SetString(mpatch_Error,
258 "invalid patch");
259 return -1;
260 }
241 outlen += f->start - last;
261 outlen += f->start - last;
242 last = f->end;
262 last = f->end;
243 outlen += f->len;
263 outlen += f->len;
244 f++;
264 f++;
245 }
265 }
246
266
247 outlen += len - last;
267 outlen += len - last;
248 return outlen;
268 return outlen;
249 }
269 }
250
270
251 static void apply(char *buf, char *orig, int len, struct flist *l)
271 static int apply(char *buf, char *orig, int len, struct flist *l)
252 {
272 {
253 struct frag *f = l->head;
273 struct frag *f = l->head;
254 int last = 0;
274 int last = 0;
255 char *p = buf;
275 char *p = buf;
256
276
257 while (f != l->tail) {
277 while (f != l->tail) {
278 if (f->start < last || f->end > len) {
279 if (!PyErr_Occurred())
280 PyErr_SetString(mpatch_Error,
281 "invalid patch");
282 return 0;
283 }
258 memcpy(p, orig + last, f->start - last);
284 memcpy(p, orig + last, f->start - last);
259 p += f->start - last;
285 p += f->start - last;
260 memcpy(p, f->data, f->len);
286 memcpy(p, f->data, f->len);
261 last = f->end;
287 last = f->end;
262 p += f->len;
288 p += f->len;
263 f++;
289 f++;
264 }
290 }
265 memcpy(p, orig + last, len - last);
291 memcpy(p, orig + last, len - last);
292 return 1;
266 }
293 }
267
294
268 /* recursively generate a patch of all bins between start and end */
295 /* recursively generate a patch of all bins between start and end */
269 static struct flist *fold(PyObject *bins, int start, int end)
296 static struct flist *fold(PyObject *bins, int start, int end)
270 {
297 {
271 int len;
298 int len;
272
299
273 if (start + 1 == end) {
300 if (start + 1 == end) {
274 /* trivial case, output a decoded list */
301 /* trivial case, output a decoded list */
275 PyObject *tmp = PyList_GetItem(bins, start);
302 PyObject *tmp = PyList_GetItem(bins, start);
276 if (!tmp)
303 if (!tmp)
277 return NULL;
304 return NULL;
278 return decode(PyString_AsString(tmp), PyString_Size(tmp));
305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
279 }
306 }
280
307
281 /* divide and conquer, memory management is elsewhere */
308 /* divide and conquer, memory management is elsewhere */
282 len = (end - start) / 2;
309 len = (end - start) / 2;
283 return combine(fold(bins, start, start + len),
310 return combine(fold(bins, start, start + len),
284 fold(bins, start + len, end));
311 fold(bins, start + len, end));
285 }
312 }
286
313
287 static PyObject *
314 static PyObject *
288 patches(PyObject *self, PyObject *args)
315 patches(PyObject *self, PyObject *args)
289 {
316 {
290 PyObject *text, *bins, *result;
317 PyObject *text, *bins, *result;
291 struct flist *patch;
318 struct flist *patch;
292 char *in, *out;
319 char *in, *out;
293 int len, outlen;
320 int len, outlen;
294
321
295 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
296 return NULL;
323 return NULL;
297
324
298 len = PyList_Size(bins);
325 len = PyList_Size(bins);
299 if (!len) {
326 if (!len) {
300 /* nothing to do */
327 /* nothing to do */
301 Py_INCREF(text);
328 Py_INCREF(text);
302 return text;
329 return text;
303 }
330 }
304
331
305 patch = fold(bins, 0, len);
332 patch = fold(bins, 0, len);
306 if (!patch)
333 if (!patch)
307 return PyErr_NoMemory();
334 return NULL;
308
335
309 outlen = calcsize(PyString_Size(text), patch);
336 outlen = calcsize(PyString_Size(text), patch);
337 if (outlen < 0) {
338 result = NULL;
339 goto cleanup;
340 }
310 result = PyString_FromStringAndSize(NULL, outlen);
341 result = PyString_FromStringAndSize(NULL, outlen);
311 if (result) {
342 if (!result) {
312 in = PyString_AsString(text);
343 result = NULL;
313 out = PyString_AsString(result);
344 goto cleanup;
314 apply(out, in, PyString_Size(text), patch);
315 }
345 }
316
346 in = PyString_AsString(text);
347 out = PyString_AsString(result);
348 if (!apply(out, in, PyString_Size(text), patch)) {
349 Py_DECREF(result);
350 result = NULL;
351 }
352 cleanup:
317 lfree(patch);
353 lfree(patch);
318 return result;
354 return result;
319 }
355 }
320
356
321 static PyMethodDef methods[] = {
357 static PyMethodDef methods[] = {
322 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
358 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
323 {NULL, NULL}
359 {NULL, NULL}
324 };
360 };
325
361
326 PyMODINIT_FUNC
362 PyMODINIT_FUNC
327 initmpatch(void)
363 initmpatch(void)
328 {
364 {
329 Py_InitModule3("mpatch", methods, mpatch_doc);
365 Py_InitModule3("mpatch", methods, mpatch_doc);
366 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
330 }
367 }
331
368
General Comments 0
You need to be logged in to leave comments. Login now