##// END OF EJS Templates
Add safety checking to mpatch
mpm@selenic.com -
r128:d6afb6db default
parent child Browse files
Show More
@@ -1,293 +1,312
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #include <netinet/in.h>
26 #include <netinet/in.h>
27 #include <sys/types.h>
27 #include <sys/types.h>
28
28
29 static char mpatch_doc[] = "Efficient binary patching.";
29 static char mpatch_doc[] = "Efficient binary patching.";
30
30
31 struct frag {
31 struct frag {
32 int start, end, len;
32 int start, end, len;
33 char *data;
33 char *data;
34 };
34 };
35
35
36 struct flist {
36 struct flist {
37 struct frag *base, *head, *tail;
37 struct frag *base, *head, *tail;
38 };
38 };
39
39
40 static struct flist *lalloc(int size)
40 static struct flist *lalloc(int size)
41 {
41 {
42 struct flist *a;
42 struct flist *a = NULL;
43
43
44 a = malloc(sizeof(struct flist));
44 a = malloc(sizeof(struct flist));
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
45 if (a) {
46 a->base = malloc(sizeof(struct frag) * size);
47 if (!a->base)
48 free(a);
49 else
50 a->head = a->tail = a->base;
51 }
46 return a;
52 return a;
47 }
53 }
48
54
49 static void lfree(struct flist *a)
55 static void lfree(struct flist *a)
50 {
56 {
57 if (a) {
51 free(a->base);
58 free(a->base);
52 free(a);
59 free(a);
53 }
60 }
61 }
54
62
55 static int lsize(struct flist *a)
63 static int lsize(struct flist *a)
56 {
64 {
57 return a->tail - a->head;
65 return a->tail - a->head;
58 }
66 }
59
67
60 /* move hunks in source that are less cut to dest, compensating
68 /* move hunks in source that are less cut to dest, compensating
61 for changes in offset. the last hunk may be split if necessary.
69 for changes in offset. the last hunk may be split if necessary.
62 */
70 */
63 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
71 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
64 {
72 {
65 struct frag *d = dest->tail, *s = src->head;
73 struct frag *d = dest->tail, *s = src->head;
66 int postend, c, l;
74 int postend, c, l;
67
75
68 while (s != src->tail) {
76 while (s != src->tail) {
69 if (s->start + offset >= cut)
77 if (s->start + offset >= cut)
70 break; /* we've gone far enough */
78 break; /* we've gone far enough */
71
79
72 postend = offset + s->start + s->len;
80 postend = offset + s->start + s->len;
73 if (postend <= cut) {
81 if (postend <= cut) {
74 /* save this hunk */
82 /* save this hunk */
75 offset += s->start + s->len - s->end;
83 offset += s->start + s->len - s->end;
76 *d++ = *s++;
84 *d++ = *s++;
77 }
85 }
78 else {
86 else {
79 /* break up this hunk */
87 /* break up this hunk */
80 c = cut - offset;
88 c = cut - offset;
81 if (s->end < c)
89 if (s->end < c)
82 c = s->end;
90 c = s->end;
83 l = cut - offset - s->start;
91 l = cut - offset - s->start;
84 if (s->len < l)
92 if (s->len < l)
85 l = s->len;
93 l = s->len;
86
94
87 offset += s->start + l - c;
95 offset += s->start + l - c;
88
96
89 d->start = s->start;
97 d->start = s->start;
90 d->end = c;
98 d->end = c;
91 d->len = l;
99 d->len = l;
92 d->data = s->data;
100 d->data = s->data;
93 d++;
101 d++;
94 s->start = c;
102 s->start = c;
95 s->len = s->len - l;
103 s->len = s->len - l;
96 s->data = s->data + l;
104 s->data = s->data + l;
97
105
98 break;
106 break;
99 }
107 }
100 }
108 }
101
109
102 dest->tail = d;
110 dest->tail = d;
103 src->head = s;
111 src->head = s;
104 return offset;
112 return offset;
105 }
113 }
106
114
107 /* like gather, but with no output list */
115 /* like gather, but with no output list */
108 static int discard(struct flist *src, int cut, int offset)
116 static int discard(struct flist *src, int cut, int offset)
109 {
117 {
110 struct frag *s = src->head;
118 struct frag *s = src->head;
111 int postend, c, l;
119 int postend, c, l;
112
120
113 while (s != src->tail) {
121 while (s != src->tail) {
114 if (s->start + offset >= cut)
122 if (s->start + offset >= cut)
115 break;
123 break;
116
124
117 postend = offset + s->start + s->len;
125 postend = offset + s->start + s->len;
118 if (postend <= cut) {
126 if (postend <= cut) {
119 offset += s->start + s->len - s->end;
127 offset += s->start + s->len - s->end;
120 s++;
128 s++;
121 }
129 }
122 else {
130 else {
123 c = cut - offset;
131 c = cut - offset;
124 if (s->end < c)
132 if (s->end < c)
125 c = s->end;
133 c = s->end;
126 l = cut - offset - s->start;
134 l = cut - offset - s->start;
127 if (s->len < l)
135 if (s->len < l)
128 l = s->len;
136 l = s->len;
129
137
130 offset += s->start + l - c;
138 offset += s->start + l - c;
131 s->start = c;
139 s->start = c;
132 s->len = s->len - l;
140 s->len = s->len - l;
133 s->data = s->data + l;
141 s->data = s->data + l;
134
142
135 break;
143 break;
136 }
144 }
137 }
145 }
138
146
139 src->head = s;
147 src->head = s;
140 return offset;
148 return offset;
141 }
149 }
142
150
143 /* combine hunk lists a and b, while adjusting b for offset changes in a/
151 /* combine hunk lists a and b, while adjusting b for offset changes in a/
144 this deletes a and b and returns the resultant list. */
152 this deletes a and b and returns the resultant list. */
145 static struct flist *combine(struct flist *a, struct flist *b)
153 static struct flist *combine(struct flist *a, struct flist *b)
146 {
154 {
147 struct flist *c;
155 struct flist *c = NULL;
148 struct frag *bh = b->head, *ct;
156 struct frag *bh, *ct;
149 int offset = 0, post;
157 int offset = 0, post;
150
158
159 if (a && b)
151 c = lalloc((lsize(a) + lsize(b)) * 2);
160 c = lalloc((lsize(a) + lsize(b)) * 2);
152
161
153 while (bh != b->tail) {
162 if (c) {
163
164 for (bh = b->head; bh != b->tail; bh++) {
154 /* save old hunks */
165 /* save old hunks */
155 offset = gather(c, a, bh->start, offset);
166 offset = gather(c, a, bh->start, offset);
156
167
157 /* discard replaced hunks */
168 /* discard replaced hunks */
158 post = discard(a, bh->end, offset);
169 post = discard(a, bh->end, offset);
159
170
160 /* insert new hunk */
171 /* insert new hunk */
161 ct = c->tail;
172 ct = c->tail;
162 ct->start = bh->start - offset;
173 ct->start = bh->start - offset;
163 ct->end = bh->end - post;
174 ct->end = bh->end - post;
164 ct->len = bh->len;
175 ct->len = bh->len;
165 ct->data = bh->data;
176 ct->data = bh->data;
166 c->tail++;
177 c->tail++;
167 bh++;
168 offset = post;
178 offset = post;
169 }
179 }
170
180
171 /* hold on to tail from a */
181 /* hold on to tail from a */
172 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
182 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
173 c->tail += lsize(a);
183 c->tail += lsize(a);
184 }
185
174 lfree(a);
186 lfree(a);
175 lfree(b);
187 lfree(b);
176 return c;
188 return c;
177 }
189 }
178
190
179 /* decode a binary patch into a hunk list */
191 /* decode a binary patch into a hunk list */
180 static struct flist *decode(char *bin, int len)
192 static struct flist *decode(char *bin, int len)
181 {
193 {
182 struct flist *l;
194 struct flist *l;
183 struct frag *lt;
195 struct frag *lt;
184 char *end = bin + len;
196 char *end = bin + len;
185
197
186 /* assume worst case size, we won't have many of these lists */
198 /* assume worst case size, we won't have many of these lists */
187 l = lalloc(len / 12);
199 l = lalloc(len / 12);
188 lt = l->tail;
200 lt = l->tail;
189
201
190 while (bin < end) {
202 while (bin < end) {
191 lt->start = ntohl(*(uint32_t *)bin);
203 lt->start = ntohl(*(uint32_t *)bin);
192 lt->end = ntohl(*(uint32_t *)(bin + 4));
204 lt->end = ntohl(*(uint32_t *)(bin + 4));
193 lt->len = ntohl(*(uint32_t *)(bin + 8));
205 lt->len = ntohl(*(uint32_t *)(bin + 8));
194 lt->data = bin + 12;
206 lt->data = bin + 12;
195 bin += 12 + lt->len;
207 bin += 12 + lt->len;
196 lt++;
208 lt++;
197 }
209 }
198
210
199 l->tail = lt;
211 l->tail = lt;
200 return l;
212 return l;
201 }
213 }
202
214
203 /* calculate the size of resultant text */
215 /* calculate the size of resultant text */
204 static int calcsize(int len, struct flist *l)
216 static int calcsize(int len, struct flist *l)
205 {
217 {
206 int outlen = 0, last = 0;
218 int outlen = 0, last = 0;
207 struct frag *f = l->head;
219 struct frag *f = l->head;
208
220
209 while (f != l->tail) {
221 while (f != l->tail) {
210 outlen += f->start - last;
222 outlen += f->start - last;
211 last = f->end;
223 last = f->end;
212 outlen += f->len;
224 outlen += f->len;
213 f++;
225 f++;
214 }
226 }
215
227
216 outlen += len - last;
228 outlen += len - last;
217 return outlen;
229 return outlen;
218 }
230 }
219
231
220 static void apply(char *buf, char *orig, int len, struct flist *l)
232 static void apply(char *buf, char *orig, int len, struct flist *l)
221 {
233 {
222 struct frag *f = l->head;
234 struct frag *f = l->head;
223 int last = 0;
235 int last = 0;
224 char *p = buf;
236 char *p = buf;
225
237
226 while (f != l->tail) {
238 while (f != l->tail) {
227 memcpy(p, orig + last, f->start - last);
239 memcpy(p, orig + last, f->start - last);
228 p += f->start - last;
240 p += f->start - last;
229 memcpy(p, f->data, f->len);
241 memcpy(p, f->data, f->len);
230 last = f->end;
242 last = f->end;
231 p += f->len;
243 p += f->len;
232 f++;
244 f++;
233 }
245 }
234 memcpy(p, orig + last, len - last);
246 memcpy(p, orig + last, len - last);
235 }
247 }
236
248
237 /* recursively generate a patch of all bins between start and end */
249 /* recursively generate a patch of all bins between start and end */
238 static struct flist *fold(PyObject *bins, int start, int end)
250 static struct flist *fold(PyObject *bins, int start, int end)
239 {
251 {
240 int len;
252 int len;
241
253
242 if (start + 1 == end) {
254 if (start + 1 == end) {
243 /* trivial case, output a decoded list */
255 /* trivial case, output a decoded list */
244 PyObject *tmp = PyList_GetItem(bins, start);
256 PyObject *tmp = PyList_GetItem(bins, start);
257 if (!tmp)
258 return NULL;
245 return decode(PyString_AsString(tmp), PyString_Size(tmp));
259 return decode(PyString_AsString(tmp), PyString_Size(tmp));
246 }
260 }
247
261
248 /* divide and conquer, memory management is elsewhere */
262 /* divide and conquer, memory management is elsewhere */
249 len = (end - start) / 2;
263 len = (end - start) / 2;
250 return combine(fold(bins, start, start + len),
264 return combine(fold(bins, start, start + len),
251 fold(bins, start + len, end));
265 fold(bins, start + len, end));
252 }
266 }
253
267
254 static PyObject *
268 static PyObject *
255 patches(PyObject *self, PyObject *args)
269 patches(PyObject *self, PyObject *args)
256 {
270 {
257 PyObject *text, *bins, *result;
271 PyObject *text, *bins, *result;
258 struct flist *patch;
272 struct flist *patch;
259 char *in, *out;
273 char *in, *out;
260 int len, outlen;
274 int len, outlen;
261
275
262 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
276 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
263 return NULL;
277 return NULL;
264
278
265 len = PyList_Size(bins);
279 len = PyList_Size(bins);
266 if (!len) {
280 if (!len) {
267 /* nothing to do */
281 /* nothing to do */
268 Py_INCREF(text);
282 Py_INCREF(text);
269 return text;
283 return text;
270 }
284 }
271
285
272 patch = fold(bins, 0, len);
286 patch = fold(bins, 0, len);
287 if (!patch)
288 return PyErr_NoMemory();
289
273 outlen = calcsize(PyString_Size(text), patch);
290 outlen = calcsize(PyString_Size(text), patch);
274 result = PyString_FromStringAndSize(NULL, outlen);
291 result = PyString_FromStringAndSize(NULL, outlen);
292 if (result) {
275 in = PyString_AsString(text);
293 in = PyString_AsString(text);
276 out = PyString_AsString(result);
294 out = PyString_AsString(result);
277 apply(out, in, PyString_Size(text), patch);
295 apply(out, in, PyString_Size(text), patch);
296 }
297
278 lfree(patch);
298 lfree(patch);
279
280 return result;
299 return result;
281 }
300 }
282
301
283 static PyMethodDef methods[] = {
302 static PyMethodDef methods[] = {
284 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
303 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
285 {NULL, NULL}
304 {NULL, NULL}
286 };
305 };
287
306
288 PyMODINIT_FUNC
307 PyMODINIT_FUNC
289 initmpatch(void)
308 initmpatch(void)
290 {
309 {
291 Py_InitModule3("mpatch", methods, mpatch_doc);
310 Py_InitModule3("mpatch", methods, mpatch_doc);
292 }
311 }
293
312
General Comments 0
You need to be logged in to leave comments. Login now