##// END OF EJS Templates
Add safety checking to mpatch
mpm@selenic.com -
r128:d6afb6db default
parent child Browse files
Show More
@@ -1,293 +1,312 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26 #include <netinet/in.h>
27 27 #include <sys/types.h>
28 28
29 29 static char mpatch_doc[] = "Efficient binary patching.";
30 30
31 31 struct frag {
32 32 int start, end, len;
33 33 char *data;
34 34 };
35 35
36 36 struct flist {
37 37 struct frag *base, *head, *tail;
38 38 };
39 39
40 40 static struct flist *lalloc(int size)
41 41 {
42 struct flist *a;
42 struct flist *a = NULL;
43 43
44 44 a = malloc(sizeof(struct flist));
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
45 if (a) {
46 a->base = malloc(sizeof(struct frag) * size);
47 if (!a->base)
48 free(a);
49 else
50 a->head = a->tail = a->base;
51 }
46 52 return a;
47 53 }
48 54
49 55 static void lfree(struct flist *a)
50 56 {
51 free(a->base);
52 free(a);
57 if (a) {
58 free(a->base);
59 free(a);
60 }
53 61 }
54 62
55 63 static int lsize(struct flist *a)
56 64 {
57 65 return a->tail - a->head;
58 66 }
59 67
60 68 /* move hunks in source that are less cut to dest, compensating
61 69 for changes in offset. the last hunk may be split if necessary.
62 70 */
63 71 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
64 72 {
65 73 struct frag *d = dest->tail, *s = src->head;
66 74 int postend, c, l;
67 75
68 76 while (s != src->tail) {
69 77 if (s->start + offset >= cut)
70 78 break; /* we've gone far enough */
71 79
72 80 postend = offset + s->start + s->len;
73 81 if (postend <= cut) {
74 82 /* save this hunk */
75 83 offset += s->start + s->len - s->end;
76 84 *d++ = *s++;
77 85 }
78 86 else {
79 87 /* break up this hunk */
80 88 c = cut - offset;
81 89 if (s->end < c)
82 90 c = s->end;
83 91 l = cut - offset - s->start;
84 92 if (s->len < l)
85 93 l = s->len;
86 94
87 95 offset += s->start + l - c;
88 96
89 97 d->start = s->start;
90 98 d->end = c;
91 99 d->len = l;
92 100 d->data = s->data;
93 101 d++;
94 102 s->start = c;
95 103 s->len = s->len - l;
96 104 s->data = s->data + l;
97 105
98 106 break;
99 107 }
100 108 }
101 109
102 110 dest->tail = d;
103 111 src->head = s;
104 112 return offset;
105 113 }
106 114
107 115 /* like gather, but with no output list */
108 116 static int discard(struct flist *src, int cut, int offset)
109 117 {
110 118 struct frag *s = src->head;
111 119 int postend, c, l;
112 120
113 121 while (s != src->tail) {
114 122 if (s->start + offset >= cut)
115 123 break;
116 124
117 125 postend = offset + s->start + s->len;
118 126 if (postend <= cut) {
119 127 offset += s->start + s->len - s->end;
120 128 s++;
121 129 }
122 130 else {
123 131 c = cut - offset;
124 132 if (s->end < c)
125 133 c = s->end;
126 134 l = cut - offset - s->start;
127 135 if (s->len < l)
128 136 l = s->len;
129 137
130 138 offset += s->start + l - c;
131 139 s->start = c;
132 140 s->len = s->len - l;
133 141 s->data = s->data + l;
134 142
135 143 break;
136 144 }
137 145 }
138 146
139 147 src->head = s;
140 148 return offset;
141 149 }
142 150
143 151 /* combine hunk lists a and b, while adjusting b for offset changes in a/
144 152 this deletes a and b and returns the resultant list. */
145 153 static struct flist *combine(struct flist *a, struct flist *b)
146 154 {
147 struct flist *c;
148 struct frag *bh = b->head, *ct;
155 struct flist *c = NULL;
156 struct frag *bh, *ct;
149 157 int offset = 0, post;
150 158
151 c = lalloc((lsize(a) + lsize(b)) * 2);
159 if (a && b)
160 c = lalloc((lsize(a) + lsize(b)) * 2);
161
162 if (c) {
152 163
153 while (bh != b->tail) {
154 /* save old hunks */
155 offset = gather(c, a, bh->start, offset);
164 for (bh = b->head; bh != b->tail; bh++) {
165 /* save old hunks */
166 offset = gather(c, a, bh->start, offset);
156 167
157 /* discard replaced hunks */
158 post = discard(a, bh->end, offset);
168 /* discard replaced hunks */
169 post = discard(a, bh->end, offset);
159 170
160 /* insert new hunk */
161 ct = c->tail;
162 ct->start = bh->start - offset;
163 ct->end = bh->end - post;
164 ct->len = bh->len;
165 ct->data = bh->data;
166 c->tail++;
167 bh++;
168 offset = post;
171 /* insert new hunk */
172 ct = c->tail;
173 ct->start = bh->start - offset;
174 ct->end = bh->end - post;
175 ct->len = bh->len;
176 ct->data = bh->data;
177 c->tail++;
178 offset = post;
179 }
180
181 /* hold on to tail from a */
182 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
183 c->tail += lsize(a);
169 184 }
170 185
171 /* hold on to tail from a */
172 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
173 c->tail += lsize(a);
174 186 lfree(a);
175 187 lfree(b);
176 188 return c;
177 189 }
178 190
179 191 /* decode a binary patch into a hunk list */
180 192 static struct flist *decode(char *bin, int len)
181 193 {
182 194 struct flist *l;
183 195 struct frag *lt;
184 196 char *end = bin + len;
185 197
186 198 /* assume worst case size, we won't have many of these lists */
187 199 l = lalloc(len / 12);
188 200 lt = l->tail;
189 201
190 202 while (bin < end) {
191 203 lt->start = ntohl(*(uint32_t *)bin);
192 204 lt->end = ntohl(*(uint32_t *)(bin + 4));
193 205 lt->len = ntohl(*(uint32_t *)(bin + 8));
194 206 lt->data = bin + 12;
195 207 bin += 12 + lt->len;
196 208 lt++;
197 209 }
198 210
199 211 l->tail = lt;
200 212 return l;
201 213 }
202 214
203 215 /* calculate the size of resultant text */
204 216 static int calcsize(int len, struct flist *l)
205 217 {
206 218 int outlen = 0, last = 0;
207 219 struct frag *f = l->head;
208 220
209 221 while (f != l->tail) {
210 222 outlen += f->start - last;
211 223 last = f->end;
212 224 outlen += f->len;
213 225 f++;
214 226 }
215 227
216 228 outlen += len - last;
217 229 return outlen;
218 230 }
219 231
220 232 static void apply(char *buf, char *orig, int len, struct flist *l)
221 233 {
222 234 struct frag *f = l->head;
223 235 int last = 0;
224 236 char *p = buf;
225 237
226 238 while (f != l->tail) {
227 239 memcpy(p, orig + last, f->start - last);
228 240 p += f->start - last;
229 241 memcpy(p, f->data, f->len);
230 242 last = f->end;
231 243 p += f->len;
232 244 f++;
233 245 }
234 246 memcpy(p, orig + last, len - last);
235 247 }
236 248
237 249 /* recursively generate a patch of all bins between start and end */
238 250 static struct flist *fold(PyObject *bins, int start, int end)
239 251 {
240 252 int len;
241 253
242 254 if (start + 1 == end) {
243 255 /* trivial case, output a decoded list */
244 256 PyObject *tmp = PyList_GetItem(bins, start);
257 if (!tmp)
258 return NULL;
245 259 return decode(PyString_AsString(tmp), PyString_Size(tmp));
246 260 }
247 261
248 262 /* divide and conquer, memory management is elsewhere */
249 263 len = (end - start) / 2;
250 264 return combine(fold(bins, start, start + len),
251 265 fold(bins, start + len, end));
252 266 }
253 267
254 268 static PyObject *
255 269 patches(PyObject *self, PyObject *args)
256 270 {
257 271 PyObject *text, *bins, *result;
258 272 struct flist *patch;
259 273 char *in, *out;
260 274 int len, outlen;
261 275
262 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
276 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
263 277 return NULL;
264 278
265 279 len = PyList_Size(bins);
266 280 if (!len) {
267 281 /* nothing to do */
268 282 Py_INCREF(text);
269 283 return text;
270 284 }
271 285
272 286 patch = fold(bins, 0, len);
287 if (!patch)
288 return PyErr_NoMemory();
289
273 290 outlen = calcsize(PyString_Size(text), patch);
274 291 result = PyString_FromStringAndSize(NULL, outlen);
275 in = PyString_AsString(text);
276 out = PyString_AsString(result);
277 apply(out, in, PyString_Size(text), patch);
292 if (result) {
293 in = PyString_AsString(text);
294 out = PyString_AsString(result);
295 apply(out, in, PyString_Size(text), patch);
296 }
297
278 298 lfree(patch);
279
280 299 return result;
281 300 }
282 301
283 302 static PyMethodDef methods[] = {
284 303 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
285 304 {NULL, NULL}
286 305 };
287 306
288 307 PyMODINIT_FUNC
289 308 initmpatch(void)
290 309 {
291 310 Py_InitModule3("mpatch", methods, mpatch_doc);
292 311 }
293 312
General Comments 0
You need to be logged in to leave comments. Login now