##// END OF EJS Templates
Gotos are embarrassing.
mpm@selenic.com -
r82:7ed96baa default
parent child Browse files
Show More
@@ -1,295 +1,293
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #include <netinet/in.h>
26 #include <netinet/in.h>
27 #include <sys/types.h>
27 #include <sys/types.h>
28
28
29 static char mpatch_doc[] = "Efficient binary patching.";
29 static char mpatch_doc[] = "Efficient binary patching.";
30
30
31 struct frag {
31 struct frag {
32 int start, end, len;
32 int start, end, len;
33 char *data;
33 char *data;
34 };
34 };
35
35
36 struct flist {
36 struct flist {
37 struct frag *base, *head, *tail;
37 struct frag *base, *head, *tail;
38 };
38 };
39
39
40 static struct flist *lalloc(int size)
40 static struct flist *lalloc(int size)
41 {
41 {
42 struct flist *a;
42 struct flist *a;
43
43
44 a = malloc(sizeof(struct flist));
44 a = malloc(sizeof(struct flist));
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
45 a->head = a->tail = a->base = malloc(sizeof(struct frag) * size);
46 return a;
46 return a;
47 }
47 }
48
48
49 static void lfree(struct flist *a)
49 static void lfree(struct flist *a)
50 {
50 {
51 free(a->base);
51 free(a->base);
52 free(a);
52 free(a);
53 }
53 }
54
54
55 static int lsize(struct flist *a)
55 static int lsize(struct flist *a)
56 {
56 {
57 return a->tail - a->head;
57 return a->tail - a->head;
58 }
58 }
59
59
60 /* move hunks in source that are less cut to dest, compensating
60 /* move hunks in source that are less cut to dest, compensating
61 for changes in offset. the last hunk may be split if necessary.
61 for changes in offset. the last hunk may be split if necessary.
62 */
62 */
63 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
63 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
64 {
64 {
65 struct frag *d = dest->tail, *s = src->head;
65 struct frag *d = dest->tail, *s = src->head;
66 int postend, c, l;
66 int postend, c, l;
67
67
68 while (s != src->tail) {
68 while (s != src->tail) {
69 if (s->start + offset >= cut)
69 if (s->start + offset >= cut)
70 goto exit; /* we've gone far enough */
70 break; /* we've gone far enough */
71
71
72 postend = offset + s->start + s->len;
72 postend = offset + s->start + s->len;
73 if (postend <= cut) {
73 if (postend <= cut) {
74 /* save this hunk */
74 /* save this hunk */
75 offset += s->start + s->len - s->end;
75 offset += s->start + s->len - s->end;
76 *d++ = *s++;
76 *d++ = *s++;
77 }
77 }
78 else {
78 else {
79 /* break up this hunk */
79 /* break up this hunk */
80 c = cut - offset;
80 c = cut - offset;
81 if (s->end < c)
81 if (s->end < c)
82 c = s->end;
82 c = s->end;
83 l = cut - offset - s->start;
83 l = cut - offset - s->start;
84 if (s->len < l)
84 if (s->len < l)
85 l = s->len;
85 l = s->len;
86
86
87 offset += s->start + l - c;
87 offset += s->start + l - c;
88
88
89 d->start = s->start;
89 d->start = s->start;
90 d->end = c;
90 d->end = c;
91 d->len = l;
91 d->len = l;
92 d->data = s->data;
92 d->data = s->data;
93 d++;
93 d++;
94 s->start = c;
94 s->start = c;
95 s->len = s->len - l;
95 s->len = s->len - l;
96 s->data = s->data + l;
96 s->data = s->data + l;
97
97
98 goto exit;
98 break;
99 }
99 }
100 }
100 }
101
101
102 exit:
103 dest->tail = d;
102 dest->tail = d;
104 src->head = s;
103 src->head = s;
105 return offset;
104 return offset;
106 }
105 }
107
106
108 /* like gather, but with no output list */
107 /* like gather, but with no output list */
109 static int discard(struct flist *src, int cut, int offset)
108 static int discard(struct flist *src, int cut, int offset)
110 {
109 {
111 struct frag *s = src->head;
110 struct frag *s = src->head;
112 int postend, c, l;
111 int postend, c, l;
113
112
114 while (s != src->tail) {
113 while (s != src->tail) {
115 if (s->start + offset >= cut)
114 if (s->start + offset >= cut)
116 goto exit;
115 break;
117
116
118 postend = offset + s->start + s->len;
117 postend = offset + s->start + s->len;
119 if (postend <= cut) {
118 if (postend <= cut) {
120 offset += s->start + s->len - s->end;
119 offset += s->start + s->len - s->end;
121 s++;
120 s++;
122 }
121 }
123 else {
122 else {
124 c = cut - offset;
123 c = cut - offset;
125 if (s->end < c)
124 if (s->end < c)
126 c = s->end;
125 c = s->end;
127 l = cut - offset - s->start;
126 l = cut - offset - s->start;
128 if (s->len < l)
127 if (s->len < l)
129 l = s->len;
128 l = s->len;
130
129
131 offset += s->start + l - c;
130 offset += s->start + l - c;
132 s->start = c;
131 s->start = c;
133 s->len = s->len - l;
132 s->len = s->len - l;
134 s->data = s->data + l;
133 s->data = s->data + l;
135
134
136 goto exit;
135 break;
137 }
136 }
138 }
137 }
139
138
140 exit:
141 src->head = s;
139 src->head = s;
142 return offset;
140 return offset;
143 }
141 }
144
142
145 /* combine hunk lists a and b, while adjusting b for offset changes in a/
143 /* combine hunk lists a and b, while adjusting b for offset changes in a/
146 this deletes a and b and returns the resultant list. */
144 this deletes a and b and returns the resultant list. */
147 static struct flist *combine(struct flist *a, struct flist *b)
145 static struct flist *combine(struct flist *a, struct flist *b)
148 {
146 {
149 struct flist *c;
147 struct flist *c;
150 struct frag *bh = b->head, *ct;
148 struct frag *bh = b->head, *ct;
151 int offset = 0, post;
149 int offset = 0, post;
152
150
153 c = lalloc((lsize(a) + lsize(b)) * 2);
151 c = lalloc((lsize(a) + lsize(b)) * 2);
154
152
155 while (bh != b->tail) {
153 while (bh != b->tail) {
156 /* save old hunks */
154 /* save old hunks */
157 offset = gather(c, a, bh->start, offset);
155 offset = gather(c, a, bh->start, offset);
158
156
159 /* discard replaced hunks */
157 /* discard replaced hunks */
160 post = discard(a, bh->end, offset);
158 post = discard(a, bh->end, offset);
161
159
162 /* insert new hunk */
160 /* insert new hunk */
163 ct = c->tail;
161 ct = c->tail;
164 ct->start = bh->start - offset;
162 ct->start = bh->start - offset;
165 ct->end = bh->end - post;
163 ct->end = bh->end - post;
166 ct->len = bh->len;
164 ct->len = bh->len;
167 ct->data = bh->data;
165 ct->data = bh->data;
168 c->tail++;
166 c->tail++;
169 bh++;
167 bh++;
170 offset = post;
168 offset = post;
171 }
169 }
172
170
173 /* hold on to tail from a */
171 /* hold on to tail from a */
174 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
172 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
175 c->tail += lsize(a);
173 c->tail += lsize(a);
176 lfree(a);
174 lfree(a);
177 lfree(b);
175 lfree(b);
178 return c;
176 return c;
179 }
177 }
180
178
181 /* decode a binary patch into a hunk list */
179 /* decode a binary patch into a hunk list */
182 static struct flist *decode(char *bin, int len)
180 static struct flist *decode(char *bin, int len)
183 {
181 {
184 struct flist *l;
182 struct flist *l;
185 struct frag *lt;
183 struct frag *lt;
186 char *end = bin + len;
184 char *end = bin + len;
187
185
188 /* assume worst case size, we won't have many of these lists */
186 /* assume worst case size, we won't have many of these lists */
189 l = lalloc(len / 12);
187 l = lalloc(len / 12);
190 lt = l->tail;
188 lt = l->tail;
191
189
192 while (bin < end) {
190 while (bin < end) {
193 lt->start = ntohl(*(uint32_t *)bin);
191 lt->start = ntohl(*(uint32_t *)bin);
194 lt->end = ntohl(*(uint32_t *)(bin + 4));
192 lt->end = ntohl(*(uint32_t *)(bin + 4));
195 lt->len = ntohl(*(uint32_t *)(bin + 8));
193 lt->len = ntohl(*(uint32_t *)(bin + 8));
196 lt->data = bin + 12;
194 lt->data = bin + 12;
197 bin += 12 + lt->len;
195 bin += 12 + lt->len;
198 lt++;
196 lt++;
199 }
197 }
200
198
201 l->tail = lt;
199 l->tail = lt;
202 return l;
200 return l;
203 }
201 }
204
202
205 /* calculate the size of resultant text */
203 /* calculate the size of resultant text */
206 static int calcsize(int len, struct flist *l)
204 static int calcsize(int len, struct flist *l)
207 {
205 {
208 int outlen = 0, last = 0;
206 int outlen = 0, last = 0;
209 struct frag *f = l->head;
207 struct frag *f = l->head;
210
208
211 while (f != l->tail) {
209 while (f != l->tail) {
212 outlen += f->start - last;
210 outlen += f->start - last;
213 last = f->end;
211 last = f->end;
214 outlen += f->len;
212 outlen += f->len;
215 f++;
213 f++;
216 }
214 }
217
215
218 outlen += len - last;
216 outlen += len - last;
219 return outlen;
217 return outlen;
220 }
218 }
221
219
222 static void apply(char *buf, char *orig, int len, struct flist *l)
220 static void apply(char *buf, char *orig, int len, struct flist *l)
223 {
221 {
224 struct frag *f = l->head;
222 struct frag *f = l->head;
225 int last = 0;
223 int last = 0;
226 char *p = buf;
224 char *p = buf;
227
225
228 while (f != l->tail) {
226 while (f != l->tail) {
229 memcpy(p, orig + last, f->start - last);
227 memcpy(p, orig + last, f->start - last);
230 p += f->start - last;
228 p += f->start - last;
231 memcpy(p, f->data, f->len);
229 memcpy(p, f->data, f->len);
232 last = f->end;
230 last = f->end;
233 p += f->len;
231 p += f->len;
234 f++;
232 f++;
235 }
233 }
236 memcpy(p, orig + last, len - last);
234 memcpy(p, orig + last, len - last);
237 }
235 }
238
236
239 /* recursively generate a patch of all bins between start and end */
237 /* recursively generate a patch of all bins between start and end */
240 static struct flist *fold(PyObject *bins, int start, int end)
238 static struct flist *fold(PyObject *bins, int start, int end)
241 {
239 {
242 int len;
240 int len;
243
241
244 if (start + 1 == end) {
242 if (start + 1 == end) {
245 /* trivial case, output a decoded list */
243 /* trivial case, output a decoded list */
246 PyObject *tmp = PyList_GetItem(bins, start);
244 PyObject *tmp = PyList_GetItem(bins, start);
247 return decode(PyString_AsString(tmp), PyString_Size(tmp));
245 return decode(PyString_AsString(tmp), PyString_Size(tmp));
248 }
246 }
249
247
250 /* divide and conquer, memory management is elsewhere */
248 /* divide and conquer, memory management is elsewhere */
251 len = (end - start) / 2;
249 len = (end - start) / 2;
252 return combine(fold(bins, start, start + len),
250 return combine(fold(bins, start, start + len),
253 fold(bins, start + len, end));
251 fold(bins, start + len, end));
254 }
252 }
255
253
256 static PyObject *
254 static PyObject *
257 patches(PyObject *self, PyObject *args)
255 patches(PyObject *self, PyObject *args)
258 {
256 {
259 PyObject *text, *bins, *result;
257 PyObject *text, *bins, *result;
260 struct flist *patch;
258 struct flist *patch;
261 char *in, *out;
259 char *in, *out;
262 int len, outlen;
260 int len, outlen;
263
261
264 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
262 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
265 return NULL;
263 return NULL;
266
264
267 len = PyList_Size(bins);
265 len = PyList_Size(bins);
268 if (!len) {
266 if (!len) {
269 /* nothing to do */
267 /* nothing to do */
270 Py_INCREF(text);
268 Py_INCREF(text);
271 return text;
269 return text;
272 }
270 }
273
271
274 patch = fold(bins, 0, len);
272 patch = fold(bins, 0, len);
275 outlen = calcsize(PyString_Size(text), patch);
273 outlen = calcsize(PyString_Size(text), patch);
276 result = PyString_FromStringAndSize(NULL, outlen);
274 result = PyString_FromStringAndSize(NULL, outlen);
277 in = PyString_AsString(text);
275 in = PyString_AsString(text);
278 out = PyString_AsString(result);
276 out = PyString_AsString(result);
279 apply(out, in, PyString_Size(text), patch);
277 apply(out, in, PyString_Size(text), patch);
280 lfree(patch);
278 lfree(patch);
281
279
282 return result;
280 return result;
283 }
281 }
284
282
285 static PyMethodDef methods[] = {
283 static PyMethodDef methods[] = {
286 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
284 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
287 {NULL, NULL}
285 {NULL, NULL}
288 };
286 };
289
287
290 PyMODINIT_FUNC
288 PyMODINIT_FUNC
291 initmpatch(void)
289 initmpatch(void)
292 {
290 {
293 Py_InitModule3("mpatch", methods, mpatch_doc);
291 Py_InitModule3("mpatch", methods, mpatch_doc);
294 }
292 }
295
293
General Comments 0
You need to be logged in to leave comments. Login now