##// END OF EJS Templates
extensions: use stdint.h...
mpm@selenic.com -
r472:aa3d592d default
parent child Browse files
Show More
@@ -1,344 +1,339 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <stdint.h>
15 #ifdef _WIN32
16 #ifdef _WIN32
16
17 typedef unsigned long uint32_t;
18
19 static uint32_t htonl(uint32_t x)
17 static uint32_t htonl(uint32_t x)
20 {
18 {
21 return ((x & 0x000000ffUL) << 24) |
19 return ((x & 0x000000ffUL) << 24) |
22 ((x & 0x0000ff00UL) << 8) |
20 ((x & 0x0000ff00UL) << 8) |
23 ((x & 0x00ff0000UL) >> 8) |
21 ((x & 0x00ff0000UL) >> 8) |
24 ((x & 0xff000000UL) >> 24);
22 ((x & 0xff000000UL) >> 24);
25 }
23 }
26
27 #else
24 #else
28 #include <netinet/in.h>
25 #include <netinet/in.h>
29 #include <sys/types.h>
30 #include <stdint.h>
31 #endif
26 #endif
32
27
33 struct line {
28 struct line {
34 int h, len, n, e;
29 int h, len, n, e;
35 const char *l;
30 const char *l;
36 };
31 };
37
32
38 struct hunk {
33 struct hunk {
39 int a1, a2, b1, b2;
34 int a1, a2, b1, b2;
40 };
35 };
41
36
42 struct hunklist {
37 struct hunklist {
43 struct hunk *base, *head;
38 struct hunk *base, *head;
44 };
39 };
45
40
46 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
41 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
47 {
42 {
48 return (word << shift) | (word >> (32 - shift));
43 return (word << shift) | (word >> (32 - shift));
49 }
44 }
50
45
51 int splitlines(const char *a, int len, struct line **lr)
46 int splitlines(const char *a, int len, struct line **lr)
52 {
47 {
53 int h, i;
48 int h, i;
54 const char *p, *b = a;
49 const char *p, *b = a;
55 struct line *l;
50 struct line *l;
56
51
57 /* count the lines */
52 /* count the lines */
58 i = 1; /* extra line for sentinel */
53 i = 1; /* extra line for sentinel */
59 for (p = a; p < a + len; p++)
54 for (p = a; p < a + len; p++)
60 if (*p == '\n' || p == a + len - 1)
55 if (*p == '\n' || p == a + len - 1)
61 i++;
56 i++;
62
57
63 *lr = l = malloc(sizeof(struct line) * i);
58 *lr = l = malloc(sizeof(struct line) * i);
64 if (!l)
59 if (!l)
65 return -1;
60 return -1;
66
61
67 /* build the line array and calculate hashes */
62 /* build the line array and calculate hashes */
68 h = 0;
63 h = 0;
69 for (p = a; p < a + len; p++) {
64 for (p = a; p < a + len; p++) {
70 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
65 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
71 if (*p == '\n' || p == a + len - 1) {
66 if (*p == '\n' || p == a + len - 1) {
72 l->len = p - b + 1;
67 l->len = p - b + 1;
73 l->h = h * l->len;
68 l->h = h * l->len;
74 l->l = b;
69 l->l = b;
75 l->n = -1;
70 l->n = -1;
76 l++;
71 l++;
77 b = p + 1;
72 b = p + 1;
78 h = 0;
73 h = 0;
79 }
74 }
80 }
75 }
81
76
82 /* set up a sentinel */
77 /* set up a sentinel */
83 l->h = l->len = 0;
78 l->h = l->len = 0;
84 l->l = a + len;
79 l->l = a + len;
85 return i - 1;
80 return i - 1;
86 }
81 }
87
82
88 int inline cmp(struct line *a, struct line *b)
83 int inline cmp(struct line *a, struct line *b)
89 {
84 {
90 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
85 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
91 }
86 }
92
87
93 static int equatelines(struct line *a, int an, struct line *b, int bn)
88 static int equatelines(struct line *a, int an, struct line *b, int bn)
94 {
89 {
95 int i, j, buckets = 1, t, *h, *l;
90 int i, j, buckets = 1, t, *h, *l;
96
91
97 /* build a hash table of the next highest power of 2 */
92 /* build a hash table of the next highest power of 2 */
98 while (buckets < bn + 1)
93 while (buckets < bn + 1)
99 buckets *= 2;
94 buckets *= 2;
100
95
101 h = malloc(buckets * sizeof(int));
96 h = malloc(buckets * sizeof(int));
102 l = calloc(buckets, sizeof(int));
97 l = calloc(buckets, sizeof(int));
103 buckets = buckets - 1;
98 buckets = buckets - 1;
104 if (!h || !l) {
99 if (!h || !l) {
105 free(h);
100 free(h);
106 return 0;
101 return 0;
107 }
102 }
108
103
109 /* clear the hash table */
104 /* clear the hash table */
110 for (i = 0; i <= buckets; i++)
105 for (i = 0; i <= buckets; i++)
111 h[i] = -1;
106 h[i] = -1;
112
107
113 /* add lines to the hash table chains */
108 /* add lines to the hash table chains */
114 for (i = bn - 1; i >= 0; i--) {
109 for (i = bn - 1; i >= 0; i--) {
115 /* find the equivalence class */
110 /* find the equivalence class */
116 for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
111 for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
117 if (!cmp(b + i, b + h[j]))
112 if (!cmp(b + i, b + h[j]))
118 break;
113 break;
119
114
120 /* add to the head of the equivalence class */
115 /* add to the head of the equivalence class */
121 b[i].n = h[j];
116 b[i].n = h[j];
122 b[i].e = j;
117 b[i].e = j;
123 h[j] = i;
118 h[j] = i;
124 l[j]++; /* keep track of popularity */
119 l[j]++; /* keep track of popularity */
125 }
120 }
126
121
127 /* compute popularity threshold */
122 /* compute popularity threshold */
128 t = (bn >= 200) ? bn / 100 : bn + 1;
123 t = (bn >= 200) ? bn / 100 : bn + 1;
129
124
130 /* match items in a to their equivalence class in b */
125 /* match items in a to their equivalence class in b */
131 for (i = 0; i < an; i++) {
126 for (i = 0; i < an; i++) {
132 /* find the equivalence class */
127 /* find the equivalence class */
133 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
128 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
134 if (!cmp(a + i, b + h[j]))
129 if (!cmp(a + i, b + h[j]))
135 break;
130 break;
136
131
137 a[i].e = j; /* use equivalence class for quick compare */
132 a[i].e = j; /* use equivalence class for quick compare */
138 if(l[j] <= t)
133 if(l[j] <= t)
139 a[i].n = h[j]; /* point to head of match list */
134 a[i].n = h[j]; /* point to head of match list */
140 else
135 else
141 a[i].n = -1; /* too popular */
136 a[i].n = -1; /* too popular */
142 }
137 }
143
138
144 /* discard hash tables */
139 /* discard hash tables */
145 free(h);
140 free(h);
146 free(l);
141 free(l);
147 return 1;
142 return 1;
148 }
143 }
149
144
150 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen,
145 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen,
151 int a1, int a2, int b1, int b2, int *omi, int *omj)
146 int a1, int a2, int b1, int b2, int *omi, int *omj)
152 {
147 {
153 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
148 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
154
149
155 for (i = a1; i < a2; i++) {
150 for (i = a1; i < a2; i++) {
156 /* skip things before the current block */
151 /* skip things before the current block */
157 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
152 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
158 ;
153 ;
159
154
160 /* loop through all lines match a[i] in b */
155 /* loop through all lines match a[i] in b */
161 for (; j != -1 && j < b2; j = b[j].n) {
156 for (; j != -1 && j < b2; j = b[j].n) {
162 /* does this extend an earlier match? */
157 /* does this extend an earlier match? */
163 if (i > a1 && j > b1 && jpos[j - 1] == i - 1)
158 if (i > a1 && j > b1 && jpos[j - 1] == i - 1)
164 k = jlen[j - 1] + 1;
159 k = jlen[j - 1] + 1;
165 else
160 else
166 k = 1;
161 k = 1;
167 jpos[j] = i;
162 jpos[j] = i;
168 jlen[j] = k;
163 jlen[j] = k;
169
164
170 /* best match so far? */
165 /* best match so far? */
171 if (k > mk) {
166 if (k > mk) {
172 mi = i;
167 mi = i;
173 mj = j;
168 mj = j;
174 mk = k;
169 mk = k;
175 }
170 }
176 }
171 }
177 }
172 }
178
173
179 if (mk) {
174 if (mk) {
180 mi = mi - mk + 1;
175 mi = mi - mk + 1;
181 mj = mj - mk + 1;
176 mj = mj - mk + 1;
182 }
177 }
183
178
184 /* expand match to include neighboring popular lines */
179 /* expand match to include neighboring popular lines */
185 while (mi - mb > a1 && mj - mb > b1 &&
180 while (mi - mb > a1 && mj - mb > b1 &&
186 a[mi - mb - 1].e == b[mj - mb - 1].e)
181 a[mi - mb - 1].e == b[mj - mb - 1].e)
187 mb++;
182 mb++;
188 while (mi + mk < a2 && mj + mk < b2 &&
183 while (mi + mk < a2 && mj + mk < b2 &&
189 a[mi + mk].e == b[mj + mk].e)
184 a[mi + mk].e == b[mj + mk].e)
190 mk++;
185 mk++;
191
186
192 *omi = mi - mb;
187 *omi = mi - mb;
193 *omj = mj - mb;
188 *omj = mj - mb;
194 return mk + mb;
189 return mk + mb;
195 }
190 }
196
191
197 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen,
192 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen,
198 int a1, int a2, int b1, int b2, struct hunklist *l)
193 int a1, int a2, int b1, int b2, struct hunklist *l)
199 {
194 {
200 int i, j, k;
195 int i, j, k;
201
196
202 /* find the longest match in this chunk */
197 /* find the longest match in this chunk */
203 k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j);
198 k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j);
204 if (!k)
199 if (!k)
205 return;
200 return;
206
201
207 /* and recurse on the remaining chunks on either side */
202 /* and recurse on the remaining chunks on either side */
208 recurse(a, b, jpos, jlen, a1, i, b1, j, l);
203 recurse(a, b, jpos, jlen, a1, i, b1, j, l);
209 l->head->a1 = i;
204 l->head->a1 = i;
210 l->head->a2 = i + k;
205 l->head->a2 = i + k;
211 l->head->b1 = j;
206 l->head->b1 = j;
212 l->head->b2 = j + k;
207 l->head->b2 = j + k;
213 l->head++;
208 l->head++;
214 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
209 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
215 }
210 }
216
211
217 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
212 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
218 {
213 {
219 struct hunklist l;
214 struct hunklist l;
220 int *jpos, *jlen, t;
215 int *jpos, *jlen, t;
221
216
222 /* allocate and fill arrays */
217 /* allocate and fill arrays */
223 t = equatelines(a, an, b, bn);
218 t = equatelines(a, an, b, bn);
224 jpos = calloc(bn, sizeof(int));
219 jpos = calloc(bn, sizeof(int));
225 jlen = calloc(bn, sizeof(int));
220 jlen = calloc(bn, sizeof(int));
226 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
221 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
227
222
228 if (jpos && jlen && l.base && t) {
223 if (jpos && jlen && l.base && t) {
229 /* generate the matching block list */
224 /* generate the matching block list */
230 recurse(a, b, jpos, jlen, 0, an, 0, bn, &l);
225 recurse(a, b, jpos, jlen, 0, an, 0, bn, &l);
231 l.head->a1 = an;
226 l.head->a1 = an;
232 l.head->b1 = bn;
227 l.head->b1 = bn;
233 l.head++;
228 l.head++;
234 }
229 }
235
230
236 free(jpos);
231 free(jpos);
237 free(jlen);
232 free(jlen);
238 return l;
233 return l;
239 }
234 }
240
235
241 static PyObject *blocks(PyObject *self, PyObject *args)
236 static PyObject *blocks(PyObject *self, PyObject *args)
242 {
237 {
243 PyObject *sa, *sb, *rl = NULL, *m;
238 PyObject *sa, *sb, *rl = NULL, *m;
244 struct line *a, *b;
239 struct line *a, *b;
245 struct hunklist l;
240 struct hunklist l;
246 struct hunk *h;
241 struct hunk *h;
247 int an, bn, pos = 0;
242 int an, bn, pos = 0;
248
243
249 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
244 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
250 return NULL;
245 return NULL;
251
246
252 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
247 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
253 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
248 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
254 if (!a || !b)
249 if (!a || !b)
255 goto nomem;
250 goto nomem;
256
251
257 l = diff(a, an, b, bn);
252 l = diff(a, an, b, bn);
258 rl = PyList_New(l.head - l.base);
253 rl = PyList_New(l.head - l.base);
259 if (!l.head || !rl)
254 if (!l.head || !rl)
260 goto nomem;
255 goto nomem;
261
256
262 for(h = l.base; h != l.head; h++) {
257 for(h = l.base; h != l.head; h++) {
263 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
258 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
264 PyList_SetItem(rl, pos, m);
259 PyList_SetItem(rl, pos, m);
265 pos++;
260 pos++;
266 }
261 }
267
262
268 nomem:
263 nomem:
269 free(a);
264 free(a);
270 free(b);
265 free(b);
271 free(l.base);
266 free(l.base);
272 return rl ? rl : PyErr_NoMemory();
267 return rl ? rl : PyErr_NoMemory();
273 }
268 }
274
269
275 static PyObject *bdiff(PyObject *self, PyObject *args)
270 static PyObject *bdiff(PyObject *self, PyObject *args)
276 {
271 {
277 PyObject *sa, *sb, *result = NULL;
272 PyObject *sa, *sb, *result = NULL;
278 struct line *al, *bl;
273 struct line *al, *bl;
279 struct hunklist l;
274 struct hunklist l;
280 struct hunk *h;
275 struct hunk *h;
281 char encode[12], *rb;
276 char encode[12], *rb;
282 int an, bn, len = 0, la = 0, lb = 0;
277 int an, bn, len = 0, la = 0, lb = 0;
283
278
284 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
279 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
285 return NULL;
280 return NULL;
286
281
287 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
282 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
288 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
283 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
289 if (!al || !bl)
284 if (!al || !bl)
290 goto nomem;
285 goto nomem;
291
286
292 l = diff(al, an, bl, bn);
287 l = diff(al, an, bl, bn);
293 if (!l.head)
288 if (!l.head)
294 goto nomem;
289 goto nomem;
295
290
296 /* calculate length of output */
291 /* calculate length of output */
297 for(h = l.base; h != l.head; h++) {
292 for(h = l.base; h != l.head; h++) {
298 if (h->a1 != la || h->b1 != lb)
293 if (h->a1 != la || h->b1 != lb)
299 len += 12 + bl[h->b1].l - bl[lb].l;
294 len += 12 + bl[h->b1].l - bl[lb].l;
300 la = h->a2;
295 la = h->a2;
301 lb = h->b2;
296 lb = h->b2;
302 }
297 }
303
298
304 result = PyString_FromStringAndSize(NULL, len);
299 result = PyString_FromStringAndSize(NULL, len);
305 if (!result)
300 if (!result)
306 goto nomem;
301 goto nomem;
307
302
308 /* build binary patch */
303 /* build binary patch */
309 rb = PyString_AsString(result);
304 rb = PyString_AsString(result);
310 la = lb = 0;
305 la = lb = 0;
311
306
312 for(h = l.base; h != l.head; h++) {
307 for(h = l.base; h != l.head; h++) {
313 if (h->a1 != la || h->b1 != lb) {
308 if (h->a1 != la || h->b1 != lb) {
314 len = bl[h->b1].l - bl[lb].l;
309 len = bl[h->b1].l - bl[lb].l;
315 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
310 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
316 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
311 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
317 *(uint32_t *)(encode + 8) = htonl(len);
312 *(uint32_t *)(encode + 8) = htonl(len);
318 memcpy(rb, encode, 12);
313 memcpy(rb, encode, 12);
319 memcpy(rb + 12, bl[lb].l, len);
314 memcpy(rb + 12, bl[lb].l, len);
320 rb += 12 + len;
315 rb += 12 + len;
321 }
316 }
322 la = h->a2;
317 la = h->a2;
323 lb = h->b2;
318 lb = h->b2;
324 }
319 }
325
320
326 nomem:
321 nomem:
327 free(al);
322 free(al);
328 free(bl);
323 free(bl);
329 free(l.base);
324 free(l.base);
330 return result ? result : PyErr_NoMemory();
325 return result ? result : PyErr_NoMemory();
331 }
326 }
332
327
333 static char mdiff_doc[] = "Efficient binary diff.";
328 static char mdiff_doc[] = "Efficient binary diff.";
334
329
335 static PyMethodDef methods[] = {
330 static PyMethodDef methods[] = {
336 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
331 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
337 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
332 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
338 {NULL, NULL}
333 {NULL, NULL}
339 };
334 };
340
335
341 PyMODINIT_FUNC initbdiff(void)
336 PyMODINIT_FUNC initbdiff(void)
342 {
337 {
343 Py_InitModule3("bdiff", methods, mdiff_doc);
338 Py_InitModule3("bdiff", methods, mdiff_doc);
344 }
339 }
@@ -1,330 +1,325 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #include <stdint.h>
26 #ifdef _WIN32
27 #ifdef _WIN32
27
28 typedef unsigned long uint32_t;
29
30 static uint32_t ntohl(uint32_t x)
28 static uint32_t ntohl(uint32_t x)
31 {
29 {
32 return ((x & 0x000000ffUL) << 24) |
30 return ((x & 0x000000ffUL) << 24) |
33 ((x & 0x0000ff00UL) << 8) |
31 ((x & 0x0000ff00UL) << 8) |
34 ((x & 0x00ff0000UL) >> 8) |
32 ((x & 0x00ff0000UL) >> 8) |
35 ((x & 0xff000000UL) >> 24);
33 ((x & 0xff000000UL) >> 24);
36 }
34 }
37
38 #else
35 #else
39 #include <netinet/in.h>
36 #include <netinet/in.h>
40 #include <sys/types.h>
41 #include <stdint.h>
42 #endif
37 #endif
43
38
44 static char mpatch_doc[] = "Efficient binary patching.";
39 static char mpatch_doc[] = "Efficient binary patching.";
45
40
46 struct frag {
41 struct frag {
47 int start, end, len;
42 int start, end, len;
48 char *data;
43 char *data;
49 };
44 };
50
45
51 struct flist {
46 struct flist {
52 struct frag *base, *head, *tail;
47 struct frag *base, *head, *tail;
53 };
48 };
54
49
55 static struct flist *lalloc(int size)
50 static struct flist *lalloc(int size)
56 {
51 {
57 struct flist *a = NULL;
52 struct flist *a = NULL;
58
53
59 a = malloc(sizeof(struct flist));
54 a = malloc(sizeof(struct flist));
60 if (a) {
55 if (a) {
61 a->base = malloc(sizeof(struct frag) * size);
56 a->base = malloc(sizeof(struct frag) * size);
62 if (!a->base) {
57 if (!a->base) {
63 free(a);
58 free(a);
64 a = NULL;
59 a = NULL;
65 } else
60 } else
66 a->head = a->tail = a->base;
61 a->head = a->tail = a->base;
67 }
62 }
68 return a;
63 return a;
69 }
64 }
70
65
71 static void lfree(struct flist *a)
66 static void lfree(struct flist *a)
72 {
67 {
73 if (a) {
68 if (a) {
74 free(a->base);
69 free(a->base);
75 free(a);
70 free(a);
76 }
71 }
77 }
72 }
78
73
79 static int lsize(struct flist *a)
74 static int lsize(struct flist *a)
80 {
75 {
81 return a->tail - a->head;
76 return a->tail - a->head;
82 }
77 }
83
78
84 /* move hunks in source that are less cut to dest, compensating
79 /* move hunks in source that are less cut to dest, compensating
85 for changes in offset. the last hunk may be split if necessary.
80 for changes in offset. the last hunk may be split if necessary.
86 */
81 */
87 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
82 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
88 {
83 {
89 struct frag *d = dest->tail, *s = src->head;
84 struct frag *d = dest->tail, *s = src->head;
90 int postend, c, l;
85 int postend, c, l;
91
86
92 while (s != src->tail) {
87 while (s != src->tail) {
93 if (s->start + offset >= cut)
88 if (s->start + offset >= cut)
94 break; /* we've gone far enough */
89 break; /* we've gone far enough */
95
90
96 postend = offset + s->start + s->len;
91 postend = offset + s->start + s->len;
97 if (postend <= cut) {
92 if (postend <= cut) {
98 /* save this hunk */
93 /* save this hunk */
99 offset += s->start + s->len - s->end;
94 offset += s->start + s->len - s->end;
100 *d++ = *s++;
95 *d++ = *s++;
101 }
96 }
102 else {
97 else {
103 /* break up this hunk */
98 /* break up this hunk */
104 c = cut - offset;
99 c = cut - offset;
105 if (s->end < c)
100 if (s->end < c)
106 c = s->end;
101 c = s->end;
107 l = cut - offset - s->start;
102 l = cut - offset - s->start;
108 if (s->len < l)
103 if (s->len < l)
109 l = s->len;
104 l = s->len;
110
105
111 offset += s->start + l - c;
106 offset += s->start + l - c;
112
107
113 d->start = s->start;
108 d->start = s->start;
114 d->end = c;
109 d->end = c;
115 d->len = l;
110 d->len = l;
116 d->data = s->data;
111 d->data = s->data;
117 d++;
112 d++;
118 s->start = c;
113 s->start = c;
119 s->len = s->len - l;
114 s->len = s->len - l;
120 s->data = s->data + l;
115 s->data = s->data + l;
121
116
122 break;
117 break;
123 }
118 }
124 }
119 }
125
120
126 dest->tail = d;
121 dest->tail = d;
127 src->head = s;
122 src->head = s;
128 return offset;
123 return offset;
129 }
124 }
130
125
131 /* like gather, but with no output list */
126 /* like gather, but with no output list */
132 static int discard(struct flist *src, int cut, int offset)
127 static int discard(struct flist *src, int cut, int offset)
133 {
128 {
134 struct frag *s = src->head;
129 struct frag *s = src->head;
135 int postend, c, l;
130 int postend, c, l;
136
131
137 while (s != src->tail) {
132 while (s != src->tail) {
138 if (s->start + offset >= cut)
133 if (s->start + offset >= cut)
139 break;
134 break;
140
135
141 postend = offset + s->start + s->len;
136 postend = offset + s->start + s->len;
142 if (postend <= cut) {
137 if (postend <= cut) {
143 offset += s->start + s->len - s->end;
138 offset += s->start + s->len - s->end;
144 s++;
139 s++;
145 }
140 }
146 else {
141 else {
147 c = cut - offset;
142 c = cut - offset;
148 if (s->end < c)
143 if (s->end < c)
149 c = s->end;
144 c = s->end;
150 l = cut - offset - s->start;
145 l = cut - offset - s->start;
151 if (s->len < l)
146 if (s->len < l)
152 l = s->len;
147 l = s->len;
153
148
154 offset += s->start + l - c;
149 offset += s->start + l - c;
155 s->start = c;
150 s->start = c;
156 s->len = s->len - l;
151 s->len = s->len - l;
157 s->data = s->data + l;
152 s->data = s->data + l;
158
153
159 break;
154 break;
160 }
155 }
161 }
156 }
162
157
163 src->head = s;
158 src->head = s;
164 return offset;
159 return offset;
165 }
160 }
166
161
167 /* combine hunk lists a and b, while adjusting b for offset changes in a/
162 /* combine hunk lists a and b, while adjusting b for offset changes in a/
168 this deletes a and b and returns the resultant list. */
163 this deletes a and b and returns the resultant list. */
169 static struct flist *combine(struct flist *a, struct flist *b)
164 static struct flist *combine(struct flist *a, struct flist *b)
170 {
165 {
171 struct flist *c = NULL;
166 struct flist *c = NULL;
172 struct frag *bh, *ct;
167 struct frag *bh, *ct;
173 int offset = 0, post;
168 int offset = 0, post;
174
169
175 if (a && b)
170 if (a && b)
176 c = lalloc((lsize(a) + lsize(b)) * 2);
171 c = lalloc((lsize(a) + lsize(b)) * 2);
177
172
178 if (c) {
173 if (c) {
179
174
180 for (bh = b->head; bh != b->tail; bh++) {
175 for (bh = b->head; bh != b->tail; bh++) {
181 /* save old hunks */
176 /* save old hunks */
182 offset = gather(c, a, bh->start, offset);
177 offset = gather(c, a, bh->start, offset);
183
178
184 /* discard replaced hunks */
179 /* discard replaced hunks */
185 post = discard(a, bh->end, offset);
180 post = discard(a, bh->end, offset);
186
181
187 /* insert new hunk */
182 /* insert new hunk */
188 ct = c->tail;
183 ct = c->tail;
189 ct->start = bh->start - offset;
184 ct->start = bh->start - offset;
190 ct->end = bh->end - post;
185 ct->end = bh->end - post;
191 ct->len = bh->len;
186 ct->len = bh->len;
192 ct->data = bh->data;
187 ct->data = bh->data;
193 c->tail++;
188 c->tail++;
194 offset = post;
189 offset = post;
195 }
190 }
196
191
197 /* hold on to tail from a */
192 /* hold on to tail from a */
198 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
193 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
199 c->tail += lsize(a);
194 c->tail += lsize(a);
200 }
195 }
201
196
202 lfree(a);
197 lfree(a);
203 lfree(b);
198 lfree(b);
204 return c;
199 return c;
205 }
200 }
206
201
207 /* decode a binary patch into a hunk list */
202 /* decode a binary patch into a hunk list */
208 static struct flist *decode(char *bin, int len)
203 static struct flist *decode(char *bin, int len)
209 {
204 {
210 struct flist *l;
205 struct flist *l;
211 struct frag *lt;
206 struct frag *lt;
212 char *end = bin + len;
207 char *end = bin + len;
213 char decode[12]; /* for dealing with alignment issues */
208 char decode[12]; /* for dealing with alignment issues */
214
209
215 /* assume worst case size, we won't have many of these lists */
210 /* assume worst case size, we won't have many of these lists */
216 l = lalloc(len / 12);
211 l = lalloc(len / 12);
217 lt = l->tail;
212 lt = l->tail;
218
213
219 while (bin < end) {
214 while (bin < end) {
220 memcpy(decode, bin, 12);
215 memcpy(decode, bin, 12);
221 lt->start = ntohl(*(uint32_t *)decode);
216 lt->start = ntohl(*(uint32_t *)decode);
222 lt->end = ntohl(*(uint32_t *)(decode + 4));
217 lt->end = ntohl(*(uint32_t *)(decode + 4));
223 lt->len = ntohl(*(uint32_t *)(decode + 8));
218 lt->len = ntohl(*(uint32_t *)(decode + 8));
224 lt->data = bin + 12;
219 lt->data = bin + 12;
225 bin += 12 + lt->len;
220 bin += 12 + lt->len;
226 lt++;
221 lt++;
227 }
222 }
228
223
229 l->tail = lt;
224 l->tail = lt;
230 return l;
225 return l;
231 }
226 }
232
227
233 /* calculate the size of resultant text */
228 /* calculate the size of resultant text */
234 static int calcsize(int len, struct flist *l)
229 static int calcsize(int len, struct flist *l)
235 {
230 {
236 int outlen = 0, last = 0;
231 int outlen = 0, last = 0;
237 struct frag *f = l->head;
232 struct frag *f = l->head;
238
233
239 while (f != l->tail) {
234 while (f != l->tail) {
240 outlen += f->start - last;
235 outlen += f->start - last;
241 last = f->end;
236 last = f->end;
242 outlen += f->len;
237 outlen += f->len;
243 f++;
238 f++;
244 }
239 }
245
240
246 outlen += len - last;
241 outlen += len - last;
247 return outlen;
242 return outlen;
248 }
243 }
249
244
250 static void apply(char *buf, char *orig, int len, struct flist *l)
245 static void apply(char *buf, char *orig, int len, struct flist *l)
251 {
246 {
252 struct frag *f = l->head;
247 struct frag *f = l->head;
253 int last = 0;
248 int last = 0;
254 char *p = buf;
249 char *p = buf;
255
250
256 while (f != l->tail) {
251 while (f != l->tail) {
257 memcpy(p, orig + last, f->start - last);
252 memcpy(p, orig + last, f->start - last);
258 p += f->start - last;
253 p += f->start - last;
259 memcpy(p, f->data, f->len);
254 memcpy(p, f->data, f->len);
260 last = f->end;
255 last = f->end;
261 p += f->len;
256 p += f->len;
262 f++;
257 f++;
263 }
258 }
264 memcpy(p, orig + last, len - last);
259 memcpy(p, orig + last, len - last);
265 }
260 }
266
261
267 /* recursively generate a patch of all bins between start and end */
262 /* recursively generate a patch of all bins between start and end */
268 static struct flist *fold(PyObject *bins, int start, int end)
263 static struct flist *fold(PyObject *bins, int start, int end)
269 {
264 {
270 int len;
265 int len;
271
266
272 if (start + 1 == end) {
267 if (start + 1 == end) {
273 /* trivial case, output a decoded list */
268 /* trivial case, output a decoded list */
274 PyObject *tmp = PyList_GetItem(bins, start);
269 PyObject *tmp = PyList_GetItem(bins, start);
275 if (!tmp)
270 if (!tmp)
276 return NULL;
271 return NULL;
277 return decode(PyString_AsString(tmp), PyString_Size(tmp));
272 return decode(PyString_AsString(tmp), PyString_Size(tmp));
278 }
273 }
279
274
280 /* divide and conquer, memory management is elsewhere */
275 /* divide and conquer, memory management is elsewhere */
281 len = (end - start) / 2;
276 len = (end - start) / 2;
282 return combine(fold(bins, start, start + len),
277 return combine(fold(bins, start, start + len),
283 fold(bins, start + len, end));
278 fold(bins, start + len, end));
284 }
279 }
285
280
286 static PyObject *
281 static PyObject *
287 patches(PyObject *self, PyObject *args)
282 patches(PyObject *self, PyObject *args)
288 {
283 {
289 PyObject *text, *bins, *result;
284 PyObject *text, *bins, *result;
290 struct flist *patch;
285 struct flist *patch;
291 char *in, *out;
286 char *in, *out;
292 int len, outlen;
287 int len, outlen;
293
288
294 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
289 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
295 return NULL;
290 return NULL;
296
291
297 len = PyList_Size(bins);
292 len = PyList_Size(bins);
298 if (!len) {
293 if (!len) {
299 /* nothing to do */
294 /* nothing to do */
300 Py_INCREF(text);
295 Py_INCREF(text);
301 return text;
296 return text;
302 }
297 }
303
298
304 patch = fold(bins, 0, len);
299 patch = fold(bins, 0, len);
305 if (!patch)
300 if (!patch)
306 return PyErr_NoMemory();
301 return PyErr_NoMemory();
307
302
308 outlen = calcsize(PyString_Size(text), patch);
303 outlen = calcsize(PyString_Size(text), patch);
309 result = PyString_FromStringAndSize(NULL, outlen);
304 result = PyString_FromStringAndSize(NULL, outlen);
310 if (result) {
305 if (result) {
311 in = PyString_AsString(text);
306 in = PyString_AsString(text);
312 out = PyString_AsString(result);
307 out = PyString_AsString(result);
313 apply(out, in, PyString_Size(text), patch);
308 apply(out, in, PyString_Size(text), patch);
314 }
309 }
315
310
316 lfree(patch);
311 lfree(patch);
317 return result;
312 return result;
318 }
313 }
319
314
320 static PyMethodDef methods[] = {
315 static PyMethodDef methods[] = {
321 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
316 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
322 {NULL, NULL}
317 {NULL, NULL}
323 };
318 };
324
319
325 PyMODINIT_FUNC
320 PyMODINIT_FUNC
326 initmpatch(void)
321 initmpatch(void)
327 {
322 {
328 Py_InitModule3("mpatch", methods, mpatch_doc);
323 Py_InitModule3("mpatch", methods, mpatch_doc);
329 }
324 }
330
325
General Comments 0
You need to be logged in to leave comments. Login now