##// END OF EJS Templates
More fiddling with uint32_t includes for extensions...
mpm@selenic.com -
r510:7f3fc8fd default
parent child Browse files
Show More
@@ -1,342 +1,344 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #ifdef _WIN32
15 #ifdef _WIN32
16 #include <stdint.h>
16 static uint32_t htonl(uint32_t x)
17 static uint32_t htonl(uint32_t x)
17 {
18 {
18 return ((x & 0x000000ffUL) << 24) |
19 return ((x & 0x000000ffUL) << 24) |
19 ((x & 0x0000ff00UL) << 8) |
20 ((x & 0x0000ff00UL) << 8) |
20 ((x & 0x00ff0000UL) >> 8) |
21 ((x & 0x00ff0000UL) >> 8) |
21 ((x & 0xff000000UL) >> 24);
22 ((x & 0xff000000UL) >> 24);
22 }
23 }
23 #else
24 #else
25 #include <sys/types.h>
24 #include <netinet/in.h>
26 #include <netinet/in.h>
25 #endif
27 #endif
26
28
27 struct line {
29 struct line {
28 int h, len, n, e;
30 int h, len, n, e;
29 const char *l;
31 const char *l;
30 };
32 };
31
33
32 struct pos {
34 struct pos {
33 int pos, len;
35 int pos, len;
34 };
36 };
35
37
36 struct hunk {
38 struct hunk {
37 int a1, a2, b1, b2;
39 int a1, a2, b1, b2;
38 };
40 };
39
41
40 struct hunklist {
42 struct hunklist {
41 struct hunk *base, *head;
43 struct hunk *base, *head;
42 };
44 };
43
45
44 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
46 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
45 {
47 {
46 return (word << shift) | (word >> (32 - shift));
48 return (word << shift) | (word >> (32 - shift));
47 }
49 }
48
50
49 int splitlines(const char *a, int len, struct line **lr)
51 int splitlines(const char *a, int len, struct line **lr)
50 {
52 {
51 int h, i;
53 int h, i;
52 const char *p, *b = a;
54 const char *p, *b = a;
53 struct line *l;
55 struct line *l;
54
56
55 /* count the lines */
57 /* count the lines */
56 i = 1; /* extra line for sentinel */
58 i = 1; /* extra line for sentinel */
57 for (p = a; p < a + len; p++)
59 for (p = a; p < a + len; p++)
58 if (*p == '\n' || p == a + len - 1)
60 if (*p == '\n' || p == a + len - 1)
59 i++;
61 i++;
60
62
61 *lr = l = malloc(sizeof(struct line) * i);
63 *lr = l = malloc(sizeof(struct line) * i);
62 if (!l)
64 if (!l)
63 return -1;
65 return -1;
64
66
65 /* build the line array and calculate hashes */
67 /* build the line array and calculate hashes */
66 h = 0;
68 h = 0;
67 for (p = a; p < a + len; p++) {
69 for (p = a; p < a + len; p++) {
68 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
70 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
69 if (*p == '\n' || p == a + len - 1) {
71 if (*p == '\n' || p == a + len - 1) {
70 l->len = p - b + 1;
72 l->len = p - b + 1;
71 l->h = h * l->len;
73 l->h = h * l->len;
72 l->l = b;
74 l->l = b;
73 l->n = -1;
75 l->n = -1;
74 l++;
76 l++;
75 b = p + 1;
77 b = p + 1;
76 h = 0;
78 h = 0;
77 }
79 }
78 }
80 }
79
81
80 /* set up a sentinel */
82 /* set up a sentinel */
81 l->h = l->len = 0;
83 l->h = l->len = 0;
82 l->l = a + len;
84 l->l = a + len;
83 return i - 1;
85 return i - 1;
84 }
86 }
85
87
86 int inline cmp(struct line *a, struct line *b)
88 int inline cmp(struct line *a, struct line *b)
87 {
89 {
88 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
90 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
89 }
91 }
90
92
91 static int equatelines(struct line *a, int an, struct line *b, int bn)
93 static int equatelines(struct line *a, int an, struct line *b, int bn)
92 {
94 {
93 int i, j, buckets = 1, t;
95 int i, j, buckets = 1, t;
94 struct pos *h;
96 struct pos *h;
95
97
96 /* build a hash table of the next highest power of 2 */
98 /* build a hash table of the next highest power of 2 */
97 while (buckets < bn + 1)
99 while (buckets < bn + 1)
98 buckets *= 2;
100 buckets *= 2;
99
101
100 h = malloc(buckets * sizeof(struct pos));
102 h = malloc(buckets * sizeof(struct pos));
101 buckets = buckets - 1;
103 buckets = buckets - 1;
102 if (!h)
104 if (!h)
103 return 0;
105 return 0;
104
106
105 /* clear the hash table */
107 /* clear the hash table */
106 for (i = 0; i <= buckets; i++) {
108 for (i = 0; i <= buckets; i++) {
107 h[i].pos = -1;
109 h[i].pos = -1;
108 h[i].len = 0;
110 h[i].len = 0;
109 }
111 }
110
112
111 /* add lines to the hash table chains */
113 /* add lines to the hash table chains */
112 for (i = bn - 1; i >= 0; i--) {
114 for (i = bn - 1; i >= 0; i--) {
113 /* find the equivalence class */
115 /* find the equivalence class */
114 for (j = b[i].h & buckets; h[j].pos != -1;
116 for (j = b[i].h & buckets; h[j].pos != -1;
115 j = (j + 1) & buckets)
117 j = (j + 1) & buckets)
116 if (!cmp(b + i, b + h[j].pos))
118 if (!cmp(b + i, b + h[j].pos))
117 break;
119 break;
118
120
119 /* add to the head of the equivalence class */
121 /* add to the head of the equivalence class */
120 b[i].n = h[j].pos;
122 b[i].n = h[j].pos;
121 b[i].e = j;
123 b[i].e = j;
122 h[j].pos = i;
124 h[j].pos = i;
123 h[j].len++; /* keep track of popularity */
125 h[j].len++; /* keep track of popularity */
124 }
126 }
125
127
126 /* compute popularity threshold */
128 /* compute popularity threshold */
127 t = (bn >= 200) ? bn / 100 : bn + 1;
129 t = (bn >= 200) ? bn / 100 : bn + 1;
128
130
129 /* match items in a to their equivalence class in b */
131 /* match items in a to their equivalence class in b */
130 for (i = 0; i < an; i++) {
132 for (i = 0; i < an; i++) {
131 /* find the equivalence class */
133 /* find the equivalence class */
132 for (j = a[i].h & buckets; h[j].pos != -1;
134 for (j = a[i].h & buckets; h[j].pos != -1;
133 j = (j + 1) & buckets)
135 j = (j + 1) & buckets)
134 if (!cmp(a + i, b + h[j].pos))
136 if (!cmp(a + i, b + h[j].pos))
135 break;
137 break;
136
138
137 a[i].e = j; /* use equivalence class for quick compare */
139 a[i].e = j; /* use equivalence class for quick compare */
138 if(h[j].len <= t)
140 if(h[j].len <= t)
139 a[i].n = h[j].pos; /* point to head of match list */
141 a[i].n = h[j].pos; /* point to head of match list */
140 else
142 else
141 a[i].n = -1; /* too popular */
143 a[i].n = -1; /* too popular */
142 }
144 }
143
145
144 /* discard hash tables */
146 /* discard hash tables */
145 free(h);
147 free(h);
146 return 1;
148 return 1;
147 }
149 }
148
150
149 static int longest_match(struct line *a, struct line *b, struct pos *pos,
151 static int longest_match(struct line *a, struct line *b, struct pos *pos,
150 int a1, int a2, int b1, int b2, int *omi, int *omj)
152 int a1, int a2, int b1, int b2, int *omi, int *omj)
151 {
153 {
152 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
154 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
153
155
154 for (i = a1; i < a2; i++) {
156 for (i = a1; i < a2; i++) {
155 /* skip things before the current block */
157 /* skip things before the current block */
156 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
158 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
157 ;
159 ;
158
160
159 /* loop through all lines match a[i] in b */
161 /* loop through all lines match a[i] in b */
160 for (; j != -1 && j < b2; j = b[j].n) {
162 for (; j != -1 && j < b2; j = b[j].n) {
161 /* does this extend an earlier match? */
163 /* does this extend an earlier match? */
162 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
164 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
163 k = pos[j - 1].len + 1;
165 k = pos[j - 1].len + 1;
164 else
166 else
165 k = 1;
167 k = 1;
166 pos[j].pos = i;
168 pos[j].pos = i;
167 pos[j].len = k;
169 pos[j].len = k;
168
170
169 /* best match so far? */
171 /* best match so far? */
170 if (k > mk) {
172 if (k > mk) {
171 mi = i;
173 mi = i;
172 mj = j;
174 mj = j;
173 mk = k;
175 mk = k;
174 }
176 }
175 }
177 }
176 }
178 }
177
179
178 if (mk) {
180 if (mk) {
179 mi = mi - mk + 1;
181 mi = mi - mk + 1;
180 mj = mj - mk + 1;
182 mj = mj - mk + 1;
181 }
183 }
182
184
183 /* expand match to include neighboring popular lines */
185 /* expand match to include neighboring popular lines */
184 while (mi - mb > a1 && mj - mb > b1 &&
186 while (mi - mb > a1 && mj - mb > b1 &&
185 a[mi - mb - 1].e == b[mj - mb - 1].e)
187 a[mi - mb - 1].e == b[mj - mb - 1].e)
186 mb++;
188 mb++;
187 while (mi + mk < a2 && mj + mk < b2 &&
189 while (mi + mk < a2 && mj + mk < b2 &&
188 a[mi + mk].e == b[mj + mk].e)
190 a[mi + mk].e == b[mj + mk].e)
189 mk++;
191 mk++;
190
192
191 *omi = mi - mb;
193 *omi = mi - mb;
192 *omj = mj - mb;
194 *omj = mj - mb;
193 return mk + mb;
195 return mk + mb;
194 }
196 }
195
197
196 static void recurse(struct line *a, struct line *b, struct pos *pos,
198 static void recurse(struct line *a, struct line *b, struct pos *pos,
197 int a1, int a2, int b1, int b2, struct hunklist *l)
199 int a1, int a2, int b1, int b2, struct hunklist *l)
198 {
200 {
199 int i, j, k;
201 int i, j, k;
200
202
201 /* find the longest match in this chunk */
203 /* find the longest match in this chunk */
202 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
204 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
203 if (!k)
205 if (!k)
204 return;
206 return;
205
207
206 /* and recurse on the remaining chunks on either side */
208 /* and recurse on the remaining chunks on either side */
207 recurse(a, b, pos, a1, i, b1, j, l);
209 recurse(a, b, pos, a1, i, b1, j, l);
208 l->head->a1 = i;
210 l->head->a1 = i;
209 l->head->a2 = i + k;
211 l->head->a2 = i + k;
210 l->head->b1 = j;
212 l->head->b1 = j;
211 l->head->b2 = j + k;
213 l->head->b2 = j + k;
212 l->head++;
214 l->head++;
213 recurse(a, b, pos, i + k, a2, j + k, b2, l);
215 recurse(a, b, pos, i + k, a2, j + k, b2, l);
214 }
216 }
215
217
216 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
218 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
217 {
219 {
218 struct hunklist l;
220 struct hunklist l;
219 struct pos *pos;
221 struct pos *pos;
220 int t;
222 int t;
221
223
222 /* allocate and fill arrays */
224 /* allocate and fill arrays */
223 t = equatelines(a, an, b, bn);
225 t = equatelines(a, an, b, bn);
224 pos = calloc(bn, sizeof(struct pos));
226 pos = calloc(bn, sizeof(struct pos));
225 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
227 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
226
228
227 if (pos && l.base && t) {
229 if (pos && l.base && t) {
228 /* generate the matching block list */
230 /* generate the matching block list */
229 recurse(a, b, pos, 0, an, 0, bn, &l);
231 recurse(a, b, pos, 0, an, 0, bn, &l);
230 l.head->a1 = an;
232 l.head->a1 = an;
231 l.head->b1 = bn;
233 l.head->b1 = bn;
232 l.head++;
234 l.head++;
233 }
235 }
234
236
235 free(pos);
237 free(pos);
236 return l;
238 return l;
237 }
239 }
238
240
239 static PyObject *blocks(PyObject *self, PyObject *args)
241 static PyObject *blocks(PyObject *self, PyObject *args)
240 {
242 {
241 PyObject *sa, *sb, *rl = NULL, *m;
243 PyObject *sa, *sb, *rl = NULL, *m;
242 struct line *a, *b;
244 struct line *a, *b;
243 struct hunklist l;
245 struct hunklist l;
244 struct hunk *h;
246 struct hunk *h;
245 int an, bn, pos = 0;
247 int an, bn, pos = 0;
246
248
247 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
249 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
248 return NULL;
250 return NULL;
249
251
250 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
252 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
251 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
253 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
252 if (!a || !b)
254 if (!a || !b)
253 goto nomem;
255 goto nomem;
254
256
255 l = diff(a, an, b, bn);
257 l = diff(a, an, b, bn);
256 rl = PyList_New(l.head - l.base);
258 rl = PyList_New(l.head - l.base);
257 if (!l.head || !rl)
259 if (!l.head || !rl)
258 goto nomem;
260 goto nomem;
259
261
260 for(h = l.base; h != l.head; h++) {
262 for(h = l.base; h != l.head; h++) {
261 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
263 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
262 PyList_SetItem(rl, pos, m);
264 PyList_SetItem(rl, pos, m);
263 pos++;
265 pos++;
264 }
266 }
265
267
266 nomem:
268 nomem:
267 free(a);
269 free(a);
268 free(b);
270 free(b);
269 free(l.base);
271 free(l.base);
270 return rl ? rl : PyErr_NoMemory();
272 return rl ? rl : PyErr_NoMemory();
271 }
273 }
272
274
273 static PyObject *bdiff(PyObject *self, PyObject *args)
275 static PyObject *bdiff(PyObject *self, PyObject *args)
274 {
276 {
275 PyObject *sa, *sb, *result = NULL;
277 PyObject *sa, *sb, *result = NULL;
276 struct line *al, *bl;
278 struct line *al, *bl;
277 struct hunklist l;
279 struct hunklist l;
278 struct hunk *h;
280 struct hunk *h;
279 char encode[12], *rb;
281 char encode[12], *rb;
280 int an, bn, len = 0, la = 0, lb = 0;
282 int an, bn, len = 0, la = 0, lb = 0;
281
283
282 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
284 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
283 return NULL;
285 return NULL;
284
286
285 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
287 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
286 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
288 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
287 if (!al || !bl)
289 if (!al || !bl)
288 goto nomem;
290 goto nomem;
289
291
290 l = diff(al, an, bl, bn);
292 l = diff(al, an, bl, bn);
291 if (!l.head)
293 if (!l.head)
292 goto nomem;
294 goto nomem;
293
295
294 /* calculate length of output */
296 /* calculate length of output */
295 for(h = l.base; h != l.head; h++) {
297 for(h = l.base; h != l.head; h++) {
296 if (h->a1 != la || h->b1 != lb)
298 if (h->a1 != la || h->b1 != lb)
297 len += 12 + bl[h->b1].l - bl[lb].l;
299 len += 12 + bl[h->b1].l - bl[lb].l;
298 la = h->a2;
300 la = h->a2;
299 lb = h->b2;
301 lb = h->b2;
300 }
302 }
301
303
302 result = PyString_FromStringAndSize(NULL, len);
304 result = PyString_FromStringAndSize(NULL, len);
303 if (!result)
305 if (!result)
304 goto nomem;
306 goto nomem;
305
307
306 /* build binary patch */
308 /* build binary patch */
307 rb = PyString_AsString(result);
309 rb = PyString_AsString(result);
308 la = lb = 0;
310 la = lb = 0;
309
311
310 for(h = l.base; h != l.head; h++) {
312 for(h = l.base; h != l.head; h++) {
311 if (h->a1 != la || h->b1 != lb) {
313 if (h->a1 != la || h->b1 != lb) {
312 len = bl[h->b1].l - bl[lb].l;
314 len = bl[h->b1].l - bl[lb].l;
313 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
315 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
314 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
316 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
315 *(uint32_t *)(encode + 8) = htonl(len);
317 *(uint32_t *)(encode + 8) = htonl(len);
316 memcpy(rb, encode, 12);
318 memcpy(rb, encode, 12);
317 memcpy(rb + 12, bl[lb].l, len);
319 memcpy(rb + 12, bl[lb].l, len);
318 rb += 12 + len;
320 rb += 12 + len;
319 }
321 }
320 la = h->a2;
322 la = h->a2;
321 lb = h->b2;
323 lb = h->b2;
322 }
324 }
323
325
324 nomem:
326 nomem:
325 free(al);
327 free(al);
326 free(bl);
328 free(bl);
327 free(l.base);
329 free(l.base);
328 return result ? result : PyErr_NoMemory();
330 return result ? result : PyErr_NoMemory();
329 }
331 }
330
332
331 static char mdiff_doc[] = "Efficient binary diff.";
333 static char mdiff_doc[] = "Efficient binary diff.";
332
334
333 static PyMethodDef methods[] = {
335 static PyMethodDef methods[] = {
334 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
336 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
335 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
337 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
336 {NULL, NULL}
338 {NULL, NULL}
337 };
339 };
338
340
339 PyMODINIT_FUNC initbdiff(void)
341 PyMODINIT_FUNC initbdiff(void)
340 {
342 {
341 Py_InitModule3("bdiff", methods, mdiff_doc);
343 Py_InitModule3("bdiff", methods, mdiff_doc);
342 }
344 }
@@ -1,324 +1,326 b''
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #ifdef _WIN32
26 #ifdef _WIN32
27 #include <stdint.h>
27 static uint32_t ntohl(uint32_t x)
28 static uint32_t ntohl(uint32_t x)
28 {
29 {
29 return ((x & 0x000000ffUL) << 24) |
30 return ((x & 0x000000ffUL) << 24) |
30 ((x & 0x0000ff00UL) << 8) |
31 ((x & 0x0000ff00UL) << 8) |
31 ((x & 0x00ff0000UL) >> 8) |
32 ((x & 0x00ff0000UL) >> 8) |
32 ((x & 0xff000000UL) >> 24);
33 ((x & 0xff000000UL) >> 24);
33 }
34 }
34 #else
35 #else
36 #include <sys/types.h>
35 #include <netinet/in.h>
37 #include <netinet/in.h>
36 #endif
38 #endif
37
39
38 static char mpatch_doc[] = "Efficient binary patching.";
40 static char mpatch_doc[] = "Efficient binary patching.";
39
41
40 struct frag {
42 struct frag {
41 int start, end, len;
43 int start, end, len;
42 char *data;
44 char *data;
43 };
45 };
44
46
45 struct flist {
47 struct flist {
46 struct frag *base, *head, *tail;
48 struct frag *base, *head, *tail;
47 };
49 };
48
50
49 static struct flist *lalloc(int size)
51 static struct flist *lalloc(int size)
50 {
52 {
51 struct flist *a = NULL;
53 struct flist *a = NULL;
52
54
53 a = malloc(sizeof(struct flist));
55 a = malloc(sizeof(struct flist));
54 if (a) {
56 if (a) {
55 a->base = malloc(sizeof(struct frag) * size);
57 a->base = malloc(sizeof(struct frag) * size);
56 if (!a->base) {
58 if (!a->base) {
57 free(a);
59 free(a);
58 a = NULL;
60 a = NULL;
59 } else
61 } else
60 a->head = a->tail = a->base;
62 a->head = a->tail = a->base;
61 }
63 }
62 return a;
64 return a;
63 }
65 }
64
66
65 static void lfree(struct flist *a)
67 static void lfree(struct flist *a)
66 {
68 {
67 if (a) {
69 if (a) {
68 free(a->base);
70 free(a->base);
69 free(a);
71 free(a);
70 }
72 }
71 }
73 }
72
74
73 static int lsize(struct flist *a)
75 static int lsize(struct flist *a)
74 {
76 {
75 return a->tail - a->head;
77 return a->tail - a->head;
76 }
78 }
77
79
78 /* move hunks in source that are less cut to dest, compensating
80 /* move hunks in source that are less cut to dest, compensating
79 for changes in offset. the last hunk may be split if necessary.
81 for changes in offset. the last hunk may be split if necessary.
80 */
82 */
81 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
83 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
82 {
84 {
83 struct frag *d = dest->tail, *s = src->head;
85 struct frag *d = dest->tail, *s = src->head;
84 int postend, c, l;
86 int postend, c, l;
85
87
86 while (s != src->tail) {
88 while (s != src->tail) {
87 if (s->start + offset >= cut)
89 if (s->start + offset >= cut)
88 break; /* we've gone far enough */
90 break; /* we've gone far enough */
89
91
90 postend = offset + s->start + s->len;
92 postend = offset + s->start + s->len;
91 if (postend <= cut) {
93 if (postend <= cut) {
92 /* save this hunk */
94 /* save this hunk */
93 offset += s->start + s->len - s->end;
95 offset += s->start + s->len - s->end;
94 *d++ = *s++;
96 *d++ = *s++;
95 }
97 }
96 else {
98 else {
97 /* break up this hunk */
99 /* break up this hunk */
98 c = cut - offset;
100 c = cut - offset;
99 if (s->end < c)
101 if (s->end < c)
100 c = s->end;
102 c = s->end;
101 l = cut - offset - s->start;
103 l = cut - offset - s->start;
102 if (s->len < l)
104 if (s->len < l)
103 l = s->len;
105 l = s->len;
104
106
105 offset += s->start + l - c;
107 offset += s->start + l - c;
106
108
107 d->start = s->start;
109 d->start = s->start;
108 d->end = c;
110 d->end = c;
109 d->len = l;
111 d->len = l;
110 d->data = s->data;
112 d->data = s->data;
111 d++;
113 d++;
112 s->start = c;
114 s->start = c;
113 s->len = s->len - l;
115 s->len = s->len - l;
114 s->data = s->data + l;
116 s->data = s->data + l;
115
117
116 break;
118 break;
117 }
119 }
118 }
120 }
119
121
120 dest->tail = d;
122 dest->tail = d;
121 src->head = s;
123 src->head = s;
122 return offset;
124 return offset;
123 }
125 }
124
126
125 /* like gather, but with no output list */
127 /* like gather, but with no output list */
126 static int discard(struct flist *src, int cut, int offset)
128 static int discard(struct flist *src, int cut, int offset)
127 {
129 {
128 struct frag *s = src->head;
130 struct frag *s = src->head;
129 int postend, c, l;
131 int postend, c, l;
130
132
131 while (s != src->tail) {
133 while (s != src->tail) {
132 if (s->start + offset >= cut)
134 if (s->start + offset >= cut)
133 break;
135 break;
134
136
135 postend = offset + s->start + s->len;
137 postend = offset + s->start + s->len;
136 if (postend <= cut) {
138 if (postend <= cut) {
137 offset += s->start + s->len - s->end;
139 offset += s->start + s->len - s->end;
138 s++;
140 s++;
139 }
141 }
140 else {
142 else {
141 c = cut - offset;
143 c = cut - offset;
142 if (s->end < c)
144 if (s->end < c)
143 c = s->end;
145 c = s->end;
144 l = cut - offset - s->start;
146 l = cut - offset - s->start;
145 if (s->len < l)
147 if (s->len < l)
146 l = s->len;
148 l = s->len;
147
149
148 offset += s->start + l - c;
150 offset += s->start + l - c;
149 s->start = c;
151 s->start = c;
150 s->len = s->len - l;
152 s->len = s->len - l;
151 s->data = s->data + l;
153 s->data = s->data + l;
152
154
153 break;
155 break;
154 }
156 }
155 }
157 }
156
158
157 src->head = s;
159 src->head = s;
158 return offset;
160 return offset;
159 }
161 }
160
162
161 /* combine hunk lists a and b, while adjusting b for offset changes in a/
163 /* combine hunk lists a and b, while adjusting b for offset changes in a/
162 this deletes a and b and returns the resultant list. */
164 this deletes a and b and returns the resultant list. */
163 static struct flist *combine(struct flist *a, struct flist *b)
165 static struct flist *combine(struct flist *a, struct flist *b)
164 {
166 {
165 struct flist *c = NULL;
167 struct flist *c = NULL;
166 struct frag *bh, *ct;
168 struct frag *bh, *ct;
167 int offset = 0, post;
169 int offset = 0, post;
168
170
169 if (a && b)
171 if (a && b)
170 c = lalloc((lsize(a) + lsize(b)) * 2);
172 c = lalloc((lsize(a) + lsize(b)) * 2);
171
173
172 if (c) {
174 if (c) {
173
175
174 for (bh = b->head; bh != b->tail; bh++) {
176 for (bh = b->head; bh != b->tail; bh++) {
175 /* save old hunks */
177 /* save old hunks */
176 offset = gather(c, a, bh->start, offset);
178 offset = gather(c, a, bh->start, offset);
177
179
178 /* discard replaced hunks */
180 /* discard replaced hunks */
179 post = discard(a, bh->end, offset);
181 post = discard(a, bh->end, offset);
180
182
181 /* insert new hunk */
183 /* insert new hunk */
182 ct = c->tail;
184 ct = c->tail;
183 ct->start = bh->start - offset;
185 ct->start = bh->start - offset;
184 ct->end = bh->end - post;
186 ct->end = bh->end - post;
185 ct->len = bh->len;
187 ct->len = bh->len;
186 ct->data = bh->data;
188 ct->data = bh->data;
187 c->tail++;
189 c->tail++;
188 offset = post;
190 offset = post;
189 }
191 }
190
192
191 /* hold on to tail from a */
193 /* hold on to tail from a */
192 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
194 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
193 c->tail += lsize(a);
195 c->tail += lsize(a);
194 }
196 }
195
197
196 lfree(a);
198 lfree(a);
197 lfree(b);
199 lfree(b);
198 return c;
200 return c;
199 }
201 }
200
202
201 /* decode a binary patch into a hunk list */
203 /* decode a binary patch into a hunk list */
202 static struct flist *decode(char *bin, int len)
204 static struct flist *decode(char *bin, int len)
203 {
205 {
204 struct flist *l;
206 struct flist *l;
205 struct frag *lt;
207 struct frag *lt;
206 char *end = bin + len;
208 char *end = bin + len;
207 char decode[12]; /* for dealing with alignment issues */
209 char decode[12]; /* for dealing with alignment issues */
208
210
209 /* assume worst case size, we won't have many of these lists */
211 /* assume worst case size, we won't have many of these lists */
210 l = lalloc(len / 12);
212 l = lalloc(len / 12);
211 lt = l->tail;
213 lt = l->tail;
212
214
213 while (bin < end) {
215 while (bin < end) {
214 memcpy(decode, bin, 12);
216 memcpy(decode, bin, 12);
215 lt->start = ntohl(*(uint32_t *)decode);
217 lt->start = ntohl(*(uint32_t *)decode);
216 lt->end = ntohl(*(uint32_t *)(decode + 4));
218 lt->end = ntohl(*(uint32_t *)(decode + 4));
217 lt->len = ntohl(*(uint32_t *)(decode + 8));
219 lt->len = ntohl(*(uint32_t *)(decode + 8));
218 lt->data = bin + 12;
220 lt->data = bin + 12;
219 bin += 12 + lt->len;
221 bin += 12 + lt->len;
220 lt++;
222 lt++;
221 }
223 }
222
224
223 l->tail = lt;
225 l->tail = lt;
224 return l;
226 return l;
225 }
227 }
226
228
227 /* calculate the size of resultant text */
229 /* calculate the size of resultant text */
228 static int calcsize(int len, struct flist *l)
230 static int calcsize(int len, struct flist *l)
229 {
231 {
230 int outlen = 0, last = 0;
232 int outlen = 0, last = 0;
231 struct frag *f = l->head;
233 struct frag *f = l->head;
232
234
233 while (f != l->tail) {
235 while (f != l->tail) {
234 outlen += f->start - last;
236 outlen += f->start - last;
235 last = f->end;
237 last = f->end;
236 outlen += f->len;
238 outlen += f->len;
237 f++;
239 f++;
238 }
240 }
239
241
240 outlen += len - last;
242 outlen += len - last;
241 return outlen;
243 return outlen;
242 }
244 }
243
245
244 static void apply(char *buf, char *orig, int len, struct flist *l)
246 static void apply(char *buf, char *orig, int len, struct flist *l)
245 {
247 {
246 struct frag *f = l->head;
248 struct frag *f = l->head;
247 int last = 0;
249 int last = 0;
248 char *p = buf;
250 char *p = buf;
249
251
250 while (f != l->tail) {
252 while (f != l->tail) {
251 memcpy(p, orig + last, f->start - last);
253 memcpy(p, orig + last, f->start - last);
252 p += f->start - last;
254 p += f->start - last;
253 memcpy(p, f->data, f->len);
255 memcpy(p, f->data, f->len);
254 last = f->end;
256 last = f->end;
255 p += f->len;
257 p += f->len;
256 f++;
258 f++;
257 }
259 }
258 memcpy(p, orig + last, len - last);
260 memcpy(p, orig + last, len - last);
259 }
261 }
260
262
261 /* recursively generate a patch of all bins between start and end */
263 /* recursively generate a patch of all bins between start and end */
262 static struct flist *fold(PyObject *bins, int start, int end)
264 static struct flist *fold(PyObject *bins, int start, int end)
263 {
265 {
264 int len;
266 int len;
265
267
266 if (start + 1 == end) {
268 if (start + 1 == end) {
267 /* trivial case, output a decoded list */
269 /* trivial case, output a decoded list */
268 PyObject *tmp = PyList_GetItem(bins, start);
270 PyObject *tmp = PyList_GetItem(bins, start);
269 if (!tmp)
271 if (!tmp)
270 return NULL;
272 return NULL;
271 return decode(PyString_AsString(tmp), PyString_Size(tmp));
273 return decode(PyString_AsString(tmp), PyString_Size(tmp));
272 }
274 }
273
275
274 /* divide and conquer, memory management is elsewhere */
276 /* divide and conquer, memory management is elsewhere */
275 len = (end - start) / 2;
277 len = (end - start) / 2;
276 return combine(fold(bins, start, start + len),
278 return combine(fold(bins, start, start + len),
277 fold(bins, start + len, end));
279 fold(bins, start + len, end));
278 }
280 }
279
281
280 static PyObject *
282 static PyObject *
281 patches(PyObject *self, PyObject *args)
283 patches(PyObject *self, PyObject *args)
282 {
284 {
283 PyObject *text, *bins, *result;
285 PyObject *text, *bins, *result;
284 struct flist *patch;
286 struct flist *patch;
285 char *in, *out;
287 char *in, *out;
286 int len, outlen;
288 int len, outlen;
287
289
288 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
290 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
289 return NULL;
291 return NULL;
290
292
291 len = PyList_Size(bins);
293 len = PyList_Size(bins);
292 if (!len) {
294 if (!len) {
293 /* nothing to do */
295 /* nothing to do */
294 Py_INCREF(text);
296 Py_INCREF(text);
295 return text;
297 return text;
296 }
298 }
297
299
298 patch = fold(bins, 0, len);
300 patch = fold(bins, 0, len);
299 if (!patch)
301 if (!patch)
300 return PyErr_NoMemory();
302 return PyErr_NoMemory();
301
303
302 outlen = calcsize(PyString_Size(text), patch);
304 outlen = calcsize(PyString_Size(text), patch);
303 result = PyString_FromStringAndSize(NULL, outlen);
305 result = PyString_FromStringAndSize(NULL, outlen);
304 if (result) {
306 if (result) {
305 in = PyString_AsString(text);
307 in = PyString_AsString(text);
306 out = PyString_AsString(result);
308 out = PyString_AsString(result);
307 apply(out, in, PyString_Size(text), patch);
309 apply(out, in, PyString_Size(text), patch);
308 }
310 }
309
311
310 lfree(patch);
312 lfree(patch);
311 return result;
313 return result;
312 }
314 }
313
315
314 static PyMethodDef methods[] = {
316 static PyMethodDef methods[] = {
315 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
317 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
316 {NULL, NULL}
318 {NULL, NULL}
317 };
319 };
318
320
319 PyMODINIT_FUNC
321 PyMODINIT_FUNC
320 initmpatch(void)
322 initmpatch(void)
321 {
323 {
322 Py_InitModule3("mpatch", methods, mpatch_doc);
324 Py_InitModule3("mpatch", methods, mpatch_doc);
323 }
325 }
324
326
General Comments 0
You need to be logged in to leave comments. Login now