##// END OF EJS Templates
Add 'other OS' bits to bdiff.c / style cleanups...
mpm@selenic.com -
r411:9e9f7ab4 default
parent child Browse files
Show More
@@ -1,279 +1,293
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #ifdef _WIN32
16
17 typedef unsigned long uint32_t;
18
19 static uint32_t htonl(uint32_t x)
20 {
21 return ((x & 0x000000ffUL) << 24) |
22 ((x & 0x0000ff00UL) << 8) |
23 ((x & 0x00ff0000UL) >> 8) |
24 ((x & 0xff000000UL) >> 24);
25 }
26
27 #else
15 #include <netinet/in.h>
28 #include <netinet/in.h>
16 #include <sys/types.h>
29 #include <sys/types.h>
30 #endif
17
31
18 struct line {
32 struct line {
19 int h, len, n;
33 int h, len, n;
20 const char *l;
34 const char *l;
21 };
35 };
22
36
23 struct hunk {
37 struct hunk {
24 int a1, a2, b1, b2;
38 int a1, a2, b1, b2;
25 };
39 };
26
40
27 struct hunklist {
41 struct hunklist {
28 struct hunk *base, *head;
42 struct hunk *base, *head;
29 };
43 };
30
44
31 static inline uint32_t rol32(uint32_t word, unsigned int shift)
45 static inline uint32_t rol32(uint32_t word, unsigned int shift)
32 {
46 {
33 return (word << shift) | (word >> (32 - shift));
47 return (word << shift) | (word >> (32 - shift));
34 }
48 }
35
49
36 int splitlines(const char *a, int len, struct line **lr)
50 int splitlines(const char *a, int len, struct line **lr)
37 {
51 {
38 int h, i;
52 int h, i;
39 const char *p, *b = a;
53 const char *p, *b = a;
40 struct line *l;
54 struct line *l;
41
55
42 /* count the lines */
56 /* count the lines */
43 i = 1; /* extra line for sentinel */
57 i = 1; /* extra line for sentinel */
44 for (p = a; p < a + len; p++)
58 for (p = a; p < a + len; p++)
45 if (*p == '\n' || p == a + len - 1)
59 if (*p == '\n' || p == a + len - 1)
46 i++;
60 i++;
47
61
48 *lr = l = malloc(sizeof(struct line) * i);
62 *lr = l = malloc(sizeof(struct line) * i);
49 if (!l)
63 if (!l)
50 return -1;
64 return -1;
51
65
52 /* build the line array and calculate hashes */
66 /* build the line array and calculate hashes */
53 h = 0;
67 h = 0;
54 for (p = a; p < a + len; p++) {
68 for (p = a; p < a + len; p++) {
55 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
69 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
56 if (*p == '\n' || p == a + len - 1) {
70 if (*p == '\n' || p == a + len - 1) {
57 l->len = p - b + 1;
71 l->len = p - b + 1;
58 l->h = h;
72 l->h = h;
59 l->l = b;
73 l->l = b;
60 l->n = -1;
74 l->n = -1;
61 l++;
75 l++;
62 b = p + 1;
76 b = p + 1;
63 h = 0;
77 h = 0;
64 }
78 }
65 }
79 }
66
80
67 /* set up a sentinel */
81 /* set up a sentinel */
68 l->h = l->len = 0;
82 l->h = l->len = 0;
69 l->l = a + len;
83 l->l = a + len;
70 return i - 1;
84 return i - 1;
71 }
85 }
72
86
73 int inline cmp(struct line *a, struct line *b)
87 int inline cmp(struct line *a, struct line *b)
74 {
88 {
75 return a->len != b->len || memcmp(a->l, b->l, a->len);
89 return a->len != b->len || memcmp(a->l, b->l, a->len);
76 }
90 }
77
91
78 static int equatelines(struct line *a, int an, struct line *b, int bn)
92 static int equatelines(struct line *a, int an, struct line *b, int bn)
79 {
93 {
80 int i, j, buckets = 1, t, *h, *l;
94 int i, j, buckets = 1, t, *h, *l;
81
95
82 /* build a hash table of the next highest power of 2 */
96 /* build a hash table of the next highest power of 2 */
83 while (buckets < bn + 1)
97 while (buckets < bn + 1)
84 buckets *= 2;
98 buckets *= 2;
85
99
86 h = malloc(buckets * sizeof(int));
100 h = malloc(buckets * sizeof(int));
87 l = calloc(buckets, sizeof(int));
101 l = calloc(buckets, sizeof(int));
88 buckets = buckets - 1;
102 buckets = buckets - 1;
89 if (!h || !l) {
103 if (!h || !l) {
90 free(h);
104 free(h);
91 return 0;
105 return 0;
92 }
106 }
93
107
94 /* clear the hash table */
108 /* clear the hash table */
95 for (i = 0; i <= buckets; i++)
109 for (i = 0; i <= buckets; i++)
96 h[i] = -1;
110 h[i] = -1;
97
111
98 /* add lines to the hash table chains */
112 /* add lines to the hash table chains */
99 for (i = bn - 1; i >= 0; i--) {
113 for (i = bn - 1; i >= 0; i--) {
100 /* find the equivalence class */
114 /* find the equivalence class */
101 for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
115 for (j = b[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
102 if (!cmp(b + i, b + h[j]))
116 if (!cmp(b + i, b + h[j]))
103 break;
117 break;
104
118
105 /* add to the head of the equivalence class */
119 /* add to the head of the equivalence class */
106 b[i].n = h[j];
120 b[i].n = h[j];
107 b[i].h = j;
121 b[i].h = j;
108 h[j] = i;
122 h[j] = i;
109 l[j]++; /* keep track of popularity */
123 l[j]++; /* keep track of popularity */
110 }
124 }
111
125
112 /* compute popularity threshold */
126 /* compute popularity threshold */
113 t = (bn >= 200) ? bn / 100 : bn + 1;
127 t = (bn >= 200) ? bn / 100 : bn + 1;
114
128
115 /* match items in a to their equivalence class in b */
129 /* match items in a to their equivalence class in b */
116 for (i = 0; i < an; i++) {
130 for (i = 0; i < an; i++) {
117 /* find the equivalence class */
131 /* find the equivalence class */
118 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
132 for (j = a[i].h & buckets; h[j] != -1; j = (j + 1) & buckets)
119 if (!cmp(a + i, b + h[j]))
133 if (!cmp(a + i, b + h[j]))
120 break;
134 break;
121
135
122 a[i].h = j; /* use equivalence class for quick compare */
136 a[i].h = j; /* use equivalence class for quick compare */
123 if(l[j] <= t)
137 if(l[j] <= t)
124 a[i].n = h[j]; /* point to head of match list */
138 a[i].n = h[j]; /* point to head of match list */
125 else
139 else
126 a[i].n = -1; /* too popular */
140 a[i].n = -1; /* too popular */
127 }
141 }
128
142
129 /* discard hash tables */
143 /* discard hash tables */
130 free(h);
144 free(h);
131 free(l);
145 free(l);
132 return 1;
146 return 1;
133 }
147 }
134
148
135 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen,
149 static int longest_match(struct line *a, struct line *b, int *jpos, int *jlen,
136 int a1, int a2, int b1, int b2, int *omi, int *omj)
150 int a1, int a2, int b1, int b2, int *omi, int *omj)
137 {
151 {
138 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
152 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
139
153
140 for (i = a1; i < a2; i++) {
154 for (i = a1; i < a2; i++) {
141 /* skip things before the current block */
155 /* skip things before the current block */
142 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
156 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
143 ;
157 ;
144
158
145 /* loop through all lines match a[i] in b */
159 /* loop through all lines match a[i] in b */
146 for (; j != -1 && j < b2; j = b[j].n) {
160 for (; j != -1 && j < b2; j = b[j].n) {
147 /* does this extend an earlier match? */
161 /* does this extend an earlier match? */
148 if (i > a1 && j > b1 && jpos[j - 1] == i)
162 if (i > a1 && j > b1 && jpos[j - 1] == i)
149 k = jlen[j - 1] + 1;
163 k = jlen[j - 1] + 1;
150 else
164 else
151 k = 1;
165 k = 1;
152 jpos[j] = i + 1;
166 jpos[j] = i + 1;
153 jlen[j] = k;
167 jlen[j] = k;
154
168
155 /* best match so far? */
169 /* best match so far? */
156 if (k > mk) {
170 if (k > mk) {
157 mi = i;
171 mi = i;
158 mj = j;
172 mj = j;
159 mk = k;
173 mk = k;
160 }
174 }
161 }
175 }
162 }
176 }
163
177
164 if (mk) {
178 if (mk) {
165 mi = mi - mk + 1;
179 mi = mi - mk + 1;
166 mj = mj - mk + 1;
180 mj = mj - mk + 1;
167 }
181 }
168
182
169 /* expand match to include neighboring popular lines */
183 /* expand match to include neighboring popular lines */
170 while (mi - mb > a1 && mj - mb > b1 &&
184 while (mi - mb > a1 && mj - mb > b1 &&
171 a[mi - mb - 1].h == b[mj - mb - 1].h)
185 a[mi - mb - 1].h == b[mj - mb - 1].h)
172 mb++;
186 mb++;
173 while (mi + mk < a2 && mj + mk < b2 &&
187 while (mi + mk < a2 && mj + mk < b2 &&
174 a[mi + mk].h == b[mj + mk].h)
188 a[mi + mk].h == b[mj + mk].h)
175 mk++;
189 mk++;
176
190
177 *omi = mi - mb;
191 *omi = mi - mb;
178 *omj = mj - mb;
192 *omj = mj - mb;
179 return mk + mb;
193 return mk + mb;
180 }
194 }
181
195
182 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen,
196 static void recurse(struct line *a, struct line *b, int *jpos, int *jlen,
183 int a1, int a2, int b1, int b2, struct hunklist *l)
197 int a1, int a2, int b1, int b2, struct hunklist *l)
184 {
198 {
185 int i, j, k;
199 int i, j, k;
186
200
187 /* find the longest match in this chunk */
201 /* find the longest match in this chunk */
188 k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j);
202 k = longest_match(a, b, jpos, jlen, a1, a2, b1, b2, &i, &j);
189 if (!k)
203 if (!k)
190 return;
204 return;
191
205
192 /* and recurse on the remaining chunks on either side */
206 /* and recurse on the remaining chunks on either side */
193 recurse(a, b, jpos, jlen, a1, i, b1, j, l);
207 recurse(a, b, jpos, jlen, a1, i, b1, j, l);
194 l->head->a1 = i;
208 l->head->a1 = i;
195 l->head->a2 = i + k;
209 l->head->a2 = i + k;
196 l->head->b1 = j;
210 l->head->b1 = j;
197 l->head->b2 = j + k;
211 l->head->b2 = j + k;
198 l->head++;
212 l->head++;
199 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
213 recurse(a, b, jpos, jlen, i + k, a2, j + k, b2, l);
200 }
214 }
201
215
202 static PyObject *bdiff(PyObject *self, PyObject *args)
216 static PyObject *bdiff(PyObject *self, PyObject *args)
203 {
217 {
204 PyObject *sa, *sb, *result = NULL;
218 PyObject *sa, *sb, *result = NULL;
205 struct hunklist l;
219 struct hunklist l;
206 struct hunk *h;
220 struct hunk *h;
207 struct line *al, *bl;
221 struct line *al, *bl;
208 char encode[12], *rb;
222 char encode[12], *rb;
209 int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen;
223 int an, bn, len = 0, t, la = 0, lb = 0, *jpos, *jlen;
210
224
211 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
225 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
212 return NULL;
226 return NULL;
213
227
214 /* allocate and fill arrays */
228 /* allocate and fill arrays */
215 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
229 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
216 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
230 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
217 t = equatelines(al, an, bl, bn);
231 t = equatelines(al, an, bl, bn);
218 jpos = calloc(bn, sizeof(int));
232 jpos = calloc(bn, sizeof(int));
219 jlen = calloc(bn, sizeof(int));
233 jlen = calloc(bn, sizeof(int));
220 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
234 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
221 if (!al || !bl || !jpos || !jlen || !l.base || !t)
235 if (!al || !bl || !jpos || !jlen || !l.base || !t)
222 goto nomem;
236 goto nomem;
223
237
224 /* generate the matching block list */
238 /* generate the matching block list */
225 recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l);
239 recurse(al, bl, jpos, jlen, 0, an, 0, bn, &l);
226 l.head->a1 = an;
240 l.head->a1 = an;
227 l.head->b1 = bn;
241 l.head->b1 = bn;
228 l.head++;
242 l.head++;
229
243
230 /* calculate length of output */
244 /* calculate length of output */
231 for(h = l.base; h != l.head; h++) {
245 for(h = l.base; h != l.head; h++) {
232 if (h->a1 != la || h->b1 != lb)
246 if (h->a1 != la || h->b1 != lb)
233 len += 12 + bl[h->b1].l - bl[lb].l;
247 len += 12 + bl[h->b1].l - bl[lb].l;
234 la = h->a2;
248 la = h->a2;
235 lb = h->b2;
249 lb = h->b2;
236 }
250 }
237
251
238 result = PyString_FromStringAndSize(NULL, len);
252 result = PyString_FromStringAndSize(NULL, len);
239 if (!result)
253 if (!result)
240 goto nomem;
254 goto nomem;
241
255
242 /* build binary patch */
256 /* build binary patch */
243 rb = PyString_AsString(result);
257 rb = PyString_AsString(result);
244 la = lb = 0;
258 la = lb = 0;
245
259
246 for(h = l.base; h != l.head; h++) {
260 for(h = l.base; h != l.head; h++) {
247 if (h->a1 != la || h->b1 != lb) {
261 if (h->a1 != la || h->b1 != lb) {
248 len = bl[h->b1].l - bl[lb].l;
262 len = bl[h->b1].l - bl[lb].l;
249 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
263 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
250 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
264 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
251 *(uint32_t *)(encode + 8) = htonl(len);
265 *(uint32_t *)(encode + 8) = htonl(len);
252 memcpy(rb, encode, 12);
266 memcpy(rb, encode, 12);
253 memcpy(rb + 12, bl[lb].l, len);
267 memcpy(rb + 12, bl[lb].l, len);
254 rb += 12 + len;
268 rb += 12 + len;
255 }
269 }
256 la = h->a2;
270 la = h->a2;
257 lb = h->b2;
271 lb = h->b2;
258 }
272 }
259
273
260 nomem:
274 nomem:
261 free(al);
275 free(al);
262 free(bl);
276 free(bl);
263 free(jpos);
277 free(jpos);
264 free(jlen);
278 free(jlen);
265 free(l.base);
279 free(l.base);
266 return result ? result : PyErr_NoMemory();
280 return result ? result : PyErr_NoMemory();
267 }
281 }
268
282
269 static char mdiff_doc[] = "Efficient binary diff.";
283 static char mdiff_doc[] = "Efficient binary diff.";
270
284
271 static PyMethodDef methods[] = {
285 static PyMethodDef methods[] = {
272 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
286 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
273 {NULL, NULL}
287 {NULL, NULL}
274 };
288 };
275
289
276 PyMODINIT_FUNC initbdiff(void)
290 PyMODINIT_FUNC initbdiff(void)
277 {
291 {
278 Py_InitModule3("bdiff", methods, mdiff_doc);
292 Py_InitModule3("bdiff", methods, mdiff_doc);
279 }
293 }
@@ -1,328 +1,329
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26 #ifdef _WIN32
26 #ifdef _WIN32
27
27
28 typedef unsigned long uint32_t;
28 typedef unsigned long uint32_t;
29
29
30 uint32_t ntohl(uint32_t x) {
30 static uint32_t ntohl(uint32_t x)
31 {
31 return ((x & 0x000000ffUL) << 24) |
32 return ((x & 0x000000ffUL) << 24) |
32 ((x & 0x0000ff00UL) << 8) |
33 ((x & 0x0000ff00UL) << 8) |
33 ((x & 0x00ff0000UL) >> 8) |
34 ((x & 0x00ff0000UL) >> 8) |
34 ((x & 0xff000000UL) >> 24);
35 ((x & 0xff000000UL) >> 24);
35 }
36 }
36
37
37 #else
38 #else
38 #include <netinet/in.h>
39 #include <netinet/in.h>
39 #include <sys/types.h>
40 #include <sys/types.h>
40 #endif
41 #endif
41
42
42 static char mpatch_doc[] = "Efficient binary patching.";
43 static char mpatch_doc[] = "Efficient binary patching.";
43
44
44 struct frag {
45 struct frag {
45 int start, end, len;
46 int start, end, len;
46 char *data;
47 char *data;
47 };
48 };
48
49
49 struct flist {
50 struct flist {
50 struct frag *base, *head, *tail;
51 struct frag *base, *head, *tail;
51 };
52 };
52
53
53 static struct flist *lalloc(int size)
54 static struct flist *lalloc(int size)
54 {
55 {
55 struct flist *a = NULL;
56 struct flist *a = NULL;
56
57
57 a = malloc(sizeof(struct flist));
58 a = malloc(sizeof(struct flist));
58 if (a) {
59 if (a) {
59 a->base = malloc(sizeof(struct frag) * size);
60 a->base = malloc(sizeof(struct frag) * size);
60 if (!a->base) {
61 if (!a->base) {
61 free(a);
62 free(a);
62 a = NULL;
63 a = NULL;
63 } else
64 } else
64 a->head = a->tail = a->base;
65 a->head = a->tail = a->base;
65 }
66 }
66 return a;
67 return a;
67 }
68 }
68
69
69 static void lfree(struct flist *a)
70 static void lfree(struct flist *a)
70 {
71 {
71 if (a) {
72 if (a) {
72 free(a->base);
73 free(a->base);
73 free(a);
74 free(a);
74 }
75 }
75 }
76 }
76
77
77 static int lsize(struct flist *a)
78 static int lsize(struct flist *a)
78 {
79 {
79 return a->tail - a->head;
80 return a->tail - a->head;
80 }
81 }
81
82
82 /* move hunks in source that are less cut to dest, compensating
83 /* move hunks in source that are less cut to dest, compensating
83 for changes in offset. the last hunk may be split if necessary.
84 for changes in offset. the last hunk may be split if necessary.
84 */
85 */
85 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
86 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
86 {
87 {
87 struct frag *d = dest->tail, *s = src->head;
88 struct frag *d = dest->tail, *s = src->head;
88 int postend, c, l;
89 int postend, c, l;
89
90
90 while (s != src->tail) {
91 while (s != src->tail) {
91 if (s->start + offset >= cut)
92 if (s->start + offset >= cut)
92 break; /* we've gone far enough */
93 break; /* we've gone far enough */
93
94
94 postend = offset + s->start + s->len;
95 postend = offset + s->start + s->len;
95 if (postend <= cut) {
96 if (postend <= cut) {
96 /* save this hunk */
97 /* save this hunk */
97 offset += s->start + s->len - s->end;
98 offset += s->start + s->len - s->end;
98 *d++ = *s++;
99 *d++ = *s++;
99 }
100 }
100 else {
101 else {
101 /* break up this hunk */
102 /* break up this hunk */
102 c = cut - offset;
103 c = cut - offset;
103 if (s->end < c)
104 if (s->end < c)
104 c = s->end;
105 c = s->end;
105 l = cut - offset - s->start;
106 l = cut - offset - s->start;
106 if (s->len < l)
107 if (s->len < l)
107 l = s->len;
108 l = s->len;
108
109
109 offset += s->start + l - c;
110 offset += s->start + l - c;
110
111
111 d->start = s->start;
112 d->start = s->start;
112 d->end = c;
113 d->end = c;
113 d->len = l;
114 d->len = l;
114 d->data = s->data;
115 d->data = s->data;
115 d++;
116 d++;
116 s->start = c;
117 s->start = c;
117 s->len = s->len - l;
118 s->len = s->len - l;
118 s->data = s->data + l;
119 s->data = s->data + l;
119
120
120 break;
121 break;
121 }
122 }
122 }
123 }
123
124
124 dest->tail = d;
125 dest->tail = d;
125 src->head = s;
126 src->head = s;
126 return offset;
127 return offset;
127 }
128 }
128
129
129 /* like gather, but with no output list */
130 /* like gather, but with no output list */
130 static int discard(struct flist *src, int cut, int offset)
131 static int discard(struct flist *src, int cut, int offset)
131 {
132 {
132 struct frag *s = src->head;
133 struct frag *s = src->head;
133 int postend, c, l;
134 int postend, c, l;
134
135
135 while (s != src->tail) {
136 while (s != src->tail) {
136 if (s->start + offset >= cut)
137 if (s->start + offset >= cut)
137 break;
138 break;
138
139
139 postend = offset + s->start + s->len;
140 postend = offset + s->start + s->len;
140 if (postend <= cut) {
141 if (postend <= cut) {
141 offset += s->start + s->len - s->end;
142 offset += s->start + s->len - s->end;
142 s++;
143 s++;
143 }
144 }
144 else {
145 else {
145 c = cut - offset;
146 c = cut - offset;
146 if (s->end < c)
147 if (s->end < c)
147 c = s->end;
148 c = s->end;
148 l = cut - offset - s->start;
149 l = cut - offset - s->start;
149 if (s->len < l)
150 if (s->len < l)
150 l = s->len;
151 l = s->len;
151
152
152 offset += s->start + l - c;
153 offset += s->start + l - c;
153 s->start = c;
154 s->start = c;
154 s->len = s->len - l;
155 s->len = s->len - l;
155 s->data = s->data + l;
156 s->data = s->data + l;
156
157
157 break;
158 break;
158 }
159 }
159 }
160 }
160
161
161 src->head = s;
162 src->head = s;
162 return offset;
163 return offset;
163 }
164 }
164
165
165 /* combine hunk lists a and b, while adjusting b for offset changes in a/
166 /* combine hunk lists a and b, while adjusting b for offset changes in a/
166 this deletes a and b and returns the resultant list. */
167 this deletes a and b and returns the resultant list. */
167 static struct flist *combine(struct flist *a, struct flist *b)
168 static struct flist *combine(struct flist *a, struct flist *b)
168 {
169 {
169 struct flist *c = NULL;
170 struct flist *c = NULL;
170 struct frag *bh, *ct;
171 struct frag *bh, *ct;
171 int offset = 0, post;
172 int offset = 0, post;
172
173
173 if (a && b)
174 if (a && b)
174 c = lalloc((lsize(a) + lsize(b)) * 2);
175 c = lalloc((lsize(a) + lsize(b)) * 2);
175
176
176 if (c) {
177 if (c) {
177
178
178 for (bh = b->head; bh != b->tail; bh++) {
179 for (bh = b->head; bh != b->tail; bh++) {
179 /* save old hunks */
180 /* save old hunks */
180 offset = gather(c, a, bh->start, offset);
181 offset = gather(c, a, bh->start, offset);
181
182
182 /* discard replaced hunks */
183 /* discard replaced hunks */
183 post = discard(a, bh->end, offset);
184 post = discard(a, bh->end, offset);
184
185
185 /* insert new hunk */
186 /* insert new hunk */
186 ct = c->tail;
187 ct = c->tail;
187 ct->start = bh->start - offset;
188 ct->start = bh->start - offset;
188 ct->end = bh->end - post;
189 ct->end = bh->end - post;
189 ct->len = bh->len;
190 ct->len = bh->len;
190 ct->data = bh->data;
191 ct->data = bh->data;
191 c->tail++;
192 c->tail++;
192 offset = post;
193 offset = post;
193 }
194 }
194
195
195 /* hold on to tail from a */
196 /* hold on to tail from a */
196 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
197 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
197 c->tail += lsize(a);
198 c->tail += lsize(a);
198 }
199 }
199
200
200 lfree(a);
201 lfree(a);
201 lfree(b);
202 lfree(b);
202 return c;
203 return c;
203 }
204 }
204
205
205 /* decode a binary patch into a hunk list */
206 /* decode a binary patch into a hunk list */
206 static struct flist *decode(char *bin, int len)
207 static struct flist *decode(char *bin, int len)
207 {
208 {
208 struct flist *l;
209 struct flist *l;
209 struct frag *lt;
210 struct frag *lt;
210 char *end = bin + len;
211 char *end = bin + len;
211 char decode[12]; /* for dealing with alignment issues */
212 char decode[12]; /* for dealing with alignment issues */
212
213
213 /* assume worst case size, we won't have many of these lists */
214 /* assume worst case size, we won't have many of these lists */
214 l = lalloc(len / 12);
215 l = lalloc(len / 12);
215 lt = l->tail;
216 lt = l->tail;
216
217
217 while (bin < end) {
218 while (bin < end) {
218 memcpy(decode, bin, 12);
219 memcpy(decode, bin, 12);
219 lt->start = ntohl(*(uint32_t *)decode);
220 lt->start = ntohl(*(uint32_t *)decode);
220 lt->end = ntohl(*(uint32_t *)(decode + 4));
221 lt->end = ntohl(*(uint32_t *)(decode + 4));
221 lt->len = ntohl(*(uint32_t *)(decode + 8));
222 lt->len = ntohl(*(uint32_t *)(decode + 8));
222 lt->data = bin + 12;
223 lt->data = bin + 12;
223 bin += 12 + lt->len;
224 bin += 12 + lt->len;
224 lt++;
225 lt++;
225 }
226 }
226
227
227 l->tail = lt;
228 l->tail = lt;
228 return l;
229 return l;
229 }
230 }
230
231
231 /* calculate the size of resultant text */
232 /* calculate the size of resultant text */
232 static int calcsize(int len, struct flist *l)
233 static int calcsize(int len, struct flist *l)
233 {
234 {
234 int outlen = 0, last = 0;
235 int outlen = 0, last = 0;
235 struct frag *f = l->head;
236 struct frag *f = l->head;
236
237
237 while (f != l->tail) {
238 while (f != l->tail) {
238 outlen += f->start - last;
239 outlen += f->start - last;
239 last = f->end;
240 last = f->end;
240 outlen += f->len;
241 outlen += f->len;
241 f++;
242 f++;
242 }
243 }
243
244
244 outlen += len - last;
245 outlen += len - last;
245 return outlen;
246 return outlen;
246 }
247 }
247
248
248 static void apply(char *buf, char *orig, int len, struct flist *l)
249 static void apply(char *buf, char *orig, int len, struct flist *l)
249 {
250 {
250 struct frag *f = l->head;
251 struct frag *f = l->head;
251 int last = 0;
252 int last = 0;
252 char *p = buf;
253 char *p = buf;
253
254
254 while (f != l->tail) {
255 while (f != l->tail) {
255 memcpy(p, orig + last, f->start - last);
256 memcpy(p, orig + last, f->start - last);
256 p += f->start - last;
257 p += f->start - last;
257 memcpy(p, f->data, f->len);
258 memcpy(p, f->data, f->len);
258 last = f->end;
259 last = f->end;
259 p += f->len;
260 p += f->len;
260 f++;
261 f++;
261 }
262 }
262 memcpy(p, orig + last, len - last);
263 memcpy(p, orig + last, len - last);
263 }
264 }
264
265
265 /* recursively generate a patch of all bins between start and end */
266 /* recursively generate a patch of all bins between start and end */
266 static struct flist *fold(PyObject *bins, int start, int end)
267 static struct flist *fold(PyObject *bins, int start, int end)
267 {
268 {
268 int len;
269 int len;
269
270
270 if (start + 1 == end) {
271 if (start + 1 == end) {
271 /* trivial case, output a decoded list */
272 /* trivial case, output a decoded list */
272 PyObject *tmp = PyList_GetItem(bins, start);
273 PyObject *tmp = PyList_GetItem(bins, start);
273 if (!tmp)
274 if (!tmp)
274 return NULL;
275 return NULL;
275 return decode(PyString_AsString(tmp), PyString_Size(tmp));
276 return decode(PyString_AsString(tmp), PyString_Size(tmp));
276 }
277 }
277
278
278 /* divide and conquer, memory management is elsewhere */
279 /* divide and conquer, memory management is elsewhere */
279 len = (end - start) / 2;
280 len = (end - start) / 2;
280 return combine(fold(bins, start, start + len),
281 return combine(fold(bins, start, start + len),
281 fold(bins, start + len, end));
282 fold(bins, start + len, end));
282 }
283 }
283
284
284 static PyObject *
285 static PyObject *
285 patches(PyObject *self, PyObject *args)
286 patches(PyObject *self, PyObject *args)
286 {
287 {
287 PyObject *text, *bins, *result;
288 PyObject *text, *bins, *result;
288 struct flist *patch;
289 struct flist *patch;
289 char *in, *out;
290 char *in, *out;
290 int len, outlen;
291 int len, outlen;
291
292
292 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
293 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
293 return NULL;
294 return NULL;
294
295
295 len = PyList_Size(bins);
296 len = PyList_Size(bins);
296 if (!len) {
297 if (!len) {
297 /* nothing to do */
298 /* nothing to do */
298 Py_INCREF(text);
299 Py_INCREF(text);
299 return text;
300 return text;
300 }
301 }
301
302
302 patch = fold(bins, 0, len);
303 patch = fold(bins, 0, len);
303 if (!patch)
304 if (!patch)
304 return PyErr_NoMemory();
305 return PyErr_NoMemory();
305
306
306 outlen = calcsize(PyString_Size(text), patch);
307 outlen = calcsize(PyString_Size(text), patch);
307 result = PyString_FromStringAndSize(NULL, outlen);
308 result = PyString_FromStringAndSize(NULL, outlen);
308 if (result) {
309 if (result) {
309 in = PyString_AsString(text);
310 in = PyString_AsString(text);
310 out = PyString_AsString(result);
311 out = PyString_AsString(result);
311 apply(out, in, PyString_Size(text), patch);
312 apply(out, in, PyString_Size(text), patch);
312 }
313 }
313
314
314 lfree(patch);
315 lfree(patch);
315 return result;
316 return result;
316 }
317 }
317
318
318 static PyMethodDef methods[] = {
319 static PyMethodDef methods[] = {
319 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
320 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
320 {NULL, NULL}
321 {NULL, NULL}
321 };
322 };
322
323
323 PyMODINIT_FUNC
324 PyMODINIT_FUNC
324 initmpatch(void)
325 initmpatch(void)
325 {
326 {
326 Py_InitModule3("mpatch", methods, mpatch_doc);
327 Py_InitModule3("mpatch", methods, mpatch_doc);
327 }
328 }
328
329
General Comments 0
You need to be logged in to leave comments. Login now