##// END OF EJS Templates
mac os x: fixes for 10.2 from chris monson <monpublic@gmail.com>
Vadim Gelfer -
r2468:1ac0574f default
parent child Browse files
Show More
@@ -1,361 +1,362
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdint.h>
13 #include <stdlib.h>
14 #include <stdlib.h>
14 #include <string.h>
15 #include <string.h>
15
16
16 #ifdef __hpux
17 #ifdef __hpux
17 #define inline
18 #define inline
18 #endif
19 #endif
19
20
20 #ifdef __SUNPRO_C
21 #ifdef __SUNPRO_C
21 # define inline
22 # define inline
22 #endif
23 #endif
23
24
24 #ifdef _WIN32
25 #ifdef _WIN32
25 #ifdef _MSC_VER
26 #ifdef _MSC_VER
26 #define inline __inline
27 #define inline __inline
27 typedef unsigned long uint32_t;
28 typedef unsigned long uint32_t;
28 #else
29 #else
29 #include <stdint.h>
30 #include <stdint.h>
30 #endif
31 #endif
31 static uint32_t htonl(uint32_t x)
32 static uint32_t htonl(uint32_t x)
32 {
33 {
33 return ((x & 0x000000ffUL) << 24) |
34 return ((x & 0x000000ffUL) << 24) |
34 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0xff000000UL) >> 24);
37 ((x & 0xff000000UL) >> 24);
37 }
38 }
38 #else
39 #else
39 #include <sys/types.h>
40 #include <sys/types.h>
40 #include <arpa/inet.h>
41 #include <arpa/inet.h>
41 #endif
42 #endif
42
43
43 struct line {
44 struct line {
44 int h, len, n, e;
45 int h, len, n, e;
45 const char *l;
46 const char *l;
46 };
47 };
47
48
48 struct pos {
49 struct pos {
49 int pos, len;
50 int pos, len;
50 };
51 };
51
52
52 struct hunk {
53 struct hunk {
53 int a1, a2, b1, b2;
54 int a1, a2, b1, b2;
54 };
55 };
55
56
56 struct hunklist {
57 struct hunklist {
57 struct hunk *base, *head;
58 struct hunk *base, *head;
58 };
59 };
59
60
60 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 {
62 {
62 return (word << shift) | (word >> (32 - shift));
63 return (word << shift) | (word >> (32 - shift));
63 }
64 }
64
65
65 int splitlines(const char *a, int len, struct line **lr)
66 int splitlines(const char *a, int len, struct line **lr)
66 {
67 {
67 int h, i;
68 int h, i;
68 const char *p, *b = a;
69 const char *p, *b = a;
69 struct line *l;
70 struct line *l;
70
71
71 /* count the lines */
72 /* count the lines */
72 i = 1; /* extra line for sentinel */
73 i = 1; /* extra line for sentinel */
73 for (p = a; p < a + len; p++)
74 for (p = a; p < a + len; p++)
74 if (*p == '\n' || p == a + len - 1)
75 if (*p == '\n' || p == a + len - 1)
75 i++;
76 i++;
76
77
77 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 if (!l)
79 if (!l)
79 return -1;
80 return -1;
80
81
81 /* build the line array and calculate hashes */
82 /* build the line array and calculate hashes */
82 h = 0;
83 h = 0;
83 for (p = a; p < a + len; p++) {
84 for (p = a; p < a + len; p++) {
84 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
85 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
85 if (*p == '\n' || p == a + len - 1) {
86 if (*p == '\n' || p == a + len - 1) {
86 l->len = p - b + 1;
87 l->len = p - b + 1;
87 l->h = h * l->len;
88 l->h = h * l->len;
88 l->l = b;
89 l->l = b;
89 l->n = -1;
90 l->n = -1;
90 l++;
91 l++;
91 b = p + 1;
92 b = p + 1;
92 h = 0;
93 h = 0;
93 }
94 }
94 }
95 }
95
96
96 /* set up a sentinel */
97 /* set up a sentinel */
97 l->h = l->len = 0;
98 l->h = l->len = 0;
98 l->l = a + len;
99 l->l = a + len;
99 return i - 1;
100 return i - 1;
100 }
101 }
101
102
102 int inline cmp(struct line *a, struct line *b)
103 int inline cmp(struct line *a, struct line *b)
103 {
104 {
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 }
106 }
106
107
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 {
109 {
109 int i, j, buckets = 1, t;
110 int i, j, buckets = 1, t;
110 struct pos *h;
111 struct pos *h;
111
112
112 /* build a hash table of the next highest power of 2 */
113 /* build a hash table of the next highest power of 2 */
113 while (buckets < bn + 1)
114 while (buckets < bn + 1)
114 buckets *= 2;
115 buckets *= 2;
115
116
116 h = (struct pos *)malloc(buckets * sizeof(struct pos));
117 h = (struct pos *)malloc(buckets * sizeof(struct pos));
117 buckets = buckets - 1;
118 buckets = buckets - 1;
118 if (!h)
119 if (!h)
119 return 0;
120 return 0;
120
121
121 /* clear the hash table */
122 /* clear the hash table */
122 for (i = 0; i <= buckets; i++) {
123 for (i = 0; i <= buckets; i++) {
123 h[i].pos = -1;
124 h[i].pos = -1;
124 h[i].len = 0;
125 h[i].len = 0;
125 }
126 }
126
127
127 /* add lines to the hash table chains */
128 /* add lines to the hash table chains */
128 for (i = bn - 1; i >= 0; i--) {
129 for (i = bn - 1; i >= 0; i--) {
129 /* find the equivalence class */
130 /* find the equivalence class */
130 for (j = b[i].h & buckets; h[j].pos != -1;
131 for (j = b[i].h & buckets; h[j].pos != -1;
131 j = (j + 1) & buckets)
132 j = (j + 1) & buckets)
132 if (!cmp(b + i, b + h[j].pos))
133 if (!cmp(b + i, b + h[j].pos))
133 break;
134 break;
134
135
135 /* add to the head of the equivalence class */
136 /* add to the head of the equivalence class */
136 b[i].n = h[j].pos;
137 b[i].n = h[j].pos;
137 b[i].e = j;
138 b[i].e = j;
138 h[j].pos = i;
139 h[j].pos = i;
139 h[j].len++; /* keep track of popularity */
140 h[j].len++; /* keep track of popularity */
140 }
141 }
141
142
142 /* compute popularity threshold */
143 /* compute popularity threshold */
143 t = (bn >= 200) ? bn / 100 : bn + 1;
144 t = (bn >= 200) ? bn / 100 : bn + 1;
144
145
145 /* match items in a to their equivalence class in b */
146 /* match items in a to their equivalence class in b */
146 for (i = 0; i < an; i++) {
147 for (i = 0; i < an; i++) {
147 /* find the equivalence class */
148 /* find the equivalence class */
148 for (j = a[i].h & buckets; h[j].pos != -1;
149 for (j = a[i].h & buckets; h[j].pos != -1;
149 j = (j + 1) & buckets)
150 j = (j + 1) & buckets)
150 if (!cmp(a + i, b + h[j].pos))
151 if (!cmp(a + i, b + h[j].pos))
151 break;
152 break;
152
153
153 a[i].e = j; /* use equivalence class for quick compare */
154 a[i].e = j; /* use equivalence class for quick compare */
154 if (h[j].len <= t)
155 if (h[j].len <= t)
155 a[i].n = h[j].pos; /* point to head of match list */
156 a[i].n = h[j].pos; /* point to head of match list */
156 else
157 else
157 a[i].n = -1; /* too popular */
158 a[i].n = -1; /* too popular */
158 }
159 }
159
160
160 /* discard hash tables */
161 /* discard hash tables */
161 free(h);
162 free(h);
162 return 1;
163 return 1;
163 }
164 }
164
165
165 static int longest_match(struct line *a, struct line *b, struct pos *pos,
166 static int longest_match(struct line *a, struct line *b, struct pos *pos,
166 int a1, int a2, int b1, int b2, int *omi, int *omj)
167 int a1, int a2, int b1, int b2, int *omi, int *omj)
167 {
168 {
168 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
169 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
169
170
170 for (i = a1; i < a2; i++) {
171 for (i = a1; i < a2; i++) {
171 /* skip things before the current block */
172 /* skip things before the current block */
172 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
173 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
173 ;
174 ;
174
175
175 /* loop through all lines match a[i] in b */
176 /* loop through all lines match a[i] in b */
176 for (; j != -1 && j < b2; j = b[j].n) {
177 for (; j != -1 && j < b2; j = b[j].n) {
177 /* does this extend an earlier match? */
178 /* does this extend an earlier match? */
178 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
179 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
179 k = pos[j - 1].len + 1;
180 k = pos[j - 1].len + 1;
180 else
181 else
181 k = 1;
182 k = 1;
182 pos[j].pos = i;
183 pos[j].pos = i;
183 pos[j].len = k;
184 pos[j].len = k;
184
185
185 /* best match so far? */
186 /* best match so far? */
186 if (k > mk) {
187 if (k > mk) {
187 mi = i;
188 mi = i;
188 mj = j;
189 mj = j;
189 mk = k;
190 mk = k;
190 }
191 }
191 }
192 }
192 }
193 }
193
194
194 if (mk) {
195 if (mk) {
195 mi = mi - mk + 1;
196 mi = mi - mk + 1;
196 mj = mj - mk + 1;
197 mj = mj - mk + 1;
197 }
198 }
198
199
199 /* expand match to include neighboring popular lines */
200 /* expand match to include neighboring popular lines */
200 while (mi - mb > a1 && mj - mb > b1 &&
201 while (mi - mb > a1 && mj - mb > b1 &&
201 a[mi - mb - 1].e == b[mj - mb - 1].e)
202 a[mi - mb - 1].e == b[mj - mb - 1].e)
202 mb++;
203 mb++;
203 while (mi + mk < a2 && mj + mk < b2 &&
204 while (mi + mk < a2 && mj + mk < b2 &&
204 a[mi + mk].e == b[mj + mk].e)
205 a[mi + mk].e == b[mj + mk].e)
205 mk++;
206 mk++;
206
207
207 *omi = mi - mb;
208 *omi = mi - mb;
208 *omj = mj - mb;
209 *omj = mj - mb;
209 return mk + mb;
210 return mk + mb;
210 }
211 }
211
212
212 static void recurse(struct line *a, struct line *b, struct pos *pos,
213 static void recurse(struct line *a, struct line *b, struct pos *pos,
213 int a1, int a2, int b1, int b2, struct hunklist *l)
214 int a1, int a2, int b1, int b2, struct hunklist *l)
214 {
215 {
215 int i, j, k;
216 int i, j, k;
216
217
217 /* find the longest match in this chunk */
218 /* find the longest match in this chunk */
218 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
219 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
219 if (!k)
220 if (!k)
220 return;
221 return;
221
222
222 /* and recurse on the remaining chunks on either side */
223 /* and recurse on the remaining chunks on either side */
223 recurse(a, b, pos, a1, i, b1, j, l);
224 recurse(a, b, pos, a1, i, b1, j, l);
224 l->head->a1 = i;
225 l->head->a1 = i;
225 l->head->a2 = i + k;
226 l->head->a2 = i + k;
226 l->head->b1 = j;
227 l->head->b1 = j;
227 l->head->b2 = j + k;
228 l->head->b2 = j + k;
228 l->head++;
229 l->head++;
229 recurse(a, b, pos, i + k, a2, j + k, b2, l);
230 recurse(a, b, pos, i + k, a2, j + k, b2, l);
230 }
231 }
231
232
232 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
233 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
233 {
234 {
234 struct hunklist l;
235 struct hunklist l;
235 struct pos *pos;
236 struct pos *pos;
236 int t;
237 int t;
237
238
238 /* allocate and fill arrays */
239 /* allocate and fill arrays */
239 t = equatelines(a, an, b, bn);
240 t = equatelines(a, an, b, bn);
240 pos = (struct pos *)calloc(bn, sizeof(struct pos));
241 pos = (struct pos *)calloc(bn, sizeof(struct pos));
241 /* we can't have more matches than lines in the shorter file */
242 /* we can't have more matches than lines in the shorter file */
242 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
243 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
243 ((an<bn ? an:bn) + 1));
244 ((an<bn ? an:bn) + 1));
244
245
245 if (pos && l.base && t) {
246 if (pos && l.base && t) {
246 /* generate the matching block list */
247 /* generate the matching block list */
247 recurse(a, b, pos, 0, an, 0, bn, &l);
248 recurse(a, b, pos, 0, an, 0, bn, &l);
248 l.head->a1 = an;
249 l.head->a1 = an;
249 l.head->b1 = bn;
250 l.head->b1 = bn;
250 l.head++;
251 l.head++;
251 }
252 }
252
253
253 free(pos);
254 free(pos);
254 return l;
255 return l;
255 }
256 }
256
257
257 static PyObject *blocks(PyObject *self, PyObject *args)
258 static PyObject *blocks(PyObject *self, PyObject *args)
258 {
259 {
259 PyObject *sa, *sb, *rl = NULL, *m;
260 PyObject *sa, *sb, *rl = NULL, *m;
260 struct line *a, *b;
261 struct line *a, *b;
261 struct hunklist l = {NULL, NULL};
262 struct hunklist l = {NULL, NULL};
262 struct hunk *h;
263 struct hunk *h;
263 int an, bn, pos = 0;
264 int an, bn, pos = 0;
264
265
265 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
266 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
266 return NULL;
267 return NULL;
267
268
268 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
269 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
269 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
270 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
270 if (!a || !b)
271 if (!a || !b)
271 goto nomem;
272 goto nomem;
272
273
273 l = diff(a, an, b, bn);
274 l = diff(a, an, b, bn);
274 rl = PyList_New(l.head - l.base);
275 rl = PyList_New(l.head - l.base);
275 if (!l.head || !rl)
276 if (!l.head || !rl)
276 goto nomem;
277 goto nomem;
277
278
278 for (h = l.base; h != l.head; h++) {
279 for (h = l.base; h != l.head; h++) {
279 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
280 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
280 PyList_SetItem(rl, pos, m);
281 PyList_SetItem(rl, pos, m);
281 pos++;
282 pos++;
282 }
283 }
283
284
284 nomem:
285 nomem:
285 free(a);
286 free(a);
286 free(b);
287 free(b);
287 free(l.base);
288 free(l.base);
288 return rl ? rl : PyErr_NoMemory();
289 return rl ? rl : PyErr_NoMemory();
289 }
290 }
290
291
291 static PyObject *bdiff(PyObject *self, PyObject *args)
292 static PyObject *bdiff(PyObject *self, PyObject *args)
292 {
293 {
293 PyObject *sa, *sb, *result = NULL;
294 PyObject *sa, *sb, *result = NULL;
294 struct line *al, *bl;
295 struct line *al, *bl;
295 struct hunklist l = {NULL, NULL};
296 struct hunklist l = {NULL, NULL};
296 struct hunk *h;
297 struct hunk *h;
297 char encode[12], *rb;
298 char encode[12], *rb;
298 int an, bn, len = 0, la = 0, lb = 0;
299 int an, bn, len = 0, la = 0, lb = 0;
299
300
300 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
301 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
301 return NULL;
302 return NULL;
302
303
303 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
304 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
304 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
305 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
305 if (!al || !bl)
306 if (!al || !bl)
306 goto nomem;
307 goto nomem;
307
308
308 l = diff(al, an, bl, bn);
309 l = diff(al, an, bl, bn);
309 if (!l.head)
310 if (!l.head)
310 goto nomem;
311 goto nomem;
311
312
312 /* calculate length of output */
313 /* calculate length of output */
313 for (h = l.base; h != l.head; h++) {
314 for (h = l.base; h != l.head; h++) {
314 if (h->a1 != la || h->b1 != lb)
315 if (h->a1 != la || h->b1 != lb)
315 len += 12 + bl[h->b1].l - bl[lb].l;
316 len += 12 + bl[h->b1].l - bl[lb].l;
316 la = h->a2;
317 la = h->a2;
317 lb = h->b2;
318 lb = h->b2;
318 }
319 }
319
320
320 result = PyString_FromStringAndSize(NULL, len);
321 result = PyString_FromStringAndSize(NULL, len);
321 if (!result)
322 if (!result)
322 goto nomem;
323 goto nomem;
323
324
324 /* build binary patch */
325 /* build binary patch */
325 rb = PyString_AsString(result);
326 rb = PyString_AsString(result);
326 la = lb = 0;
327 la = lb = 0;
327
328
328 for (h = l.base; h != l.head; h++) {
329 for (h = l.base; h != l.head; h++) {
329 if (h->a1 != la || h->b1 != lb) {
330 if (h->a1 != la || h->b1 != lb) {
330 len = bl[h->b1].l - bl[lb].l;
331 len = bl[h->b1].l - bl[lb].l;
331 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
332 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
332 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
333 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
333 *(uint32_t *)(encode + 8) = htonl(len);
334 *(uint32_t *)(encode + 8) = htonl(len);
334 memcpy(rb, encode, 12);
335 memcpy(rb, encode, 12);
335 memcpy(rb + 12, bl[lb].l, len);
336 memcpy(rb + 12, bl[lb].l, len);
336 rb += 12 + len;
337 rb += 12 + len;
337 }
338 }
338 la = h->a2;
339 la = h->a2;
339 lb = h->b2;
340 lb = h->b2;
340 }
341 }
341
342
342 nomem:
343 nomem:
343 free(al);
344 free(al);
344 free(bl);
345 free(bl);
345 free(l.base);
346 free(l.base);
346 return result ? result : PyErr_NoMemory();
347 return result ? result : PyErr_NoMemory();
347 }
348 }
348
349
349 static char mdiff_doc[] = "Efficient binary diff.";
350 static char mdiff_doc[] = "Efficient binary diff.";
350
351
351 static PyMethodDef methods[] = {
352 static PyMethodDef methods[] = {
352 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
353 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
353 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
354 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
354 {NULL, NULL}
355 {NULL, NULL}
355 };
356 };
356
357
357 PyMODINIT_FUNC initbdiff(void)
358 PyMODINIT_FUNC initbdiff(void)
358 {
359 {
359 Py_InitModule3("bdiff", methods, mdiff_doc);
360 Py_InitModule3("bdiff", methods, mdiff_doc);
360 }
361 }
361
362
@@ -1,404 +1,408
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26 #ifdef _WIN32
27 #ifdef _WIN32
27 #ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
28 #define inline __inline
30 # define inline __inline
29 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
30 #else
32 # else
31 #include <stdint.h>
33 # include <stdint.h>
32 #endif
34 # endif
33 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
34 {
36 {
35 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
36 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
37 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
38 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
39 }
41 }
40 #else
42 #else
43 /* not windows */
41 #include <sys/types.h>
44 # include <sys/types.h>
42 #include <arpa/inet.h>
45 # include <arpa/inet.h>
46 # include <stdint.h>
43 #endif
47 #endif
44
48
45 static char mpatch_doc[] = "Efficient binary patching.";
49 static char mpatch_doc[] = "Efficient binary patching.";
46 static PyObject *mpatch_Error;
50 static PyObject *mpatch_Error;
47
51
48 struct frag {
52 struct frag {
49 int start, end, len;
53 int start, end, len;
50 char *data;
54 char *data;
51 };
55 };
52
56
53 struct flist {
57 struct flist {
54 struct frag *base, *head, *tail;
58 struct frag *base, *head, *tail;
55 };
59 };
56
60
57 static struct flist *lalloc(int size)
61 static struct flist *lalloc(int size)
58 {
62 {
59 struct flist *a = NULL;
63 struct flist *a = NULL;
60
64
61 a = (struct flist *)malloc(sizeof(struct flist));
65 a = (struct flist *)malloc(sizeof(struct flist));
62 if (a) {
66 if (a) {
63 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
67 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
64 if (a->base) {
68 if (a->base) {
65 a->head = a->tail = a->base;
69 a->head = a->tail = a->base;
66 return a;
70 return a;
67 }
71 }
68 free(a);
72 free(a);
69 a = NULL;
73 a = NULL;
70 }
74 }
71 if (!PyErr_Occurred())
75 if (!PyErr_Occurred())
72 PyErr_NoMemory();
76 PyErr_NoMemory();
73 return NULL;
77 return NULL;
74 }
78 }
75
79
76 static void lfree(struct flist *a)
80 static void lfree(struct flist *a)
77 {
81 {
78 if (a) {
82 if (a) {
79 free(a->base);
83 free(a->base);
80 free(a);
84 free(a);
81 }
85 }
82 }
86 }
83
87
84 static int lsize(struct flist *a)
88 static int lsize(struct flist *a)
85 {
89 {
86 return a->tail - a->head;
90 return a->tail - a->head;
87 }
91 }
88
92
89 /* move hunks in source that are less cut to dest, compensating
93 /* move hunks in source that are less cut to dest, compensating
90 for changes in offset. the last hunk may be split if necessary.
94 for changes in offset. the last hunk may be split if necessary.
91 */
95 */
92 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
96 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
93 {
97 {
94 struct frag *d = dest->tail, *s = src->head;
98 struct frag *d = dest->tail, *s = src->head;
95 int postend, c, l;
99 int postend, c, l;
96
100
97 while (s != src->tail) {
101 while (s != src->tail) {
98 if (s->start + offset >= cut)
102 if (s->start + offset >= cut)
99 break; /* we've gone far enough */
103 break; /* we've gone far enough */
100
104
101 postend = offset + s->start + s->len;
105 postend = offset + s->start + s->len;
102 if (postend <= cut) {
106 if (postend <= cut) {
103 /* save this hunk */
107 /* save this hunk */
104 offset += s->start + s->len - s->end;
108 offset += s->start + s->len - s->end;
105 *d++ = *s++;
109 *d++ = *s++;
106 }
110 }
107 else {
111 else {
108 /* break up this hunk */
112 /* break up this hunk */
109 c = cut - offset;
113 c = cut - offset;
110 if (s->end < c)
114 if (s->end < c)
111 c = s->end;
115 c = s->end;
112 l = cut - offset - s->start;
116 l = cut - offset - s->start;
113 if (s->len < l)
117 if (s->len < l)
114 l = s->len;
118 l = s->len;
115
119
116 offset += s->start + l - c;
120 offset += s->start + l - c;
117
121
118 d->start = s->start;
122 d->start = s->start;
119 d->end = c;
123 d->end = c;
120 d->len = l;
124 d->len = l;
121 d->data = s->data;
125 d->data = s->data;
122 d++;
126 d++;
123 s->start = c;
127 s->start = c;
124 s->len = s->len - l;
128 s->len = s->len - l;
125 s->data = s->data + l;
129 s->data = s->data + l;
126
130
127 break;
131 break;
128 }
132 }
129 }
133 }
130
134
131 dest->tail = d;
135 dest->tail = d;
132 src->head = s;
136 src->head = s;
133 return offset;
137 return offset;
134 }
138 }
135
139
136 /* like gather, but with no output list */
140 /* like gather, but with no output list */
137 static int discard(struct flist *src, int cut, int offset)
141 static int discard(struct flist *src, int cut, int offset)
138 {
142 {
139 struct frag *s = src->head;
143 struct frag *s = src->head;
140 int postend, c, l;
144 int postend, c, l;
141
145
142 while (s != src->tail) {
146 while (s != src->tail) {
143 if (s->start + offset >= cut)
147 if (s->start + offset >= cut)
144 break;
148 break;
145
149
146 postend = offset + s->start + s->len;
150 postend = offset + s->start + s->len;
147 if (postend <= cut) {
151 if (postend <= cut) {
148 offset += s->start + s->len - s->end;
152 offset += s->start + s->len - s->end;
149 s++;
153 s++;
150 }
154 }
151 else {
155 else {
152 c = cut - offset;
156 c = cut - offset;
153 if (s->end < c)
157 if (s->end < c)
154 c = s->end;
158 c = s->end;
155 l = cut - offset - s->start;
159 l = cut - offset - s->start;
156 if (s->len < l)
160 if (s->len < l)
157 l = s->len;
161 l = s->len;
158
162
159 offset += s->start + l - c;
163 offset += s->start + l - c;
160 s->start = c;
164 s->start = c;
161 s->len = s->len - l;
165 s->len = s->len - l;
162 s->data = s->data + l;
166 s->data = s->data + l;
163
167
164 break;
168 break;
165 }
169 }
166 }
170 }
167
171
168 src->head = s;
172 src->head = s;
169 return offset;
173 return offset;
170 }
174 }
171
175
172 /* combine hunk lists a and b, while adjusting b for offset changes in a/
176 /* combine hunk lists a and b, while adjusting b for offset changes in a/
173 this deletes a and b and returns the resultant list. */
177 this deletes a and b and returns the resultant list. */
174 static struct flist *combine(struct flist *a, struct flist *b)
178 static struct flist *combine(struct flist *a, struct flist *b)
175 {
179 {
176 struct flist *c = NULL;
180 struct flist *c = NULL;
177 struct frag *bh, *ct;
181 struct frag *bh, *ct;
178 int offset = 0, post;
182 int offset = 0, post;
179
183
180 if (a && b)
184 if (a && b)
181 c = lalloc((lsize(a) + lsize(b)) * 2);
185 c = lalloc((lsize(a) + lsize(b)) * 2);
182
186
183 if (c) {
187 if (c) {
184
188
185 for (bh = b->head; bh != b->tail; bh++) {
189 for (bh = b->head; bh != b->tail; bh++) {
186 /* save old hunks */
190 /* save old hunks */
187 offset = gather(c, a, bh->start, offset);
191 offset = gather(c, a, bh->start, offset);
188
192
189 /* discard replaced hunks */
193 /* discard replaced hunks */
190 post = discard(a, bh->end, offset);
194 post = discard(a, bh->end, offset);
191
195
192 /* insert new hunk */
196 /* insert new hunk */
193 ct = c->tail;
197 ct = c->tail;
194 ct->start = bh->start - offset;
198 ct->start = bh->start - offset;
195 ct->end = bh->end - post;
199 ct->end = bh->end - post;
196 ct->len = bh->len;
200 ct->len = bh->len;
197 ct->data = bh->data;
201 ct->data = bh->data;
198 c->tail++;
202 c->tail++;
199 offset = post;
203 offset = post;
200 }
204 }
201
205
202 /* hold on to tail from a */
206 /* hold on to tail from a */
203 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
207 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
204 c->tail += lsize(a);
208 c->tail += lsize(a);
205 }
209 }
206
210
207 lfree(a);
211 lfree(a);
208 lfree(b);
212 lfree(b);
209 return c;
213 return c;
210 }
214 }
211
215
212 /* decode a binary patch into a hunk list */
216 /* decode a binary patch into a hunk list */
213 static struct flist *decode(char *bin, int len)
217 static struct flist *decode(char *bin, int len)
214 {
218 {
215 struct flist *l;
219 struct flist *l;
216 struct frag *lt;
220 struct frag *lt;
217 char *end = bin + len;
221 char *end = bin + len;
218 char decode[12]; /* for dealing with alignment issues */
222 char decode[12]; /* for dealing with alignment issues */
219
223
220 /* assume worst case size, we won't have many of these lists */
224 /* assume worst case size, we won't have many of these lists */
221 l = lalloc(len / 12);
225 l = lalloc(len / 12);
222 if (!l)
226 if (!l)
223 return NULL;
227 return NULL;
224
228
225 lt = l->tail;
229 lt = l->tail;
226
230
227 while (bin < end) {
231 while (bin < end) {
228 memcpy(decode, bin, 12);
232 memcpy(decode, bin, 12);
229 lt->start = ntohl(*(uint32_t *)decode);
233 lt->start = ntohl(*(uint32_t *)decode);
230 lt->end = ntohl(*(uint32_t *)(decode + 4));
234 lt->end = ntohl(*(uint32_t *)(decode + 4));
231 lt->len = ntohl(*(uint32_t *)(decode + 8));
235 lt->len = ntohl(*(uint32_t *)(decode + 8));
232 lt->data = bin + 12;
236 lt->data = bin + 12;
233 bin += 12 + lt->len;
237 bin += 12 + lt->len;
234 lt++;
238 lt++;
235 }
239 }
236
240
237 if (bin != end) {
241 if (bin != end) {
238 if (!PyErr_Occurred())
242 if (!PyErr_Occurred())
239 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
243 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
240 lfree(l);
244 lfree(l);
241 return NULL;
245 return NULL;
242 }
246 }
243
247
244 l->tail = lt;
248 l->tail = lt;
245 return l;
249 return l;
246 }
250 }
247
251
248 /* calculate the size of resultant text */
252 /* calculate the size of resultant text */
249 static int calcsize(int len, struct flist *l)
253 static int calcsize(int len, struct flist *l)
250 {
254 {
251 int outlen = 0, last = 0;
255 int outlen = 0, last = 0;
252 struct frag *f = l->head;
256 struct frag *f = l->head;
253
257
254 while (f != l->tail) {
258 while (f != l->tail) {
255 if (f->start < last || f->end > len) {
259 if (f->start < last || f->end > len) {
256 if (!PyErr_Occurred())
260 if (!PyErr_Occurred())
257 PyErr_SetString(mpatch_Error,
261 PyErr_SetString(mpatch_Error,
258 "invalid patch");
262 "invalid patch");
259 return -1;
263 return -1;
260 }
264 }
261 outlen += f->start - last;
265 outlen += f->start - last;
262 last = f->end;
266 last = f->end;
263 outlen += f->len;
267 outlen += f->len;
264 f++;
268 f++;
265 }
269 }
266
270
267 outlen += len - last;
271 outlen += len - last;
268 return outlen;
272 return outlen;
269 }
273 }
270
274
271 static int apply(char *buf, char *orig, int len, struct flist *l)
275 static int apply(char *buf, char *orig, int len, struct flist *l)
272 {
276 {
273 struct frag *f = l->head;
277 struct frag *f = l->head;
274 int last = 0;
278 int last = 0;
275 char *p = buf;
279 char *p = buf;
276
280
277 while (f != l->tail) {
281 while (f != l->tail) {
278 if (f->start < last || f->end > len) {
282 if (f->start < last || f->end > len) {
279 if (!PyErr_Occurred())
283 if (!PyErr_Occurred())
280 PyErr_SetString(mpatch_Error,
284 PyErr_SetString(mpatch_Error,
281 "invalid patch");
285 "invalid patch");
282 return 0;
286 return 0;
283 }
287 }
284 memcpy(p, orig + last, f->start - last);
288 memcpy(p, orig + last, f->start - last);
285 p += f->start - last;
289 p += f->start - last;
286 memcpy(p, f->data, f->len);
290 memcpy(p, f->data, f->len);
287 last = f->end;
291 last = f->end;
288 p += f->len;
292 p += f->len;
289 f++;
293 f++;
290 }
294 }
291 memcpy(p, orig + last, len - last);
295 memcpy(p, orig + last, len - last);
292 return 1;
296 return 1;
293 }
297 }
294
298
295 /* recursively generate a patch of all bins between start and end */
299 /* recursively generate a patch of all bins between start and end */
296 static struct flist *fold(PyObject *bins, int start, int end)
300 static struct flist *fold(PyObject *bins, int start, int end)
297 {
301 {
298 int len;
302 int len;
299
303
300 if (start + 1 == end) {
304 if (start + 1 == end) {
301 /* trivial case, output a decoded list */
305 /* trivial case, output a decoded list */
302 PyObject *tmp = PyList_GetItem(bins, start);
306 PyObject *tmp = PyList_GetItem(bins, start);
303 if (!tmp)
307 if (!tmp)
304 return NULL;
308 return NULL;
305 return decode(PyString_AsString(tmp), PyString_Size(tmp));
309 return decode(PyString_AsString(tmp), PyString_Size(tmp));
306 }
310 }
307
311
308 /* divide and conquer, memory management is elsewhere */
312 /* divide and conquer, memory management is elsewhere */
309 len = (end - start) / 2;
313 len = (end - start) / 2;
310 return combine(fold(bins, start, start + len),
314 return combine(fold(bins, start, start + len),
311 fold(bins, start + len, end));
315 fold(bins, start + len, end));
312 }
316 }
313
317
314 static PyObject *
318 static PyObject *
315 patches(PyObject *self, PyObject *args)
319 patches(PyObject *self, PyObject *args)
316 {
320 {
317 PyObject *text, *bins, *result;
321 PyObject *text, *bins, *result;
318 struct flist *patch;
322 struct flist *patch;
319 char *in, *out;
323 char *in, *out;
320 int len, outlen;
324 int len, outlen;
321
325
322 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
326 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
323 return NULL;
327 return NULL;
324
328
325 len = PyList_Size(bins);
329 len = PyList_Size(bins);
326 if (!len) {
330 if (!len) {
327 /* nothing to do */
331 /* nothing to do */
328 Py_INCREF(text);
332 Py_INCREF(text);
329 return text;
333 return text;
330 }
334 }
331
335
332 patch = fold(bins, 0, len);
336 patch = fold(bins, 0, len);
333 if (!patch)
337 if (!patch)
334 return NULL;
338 return NULL;
335
339
336 outlen = calcsize(PyString_Size(text), patch);
340 outlen = calcsize(PyString_Size(text), patch);
337 if (outlen < 0) {
341 if (outlen < 0) {
338 result = NULL;
342 result = NULL;
339 goto cleanup;
343 goto cleanup;
340 }
344 }
341 result = PyString_FromStringAndSize(NULL, outlen);
345 result = PyString_FromStringAndSize(NULL, outlen);
342 if (!result) {
346 if (!result) {
343 result = NULL;
347 result = NULL;
344 goto cleanup;
348 goto cleanup;
345 }
349 }
346 in = PyString_AsString(text);
350 in = PyString_AsString(text);
347 out = PyString_AsString(result);
351 out = PyString_AsString(result);
348 if (!apply(out, in, PyString_Size(text), patch)) {
352 if (!apply(out, in, PyString_Size(text), patch)) {
349 Py_DECREF(result);
353 Py_DECREF(result);
350 result = NULL;
354 result = NULL;
351 }
355 }
352 cleanup:
356 cleanup:
353 lfree(patch);
357 lfree(patch);
354 return result;
358 return result;
355 }
359 }
356
360
357 /* calculate size of a patched file directly */
361 /* calculate size of a patched file directly */
358 static PyObject *
362 static PyObject *
359 patchedsize(PyObject *self, PyObject *args)
363 patchedsize(PyObject *self, PyObject *args)
360 {
364 {
361 long orig, start, end, len, outlen = 0, last = 0;
365 long orig, start, end, len, outlen = 0, last = 0;
362 int patchlen;
366 int patchlen;
363 char *bin, *binend;
367 char *bin, *binend;
364 char decode[12]; /* for dealing with alignment issues */
368 char decode[12]; /* for dealing with alignment issues */
365
369
366 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
370 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
367 return NULL;
371 return NULL;
368
372
369 binend = bin + patchlen;
373 binend = bin + patchlen;
370
374
371 while (bin < binend) {
375 while (bin < binend) {
372 memcpy(decode, bin, 12);
376 memcpy(decode, bin, 12);
373 start = ntohl(*(uint32_t *)decode);
377 start = ntohl(*(uint32_t *)decode);
374 end = ntohl(*(uint32_t *)(decode + 4));
378 end = ntohl(*(uint32_t *)(decode + 4));
375 len = ntohl(*(uint32_t *)(decode + 8));
379 len = ntohl(*(uint32_t *)(decode + 8));
376 bin += 12 + len;
380 bin += 12 + len;
377 outlen += start - last;
381 outlen += start - last;
378 last = end;
382 last = end;
379 outlen += len;
383 outlen += len;
380 }
384 }
381
385
382 if (bin != binend) {
386 if (bin != binend) {
383 if (!PyErr_Occurred())
387 if (!PyErr_Occurred())
384 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
388 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
385 return NULL;
389 return NULL;
386 }
390 }
387
391
388 outlen += orig - last;
392 outlen += orig - last;
389 return Py_BuildValue("l", outlen);
393 return Py_BuildValue("l", outlen);
390 }
394 }
391
395
392 static PyMethodDef methods[] = {
396 static PyMethodDef methods[] = {
393 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
397 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
394 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
398 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
395 {NULL, NULL}
399 {NULL, NULL}
396 };
400 };
397
401
398 PyMODINIT_FUNC
402 PyMODINIT_FUNC
399 initmpatch(void)
403 initmpatch(void)
400 {
404 {
401 Py_InitModule3("mpatch", methods, mpatch_doc);
405 Py_InitModule3("mpatch", methods, mpatch_doc);
402 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
406 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
403 }
407 }
404
408
General Comments 0
You need to be logged in to leave comments. Login now