##// END OF EJS Templates
allow Mercurial to compile on Haiku
Scott McCreary -
r7036:bfad9865 default
parent child Browse files
Show More
@@ -1,371 +1,371
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <limits.h>
15 #include <limits.h>
16
16
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
18 # define inline
18 # define inline
19 #endif
19 #endif
20
20
21 #ifdef _WIN32
21 #ifdef _WIN32
22 #ifdef _MSC_VER
22 #ifdef _MSC_VER
23 #define inline __inline
23 #define inline __inline
24 typedef unsigned long uint32_t;
24 typedef unsigned long uint32_t;
25 #else
25 #else
26 #include <stdint.h>
26 #include <stdint.h>
27 #endif
27 #endif
28 static uint32_t htonl(uint32_t x)
28 static uint32_t htonl(uint32_t x)
29 {
29 {
30 return ((x & 0x000000ffUL) << 24) |
30 return ((x & 0x000000ffUL) << 24) |
31 ((x & 0x0000ff00UL) << 8) |
31 ((x & 0x0000ff00UL) << 8) |
32 ((x & 0x00ff0000UL) >> 8) |
32 ((x & 0x00ff0000UL) >> 8) |
33 ((x & 0xff000000UL) >> 24);
33 ((x & 0xff000000UL) >> 24);
34 }
34 }
35 #else
35 #else
36 #include <sys/types.h>
36 #include <sys/types.h>
37 #ifdef __BEOS__
37 #if defined __BEOS__ && !defined __HAIKU__
38 #include <ByteOrder.h>
38 #include <ByteOrder.h>
39 #else
39 #else
40 #include <arpa/inet.h>
40 #include <arpa/inet.h>
41 #endif
41 #endif
42 #include <inttypes.h>
42 #include <inttypes.h>
43 #endif
43 #endif
44
44
45 struct line {
45 struct line {
46 int h, len, n, e;
46 int h, len, n, e;
47 const char *l;
47 const char *l;
48 };
48 };
49
49
50 struct pos {
50 struct pos {
51 int pos, len;
51 int pos, len;
52 };
52 };
53
53
54 struct hunk {
54 struct hunk {
55 int a1, a2, b1, b2;
55 int a1, a2, b1, b2;
56 };
56 };
57
57
58 struct hunklist {
58 struct hunklist {
59 struct hunk *base, *head;
59 struct hunk *base, *head;
60 };
60 };
61
61
62 int splitlines(const char *a, int len, struct line **lr)
62 int splitlines(const char *a, int len, struct line **lr)
63 {
63 {
64 int h, i;
64 int h, i;
65 const char *p, *b = a;
65 const char *p, *b = a;
66 const char * const plast = a + len - 1;
66 const char * const plast = a + len - 1;
67 struct line *l;
67 struct line *l;
68
68
69 /* count the lines */
69 /* count the lines */
70 i = 1; /* extra line for sentinel */
70 i = 1; /* extra line for sentinel */
71 for (p = a; p < a + len; p++)
71 for (p = a; p < a + len; p++)
72 if (*p == '\n' || p == plast)
72 if (*p == '\n' || p == plast)
73 i++;
73 i++;
74
74
75 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
75 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
76 if (!l)
76 if (!l)
77 return -1;
77 return -1;
78
78
79 /* build the line array and calculate hashes */
79 /* build the line array and calculate hashes */
80 h = 0;
80 h = 0;
81 for (p = a; p < a + len; p++) {
81 for (p = a; p < a + len; p++) {
82 /* Leonid Yuriev's hash */
82 /* Leonid Yuriev's hash */
83 h = (h * 1664525) + *p + 1013904223;
83 h = (h * 1664525) + *p + 1013904223;
84
84
85 if (*p == '\n' || p == plast) {
85 if (*p == '\n' || p == plast) {
86 l->h = h;
86 l->h = h;
87 h = 0;
87 h = 0;
88 l->len = p - b + 1;
88 l->len = p - b + 1;
89 l->l = b;
89 l->l = b;
90 l->n = INT_MAX;
90 l->n = INT_MAX;
91 l++;
91 l++;
92 b = p + 1;
92 b = p + 1;
93 }
93 }
94 }
94 }
95
95
96 /* set up a sentinel */
96 /* set up a sentinel */
97 l->h = l->len = 0;
97 l->h = l->len = 0;
98 l->l = a + len;
98 l->l = a + len;
99 return i - 1;
99 return i - 1;
100 }
100 }
101
101
102 int inline cmp(struct line *a, struct line *b)
102 int inline cmp(struct line *a, struct line *b)
103 {
103 {
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
104 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 }
105 }
106
106
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
107 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 {
108 {
109 int i, j, buckets = 1, t, scale;
109 int i, j, buckets = 1, t, scale;
110 struct pos *h = NULL;
110 struct pos *h = NULL;
111
111
112 /* build a hash table of the next highest power of 2 */
112 /* build a hash table of the next highest power of 2 */
113 while (buckets < bn + 1)
113 while (buckets < bn + 1)
114 buckets *= 2;
114 buckets *= 2;
115
115
116 /* try to allocate a large hash table to avoid collisions */
116 /* try to allocate a large hash table to avoid collisions */
117 for (scale = 4; scale; scale /= 2) {
117 for (scale = 4; scale; scale /= 2) {
118 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
118 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
119 if (h)
119 if (h)
120 break;
120 break;
121 }
121 }
122
122
123 if (!h)
123 if (!h)
124 return 0;
124 return 0;
125
125
126 buckets = buckets * scale - 1;
126 buckets = buckets * scale - 1;
127
127
128 /* clear the hash table */
128 /* clear the hash table */
129 for (i = 0; i <= buckets; i++) {
129 for (i = 0; i <= buckets; i++) {
130 h[i].pos = INT_MAX;
130 h[i].pos = INT_MAX;
131 h[i].len = 0;
131 h[i].len = 0;
132 }
132 }
133
133
134 /* add lines to the hash table chains */
134 /* add lines to the hash table chains */
135 for (i = bn - 1; i >= 0; i--) {
135 for (i = bn - 1; i >= 0; i--) {
136 /* find the equivalence class */
136 /* find the equivalence class */
137 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
137 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
138 j = (j + 1) & buckets)
138 j = (j + 1) & buckets)
139 if (!cmp(b + i, b + h[j].pos))
139 if (!cmp(b + i, b + h[j].pos))
140 break;
140 break;
141
141
142 /* add to the head of the equivalence class */
142 /* add to the head of the equivalence class */
143 b[i].n = h[j].pos;
143 b[i].n = h[j].pos;
144 b[i].e = j;
144 b[i].e = j;
145 h[j].pos = i;
145 h[j].pos = i;
146 h[j].len++; /* keep track of popularity */
146 h[j].len++; /* keep track of popularity */
147 }
147 }
148
148
149 /* compute popularity threshold */
149 /* compute popularity threshold */
150 t = (bn >= 4000) ? bn / 1000 : bn + 1;
150 t = (bn >= 4000) ? bn / 1000 : bn + 1;
151
151
152 /* match items in a to their equivalence class in b */
152 /* match items in a to their equivalence class in b */
153 for (i = 0; i < an; i++) {
153 for (i = 0; i < an; i++) {
154 /* find the equivalence class */
154 /* find the equivalence class */
155 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
155 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
156 j = (j + 1) & buckets)
156 j = (j + 1) & buckets)
157 if (!cmp(a + i, b + h[j].pos))
157 if (!cmp(a + i, b + h[j].pos))
158 break;
158 break;
159
159
160 a[i].e = j; /* use equivalence class for quick compare */
160 a[i].e = j; /* use equivalence class for quick compare */
161 if (h[j].len <= t)
161 if (h[j].len <= t)
162 a[i].n = h[j].pos; /* point to head of match list */
162 a[i].n = h[j].pos; /* point to head of match list */
163 else
163 else
164 a[i].n = INT_MAX; /* too popular */
164 a[i].n = INT_MAX; /* too popular */
165 }
165 }
166
166
167 /* discard hash tables */
167 /* discard hash tables */
168 free(h);
168 free(h);
169 return 1;
169 return 1;
170 }
170 }
171
171
172 static int longest_match(struct line *a, struct line *b, struct pos *pos,
172 static int longest_match(struct line *a, struct line *b, struct pos *pos,
173 int a1, int a2, int b1, int b2, int *omi, int *omj)
173 int a1, int a2, int b1, int b2, int *omi, int *omj)
174 {
174 {
175 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
175 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
176
176
177 for (i = a1; i < a2; i++) {
177 for (i = a1; i < a2; i++) {
178 /* skip things before the current block */
178 /* skip things before the current block */
179 for (j = a[i].n; j < b1; j = b[j].n)
179 for (j = a[i].n; j < b1; j = b[j].n)
180 ;
180 ;
181
181
182 /* loop through all lines match a[i] in b */
182 /* loop through all lines match a[i] in b */
183 for (; j < b2; j = b[j].n) {
183 for (; j < b2; j = b[j].n) {
184 /* does this extend an earlier match? */
184 /* does this extend an earlier match? */
185 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
185 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
186 k = pos[j - 1].len + 1;
186 k = pos[j - 1].len + 1;
187 else
187 else
188 k = 1;
188 k = 1;
189 pos[j].pos = i;
189 pos[j].pos = i;
190 pos[j].len = k;
190 pos[j].len = k;
191
191
192 /* best match so far? */
192 /* best match so far? */
193 if (k > mk) {
193 if (k > mk) {
194 mi = i;
194 mi = i;
195 mj = j;
195 mj = j;
196 mk = k;
196 mk = k;
197 }
197 }
198 }
198 }
199 }
199 }
200
200
201 if (mk) {
201 if (mk) {
202 mi = mi - mk + 1;
202 mi = mi - mk + 1;
203 mj = mj - mk + 1;
203 mj = mj - mk + 1;
204 }
204 }
205
205
206 /* expand match to include neighboring popular lines */
206 /* expand match to include neighboring popular lines */
207 while (mi - mb > a1 && mj - mb > b1 &&
207 while (mi - mb > a1 && mj - mb > b1 &&
208 a[mi - mb - 1].e == b[mj - mb - 1].e)
208 a[mi - mb - 1].e == b[mj - mb - 1].e)
209 mb++;
209 mb++;
210 while (mi + mk < a2 && mj + mk < b2 &&
210 while (mi + mk < a2 && mj + mk < b2 &&
211 a[mi + mk].e == b[mj + mk].e)
211 a[mi + mk].e == b[mj + mk].e)
212 mk++;
212 mk++;
213
213
214 *omi = mi - mb;
214 *omi = mi - mb;
215 *omj = mj - mb;
215 *omj = mj - mb;
216
216
217 return mk + mb;
217 return mk + mb;
218 }
218 }
219
219
220 static void recurse(struct line *a, struct line *b, struct pos *pos,
220 static void recurse(struct line *a, struct line *b, struct pos *pos,
221 int a1, int a2, int b1, int b2, struct hunklist *l)
221 int a1, int a2, int b1, int b2, struct hunklist *l)
222 {
222 {
223 int i, j, k;
223 int i, j, k;
224
224
225 /* find the longest match in this chunk */
225 /* find the longest match in this chunk */
226 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
226 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
227 if (!k)
227 if (!k)
228 return;
228 return;
229
229
230 /* and recurse on the remaining chunks on either side */
230 /* and recurse on the remaining chunks on either side */
231 recurse(a, b, pos, a1, i, b1, j, l);
231 recurse(a, b, pos, a1, i, b1, j, l);
232 l->head->a1 = i;
232 l->head->a1 = i;
233 l->head->a2 = i + k;
233 l->head->a2 = i + k;
234 l->head->b1 = j;
234 l->head->b1 = j;
235 l->head->b2 = j + k;
235 l->head->b2 = j + k;
236 l->head++;
236 l->head++;
237 recurse(a, b, pos, i + k, a2, j + k, b2, l);
237 recurse(a, b, pos, i + k, a2, j + k, b2, l);
238 }
238 }
239
239
240 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
240 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
241 {
241 {
242 struct hunklist l;
242 struct hunklist l;
243 struct pos *pos;
243 struct pos *pos;
244 int t;
244 int t;
245
245
246 /* allocate and fill arrays */
246 /* allocate and fill arrays */
247 t = equatelines(a, an, b, bn);
247 t = equatelines(a, an, b, bn);
248 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
248 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
249 /* we can't have more matches than lines in the shorter file */
249 /* we can't have more matches than lines in the shorter file */
250 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
250 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
251 ((an<bn ? an:bn) + 1));
251 ((an<bn ? an:bn) + 1));
252
252
253 if (pos && l.base && t) {
253 if (pos && l.base && t) {
254 /* generate the matching block list */
254 /* generate the matching block list */
255 recurse(a, b, pos, 0, an, 0, bn, &l);
255 recurse(a, b, pos, 0, an, 0, bn, &l);
256 l.head->a1 = l.head->a2 = an;
256 l.head->a1 = l.head->a2 = an;
257 l.head->b1 = l.head->b2 = bn;
257 l.head->b1 = l.head->b2 = bn;
258 l.head++;
258 l.head++;
259 }
259 }
260
260
261 free(pos);
261 free(pos);
262 return l;
262 return l;
263 }
263 }
264
264
265 static PyObject *blocks(PyObject *self, PyObject *args)
265 static PyObject *blocks(PyObject *self, PyObject *args)
266 {
266 {
267 PyObject *sa, *sb, *rl = NULL, *m;
267 PyObject *sa, *sb, *rl = NULL, *m;
268 struct line *a, *b;
268 struct line *a, *b;
269 struct hunklist l = {NULL, NULL};
269 struct hunklist l = {NULL, NULL};
270 struct hunk *h;
270 struct hunk *h;
271 int an, bn, pos = 0;
271 int an, bn, pos = 0;
272
272
273 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
273 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
274 return NULL;
274 return NULL;
275
275
276 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
276 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
277 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
277 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
278 if (!a || !b)
278 if (!a || !b)
279 goto nomem;
279 goto nomem;
280
280
281 l = diff(a, an, b, bn);
281 l = diff(a, an, b, bn);
282 rl = PyList_New(l.head - l.base);
282 rl = PyList_New(l.head - l.base);
283 if (!l.head || !rl)
283 if (!l.head || !rl)
284 goto nomem;
284 goto nomem;
285
285
286 for (h = l.base; h != l.head; h++) {
286 for (h = l.base; h != l.head; h++) {
287 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
287 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
288 PyList_SetItem(rl, pos, m);
288 PyList_SetItem(rl, pos, m);
289 pos++;
289 pos++;
290 }
290 }
291
291
292 nomem:
292 nomem:
293 free(a);
293 free(a);
294 free(b);
294 free(b);
295 free(l.base);
295 free(l.base);
296 return rl ? rl : PyErr_NoMemory();
296 return rl ? rl : PyErr_NoMemory();
297 }
297 }
298
298
299 static PyObject *bdiff(PyObject *self, PyObject *args)
299 static PyObject *bdiff(PyObject *self, PyObject *args)
300 {
300 {
301 char *sa, *sb;
301 char *sa, *sb;
302 PyObject *result = NULL;
302 PyObject *result = NULL;
303 struct line *al, *bl;
303 struct line *al, *bl;
304 struct hunklist l = {NULL, NULL};
304 struct hunklist l = {NULL, NULL};
305 struct hunk *h;
305 struct hunk *h;
306 char encode[12], *rb;
306 char encode[12], *rb;
307 int an, bn, len = 0, la, lb;
307 int an, bn, len = 0, la, lb;
308
308
309 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
309 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
310 return NULL;
310 return NULL;
311
311
312 an = splitlines(sa, la, &al);
312 an = splitlines(sa, la, &al);
313 bn = splitlines(sb, lb, &bl);
313 bn = splitlines(sb, lb, &bl);
314 if (!al || !bl)
314 if (!al || !bl)
315 goto nomem;
315 goto nomem;
316
316
317 l = diff(al, an, bl, bn);
317 l = diff(al, an, bl, bn);
318 if (!l.head)
318 if (!l.head)
319 goto nomem;
319 goto nomem;
320
320
321 /* calculate length of output */
321 /* calculate length of output */
322 la = lb = 0;
322 la = lb = 0;
323 for (h = l.base; h != l.head; h++) {
323 for (h = l.base; h != l.head; h++) {
324 if (h->a1 != la || h->b1 != lb)
324 if (h->a1 != la || h->b1 != lb)
325 len += 12 + bl[h->b1].l - bl[lb].l;
325 len += 12 + bl[h->b1].l - bl[lb].l;
326 la = h->a2;
326 la = h->a2;
327 lb = h->b2;
327 lb = h->b2;
328 }
328 }
329
329
330 result = PyString_FromStringAndSize(NULL, len);
330 result = PyString_FromStringAndSize(NULL, len);
331 if (!result)
331 if (!result)
332 goto nomem;
332 goto nomem;
333
333
334 /* build binary patch */
334 /* build binary patch */
335 rb = PyString_AsString(result);
335 rb = PyString_AsString(result);
336 la = lb = 0;
336 la = lb = 0;
337
337
338 for (h = l.base; h != l.head; h++) {
338 for (h = l.base; h != l.head; h++) {
339 if (h->a1 != la || h->b1 != lb) {
339 if (h->a1 != la || h->b1 != lb) {
340 len = bl[h->b1].l - bl[lb].l;
340 len = bl[h->b1].l - bl[lb].l;
341 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
341 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
342 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
342 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
343 *(uint32_t *)(encode + 8) = htonl(len);
343 *(uint32_t *)(encode + 8) = htonl(len);
344 memcpy(rb, encode, 12);
344 memcpy(rb, encode, 12);
345 memcpy(rb + 12, bl[lb].l, len);
345 memcpy(rb + 12, bl[lb].l, len);
346 rb += 12 + len;
346 rb += 12 + len;
347 }
347 }
348 la = h->a2;
348 la = h->a2;
349 lb = h->b2;
349 lb = h->b2;
350 }
350 }
351
351
352 nomem:
352 nomem:
353 free(al);
353 free(al);
354 free(bl);
354 free(bl);
355 free(l.base);
355 free(l.base);
356 return result ? result : PyErr_NoMemory();
356 return result ? result : PyErr_NoMemory();
357 }
357 }
358
358
359 static char mdiff_doc[] = "Efficient binary diff.";
359 static char mdiff_doc[] = "Efficient binary diff.";
360
360
361 static PyMethodDef methods[] = {
361 static PyMethodDef methods[] = {
362 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
362 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
363 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
363 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
364 {NULL, NULL}
364 {NULL, NULL}
365 };
365 };
366
366
367 PyMODINIT_FUNC initbdiff(void)
367 PyMODINIT_FUNC initbdiff(void)
368 {
368 {
369 Py_InitModule3("bdiff", methods, mdiff_doc);
369 Py_InitModule3("bdiff", methods, mdiff_doc);
370 }
370 }
371
371
@@ -1,444 +1,444
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 /* Definitions to get compatibility with python 2.4 and earlier which
27 /* Definitions to get compatibility with python 2.4 and earlier which
28 does not have Py_ssize_t. See also PEP 353.
28 does not have Py_ssize_t. See also PEP 353.
29 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
29 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
30 */
30 */
31 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
31 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
32 typedef int Py_ssize_t;
32 typedef int Py_ssize_t;
33 #define PY_SSIZE_T_MAX INT_MAX
33 #define PY_SSIZE_T_MAX INT_MAX
34 #define PY_SSIZE_T_MIN INT_MIN
34 #define PY_SSIZE_T_MIN INT_MIN
35 #endif
35 #endif
36
36
37 #ifdef _WIN32
37 #ifdef _WIN32
38 # ifdef _MSC_VER
38 # ifdef _MSC_VER
39 /* msvc 6.0 has problems */
39 /* msvc 6.0 has problems */
40 # define inline __inline
40 # define inline __inline
41 typedef unsigned long uint32_t;
41 typedef unsigned long uint32_t;
42 # else
42 # else
43 # include <stdint.h>
43 # include <stdint.h>
44 # endif
44 # endif
45 static uint32_t ntohl(uint32_t x)
45 static uint32_t ntohl(uint32_t x)
46 {
46 {
47 return ((x & 0x000000ffUL) << 24) |
47 return ((x & 0x000000ffUL) << 24) |
48 ((x & 0x0000ff00UL) << 8) |
48 ((x & 0x0000ff00UL) << 8) |
49 ((x & 0x00ff0000UL) >> 8) |
49 ((x & 0x00ff0000UL) >> 8) |
50 ((x & 0xff000000UL) >> 24);
50 ((x & 0xff000000UL) >> 24);
51 }
51 }
52 #else
52 #else
53 /* not windows */
53 /* not windows */
54 # include <sys/types.h>
54 # include <sys/types.h>
55 # ifdef __BEOS__
55 # if defined __BEOS__ && !defined __HAIKU__
56 # include <ByteOrder.h>
56 # include <ByteOrder.h>
57 # else
57 # else
58 # include <arpa/inet.h>
58 # include <arpa/inet.h>
59 # endif
59 # endif
60 # include <inttypes.h>
60 # include <inttypes.h>
61 #endif
61 #endif
62
62
63 static char mpatch_doc[] = "Efficient binary patching.";
63 static char mpatch_doc[] = "Efficient binary patching.";
64 static PyObject *mpatch_Error;
64 static PyObject *mpatch_Error;
65
65
66 struct frag {
66 struct frag {
67 int start, end, len;
67 int start, end, len;
68 const char *data;
68 const char *data;
69 };
69 };
70
70
71 struct flist {
71 struct flist {
72 struct frag *base, *head, *tail;
72 struct frag *base, *head, *tail;
73 };
73 };
74
74
75 static struct flist *lalloc(int size)
75 static struct flist *lalloc(int size)
76 {
76 {
77 struct flist *a = NULL;
77 struct flist *a = NULL;
78
78
79 if (size < 1)
79 if (size < 1)
80 size = 1;
80 size = 1;
81
81
82 a = (struct flist *)malloc(sizeof(struct flist));
82 a = (struct flist *)malloc(sizeof(struct flist));
83 if (a) {
83 if (a) {
84 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
84 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
85 if (a->base) {
85 if (a->base) {
86 a->head = a->tail = a->base;
86 a->head = a->tail = a->base;
87 return a;
87 return a;
88 }
88 }
89 free(a);
89 free(a);
90 a = NULL;
90 a = NULL;
91 }
91 }
92 if (!PyErr_Occurred())
92 if (!PyErr_Occurred())
93 PyErr_NoMemory();
93 PyErr_NoMemory();
94 return NULL;
94 return NULL;
95 }
95 }
96
96
97 static void lfree(struct flist *a)
97 static void lfree(struct flist *a)
98 {
98 {
99 if (a) {
99 if (a) {
100 free(a->base);
100 free(a->base);
101 free(a);
101 free(a);
102 }
102 }
103 }
103 }
104
104
105 static int lsize(struct flist *a)
105 static int lsize(struct flist *a)
106 {
106 {
107 return a->tail - a->head;
107 return a->tail - a->head;
108 }
108 }
109
109
110 /* move hunks in source that are less cut to dest, compensating
110 /* move hunks in source that are less cut to dest, compensating
111 for changes in offset. the last hunk may be split if necessary.
111 for changes in offset. the last hunk may be split if necessary.
112 */
112 */
113 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
113 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
114 {
114 {
115 struct frag *d = dest->tail, *s = src->head;
115 struct frag *d = dest->tail, *s = src->head;
116 int postend, c, l;
116 int postend, c, l;
117
117
118 while (s != src->tail) {
118 while (s != src->tail) {
119 if (s->start + offset >= cut)
119 if (s->start + offset >= cut)
120 break; /* we've gone far enough */
120 break; /* we've gone far enough */
121
121
122 postend = offset + s->start + s->len;
122 postend = offset + s->start + s->len;
123 if (postend <= cut) {
123 if (postend <= cut) {
124 /* save this hunk */
124 /* save this hunk */
125 offset += s->start + s->len - s->end;
125 offset += s->start + s->len - s->end;
126 *d++ = *s++;
126 *d++ = *s++;
127 }
127 }
128 else {
128 else {
129 /* break up this hunk */
129 /* break up this hunk */
130 c = cut - offset;
130 c = cut - offset;
131 if (s->end < c)
131 if (s->end < c)
132 c = s->end;
132 c = s->end;
133 l = cut - offset - s->start;
133 l = cut - offset - s->start;
134 if (s->len < l)
134 if (s->len < l)
135 l = s->len;
135 l = s->len;
136
136
137 offset += s->start + l - c;
137 offset += s->start + l - c;
138
138
139 d->start = s->start;
139 d->start = s->start;
140 d->end = c;
140 d->end = c;
141 d->len = l;
141 d->len = l;
142 d->data = s->data;
142 d->data = s->data;
143 d++;
143 d++;
144 s->start = c;
144 s->start = c;
145 s->len = s->len - l;
145 s->len = s->len - l;
146 s->data = s->data + l;
146 s->data = s->data + l;
147
147
148 break;
148 break;
149 }
149 }
150 }
150 }
151
151
152 dest->tail = d;
152 dest->tail = d;
153 src->head = s;
153 src->head = s;
154 return offset;
154 return offset;
155 }
155 }
156
156
157 /* like gather, but with no output list */
157 /* like gather, but with no output list */
158 static int discard(struct flist *src, int cut, int offset)
158 static int discard(struct flist *src, int cut, int offset)
159 {
159 {
160 struct frag *s = src->head;
160 struct frag *s = src->head;
161 int postend, c, l;
161 int postend, c, l;
162
162
163 while (s != src->tail) {
163 while (s != src->tail) {
164 if (s->start + offset >= cut)
164 if (s->start + offset >= cut)
165 break;
165 break;
166
166
167 postend = offset + s->start + s->len;
167 postend = offset + s->start + s->len;
168 if (postend <= cut) {
168 if (postend <= cut) {
169 offset += s->start + s->len - s->end;
169 offset += s->start + s->len - s->end;
170 s++;
170 s++;
171 }
171 }
172 else {
172 else {
173 c = cut - offset;
173 c = cut - offset;
174 if (s->end < c)
174 if (s->end < c)
175 c = s->end;
175 c = s->end;
176 l = cut - offset - s->start;
176 l = cut - offset - s->start;
177 if (s->len < l)
177 if (s->len < l)
178 l = s->len;
178 l = s->len;
179
179
180 offset += s->start + l - c;
180 offset += s->start + l - c;
181 s->start = c;
181 s->start = c;
182 s->len = s->len - l;
182 s->len = s->len - l;
183 s->data = s->data + l;
183 s->data = s->data + l;
184
184
185 break;
185 break;
186 }
186 }
187 }
187 }
188
188
189 src->head = s;
189 src->head = s;
190 return offset;
190 return offset;
191 }
191 }
192
192
193 /* combine hunk lists a and b, while adjusting b for offset changes in a/
193 /* combine hunk lists a and b, while adjusting b for offset changes in a/
194 this deletes a and b and returns the resultant list. */
194 this deletes a and b and returns the resultant list. */
195 static struct flist *combine(struct flist *a, struct flist *b)
195 static struct flist *combine(struct flist *a, struct flist *b)
196 {
196 {
197 struct flist *c = NULL;
197 struct flist *c = NULL;
198 struct frag *bh, *ct;
198 struct frag *bh, *ct;
199 int offset = 0, post;
199 int offset = 0, post;
200
200
201 if (a && b)
201 if (a && b)
202 c = lalloc((lsize(a) + lsize(b)) * 2);
202 c = lalloc((lsize(a) + lsize(b)) * 2);
203
203
204 if (c) {
204 if (c) {
205
205
206 for (bh = b->head; bh != b->tail; bh++) {
206 for (bh = b->head; bh != b->tail; bh++) {
207 /* save old hunks */
207 /* save old hunks */
208 offset = gather(c, a, bh->start, offset);
208 offset = gather(c, a, bh->start, offset);
209
209
210 /* discard replaced hunks */
210 /* discard replaced hunks */
211 post = discard(a, bh->end, offset);
211 post = discard(a, bh->end, offset);
212
212
213 /* insert new hunk */
213 /* insert new hunk */
214 ct = c->tail;
214 ct = c->tail;
215 ct->start = bh->start - offset;
215 ct->start = bh->start - offset;
216 ct->end = bh->end - post;
216 ct->end = bh->end - post;
217 ct->len = bh->len;
217 ct->len = bh->len;
218 ct->data = bh->data;
218 ct->data = bh->data;
219 c->tail++;
219 c->tail++;
220 offset = post;
220 offset = post;
221 }
221 }
222
222
223 /* hold on to tail from a */
223 /* hold on to tail from a */
224 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
224 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
225 c->tail += lsize(a);
225 c->tail += lsize(a);
226 }
226 }
227
227
228 lfree(a);
228 lfree(a);
229 lfree(b);
229 lfree(b);
230 return c;
230 return c;
231 }
231 }
232
232
233 /* decode a binary patch into a hunk list */
233 /* decode a binary patch into a hunk list */
234 static struct flist *decode(const char *bin, int len)
234 static struct flist *decode(const char *bin, int len)
235 {
235 {
236 struct flist *l;
236 struct flist *l;
237 struct frag *lt;
237 struct frag *lt;
238 const char *data = bin + 12, *end = bin + len;
238 const char *data = bin + 12, *end = bin + len;
239 char decode[12]; /* for dealing with alignment issues */
239 char decode[12]; /* for dealing with alignment issues */
240
240
241 /* assume worst case size, we won't have many of these lists */
241 /* assume worst case size, we won't have many of these lists */
242 l = lalloc(len / 12);
242 l = lalloc(len / 12);
243 if (!l)
243 if (!l)
244 return NULL;
244 return NULL;
245
245
246 lt = l->tail;
246 lt = l->tail;
247
247
248 while (data <= end) {
248 while (data <= end) {
249 memcpy(decode, bin, 12);
249 memcpy(decode, bin, 12);
250 lt->start = ntohl(*(uint32_t *)decode);
250 lt->start = ntohl(*(uint32_t *)decode);
251 lt->end = ntohl(*(uint32_t *)(decode + 4));
251 lt->end = ntohl(*(uint32_t *)(decode + 4));
252 lt->len = ntohl(*(uint32_t *)(decode + 8));
252 lt->len = ntohl(*(uint32_t *)(decode + 8));
253 if (lt->start > lt->end)
253 if (lt->start > lt->end)
254 break; /* sanity check */
254 break; /* sanity check */
255 bin = data + lt->len;
255 bin = data + lt->len;
256 if (bin < data)
256 if (bin < data)
257 break; /* big data + big (bogus) len can wrap around */
257 break; /* big data + big (bogus) len can wrap around */
258 lt->data = data;
258 lt->data = data;
259 data = bin + 12;
259 data = bin + 12;
260 lt++;
260 lt++;
261 }
261 }
262
262
263 if (bin != end) {
263 if (bin != end) {
264 if (!PyErr_Occurred())
264 if (!PyErr_Occurred())
265 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
265 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
266 lfree(l);
266 lfree(l);
267 return NULL;
267 return NULL;
268 }
268 }
269
269
270 l->tail = lt;
270 l->tail = lt;
271 return l;
271 return l;
272 }
272 }
273
273
274 /* calculate the size of resultant text */
274 /* calculate the size of resultant text */
275 static int calcsize(int len, struct flist *l)
275 static int calcsize(int len, struct flist *l)
276 {
276 {
277 int outlen = 0, last = 0;
277 int outlen = 0, last = 0;
278 struct frag *f = l->head;
278 struct frag *f = l->head;
279
279
280 while (f != l->tail) {
280 while (f != l->tail) {
281 if (f->start < last || f->end > len) {
281 if (f->start < last || f->end > len) {
282 if (!PyErr_Occurred())
282 if (!PyErr_Occurred())
283 PyErr_SetString(mpatch_Error,
283 PyErr_SetString(mpatch_Error,
284 "invalid patch");
284 "invalid patch");
285 return -1;
285 return -1;
286 }
286 }
287 outlen += f->start - last;
287 outlen += f->start - last;
288 last = f->end;
288 last = f->end;
289 outlen += f->len;
289 outlen += f->len;
290 f++;
290 f++;
291 }
291 }
292
292
293 outlen += len - last;
293 outlen += len - last;
294 return outlen;
294 return outlen;
295 }
295 }
296
296
297 static int apply(char *buf, const char *orig, int len, struct flist *l)
297 static int apply(char *buf, const char *orig, int len, struct flist *l)
298 {
298 {
299 struct frag *f = l->head;
299 struct frag *f = l->head;
300 int last = 0;
300 int last = 0;
301 char *p = buf;
301 char *p = buf;
302
302
303 while (f != l->tail) {
303 while (f != l->tail) {
304 if (f->start < last || f->end > len) {
304 if (f->start < last || f->end > len) {
305 if (!PyErr_Occurred())
305 if (!PyErr_Occurred())
306 PyErr_SetString(mpatch_Error,
306 PyErr_SetString(mpatch_Error,
307 "invalid patch");
307 "invalid patch");
308 return 0;
308 return 0;
309 }
309 }
310 memcpy(p, orig + last, f->start - last);
310 memcpy(p, orig + last, f->start - last);
311 p += f->start - last;
311 p += f->start - last;
312 memcpy(p, f->data, f->len);
312 memcpy(p, f->data, f->len);
313 last = f->end;
313 last = f->end;
314 p += f->len;
314 p += f->len;
315 f++;
315 f++;
316 }
316 }
317 memcpy(p, orig + last, len - last);
317 memcpy(p, orig + last, len - last);
318 return 1;
318 return 1;
319 }
319 }
320
320
321 /* recursively generate a patch of all bins between start and end */
321 /* recursively generate a patch of all bins between start and end */
322 static struct flist *fold(PyObject *bins, int start, int end)
322 static struct flist *fold(PyObject *bins, int start, int end)
323 {
323 {
324 int len;
324 int len;
325 Py_ssize_t blen;
325 Py_ssize_t blen;
326 const char *buffer;
326 const char *buffer;
327
327
328 if (start + 1 == end) {
328 if (start + 1 == end) {
329 /* trivial case, output a decoded list */
329 /* trivial case, output a decoded list */
330 PyObject *tmp = PyList_GetItem(bins, start);
330 PyObject *tmp = PyList_GetItem(bins, start);
331 if (!tmp)
331 if (!tmp)
332 return NULL;
332 return NULL;
333 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
333 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
334 return NULL;
334 return NULL;
335 return decode(buffer, blen);
335 return decode(buffer, blen);
336 }
336 }
337
337
338 /* divide and conquer, memory management is elsewhere */
338 /* divide and conquer, memory management is elsewhere */
339 len = (end - start) / 2;
339 len = (end - start) / 2;
340 return combine(fold(bins, start, start + len),
340 return combine(fold(bins, start, start + len),
341 fold(bins, start + len, end));
341 fold(bins, start + len, end));
342 }
342 }
343
343
344 static PyObject *
344 static PyObject *
345 patches(PyObject *self, PyObject *args)
345 patches(PyObject *self, PyObject *args)
346 {
346 {
347 PyObject *text, *bins, *result;
347 PyObject *text, *bins, *result;
348 struct flist *patch;
348 struct flist *patch;
349 const char *in;
349 const char *in;
350 char *out;
350 char *out;
351 int len, outlen;
351 int len, outlen;
352 Py_ssize_t inlen;
352 Py_ssize_t inlen;
353
353
354 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
354 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
355 return NULL;
355 return NULL;
356
356
357 len = PyList_Size(bins);
357 len = PyList_Size(bins);
358 if (!len) {
358 if (!len) {
359 /* nothing to do */
359 /* nothing to do */
360 Py_INCREF(text);
360 Py_INCREF(text);
361 return text;
361 return text;
362 }
362 }
363
363
364 if (PyObject_AsCharBuffer(text, &in, &inlen))
364 if (PyObject_AsCharBuffer(text, &in, &inlen))
365 return NULL;
365 return NULL;
366
366
367 patch = fold(bins, 0, len);
367 patch = fold(bins, 0, len);
368 if (!patch)
368 if (!patch)
369 return NULL;
369 return NULL;
370
370
371 outlen = calcsize(inlen, patch);
371 outlen = calcsize(inlen, patch);
372 if (outlen < 0) {
372 if (outlen < 0) {
373 result = NULL;
373 result = NULL;
374 goto cleanup;
374 goto cleanup;
375 }
375 }
376 result = PyString_FromStringAndSize(NULL, outlen);
376 result = PyString_FromStringAndSize(NULL, outlen);
377 if (!result) {
377 if (!result) {
378 result = NULL;
378 result = NULL;
379 goto cleanup;
379 goto cleanup;
380 }
380 }
381 out = PyString_AsString(result);
381 out = PyString_AsString(result);
382 if (!apply(out, in, inlen, patch)) {
382 if (!apply(out, in, inlen, patch)) {
383 Py_DECREF(result);
383 Py_DECREF(result);
384 result = NULL;
384 result = NULL;
385 }
385 }
386 cleanup:
386 cleanup:
387 lfree(patch);
387 lfree(patch);
388 return result;
388 return result;
389 }
389 }
390
390
391 /* calculate size of a patched file directly */
391 /* calculate size of a patched file directly */
392 static PyObject *
392 static PyObject *
393 patchedsize(PyObject *self, PyObject *args)
393 patchedsize(PyObject *self, PyObject *args)
394 {
394 {
395 long orig, start, end, len, outlen = 0, last = 0;
395 long orig, start, end, len, outlen = 0, last = 0;
396 int patchlen;
396 int patchlen;
397 char *bin, *binend, *data;
397 char *bin, *binend, *data;
398 char decode[12]; /* for dealing with alignment issues */
398 char decode[12]; /* for dealing with alignment issues */
399
399
400 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
400 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
401 return NULL;
401 return NULL;
402
402
403 binend = bin + patchlen;
403 binend = bin + patchlen;
404 data = bin + 12;
404 data = bin + 12;
405
405
406 while (data <= binend) {
406 while (data <= binend) {
407 memcpy(decode, bin, 12);
407 memcpy(decode, bin, 12);
408 start = ntohl(*(uint32_t *)decode);
408 start = ntohl(*(uint32_t *)decode);
409 end = ntohl(*(uint32_t *)(decode + 4));
409 end = ntohl(*(uint32_t *)(decode + 4));
410 len = ntohl(*(uint32_t *)(decode + 8));
410 len = ntohl(*(uint32_t *)(decode + 8));
411 if (start > end)
411 if (start > end)
412 break; /* sanity check */
412 break; /* sanity check */
413 bin = data + len;
413 bin = data + len;
414 if (bin < data)
414 if (bin < data)
415 break; /* big data + big (bogus) len can wrap around */
415 break; /* big data + big (bogus) len can wrap around */
416 data = bin + 12;
416 data = bin + 12;
417 outlen += start - last;
417 outlen += start - last;
418 last = end;
418 last = end;
419 outlen += len;
419 outlen += len;
420 }
420 }
421
421
422 if (bin != binend) {
422 if (bin != binend) {
423 if (!PyErr_Occurred())
423 if (!PyErr_Occurred())
424 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
424 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
425 return NULL;
425 return NULL;
426 }
426 }
427
427
428 outlen += orig - last;
428 outlen += orig - last;
429 return Py_BuildValue("l", outlen);
429 return Py_BuildValue("l", outlen);
430 }
430 }
431
431
432 static PyMethodDef methods[] = {
432 static PyMethodDef methods[] = {
433 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
433 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
434 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
434 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
435 {NULL, NULL}
435 {NULL, NULL}
436 };
436 };
437
437
438 PyMODINIT_FUNC
438 PyMODINIT_FUNC
439 initmpatch(void)
439 initmpatch(void)
440 {
440 {
441 Py_InitModule3("mpatch", methods, mpatch_doc);
441 Py_InitModule3("mpatch", methods, mpatch_doc);
442 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
442 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
443 }
443 }
444
444
General Comments 0
You need to be logged in to leave comments. Login now