##// END OF EJS Templates
Include inttypes.h instead of stdint.h (fixes issue299)...
Thomas Arendsen Hein -
r2543:860e9c83 default
parent child Browse files
Show More
@@ -1,362 +1,362
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #ifdef __hpux
16 #ifdef __hpux
17 #define inline
17 #define inline
18 #endif
18 #endif
19
19
20 #ifdef __SUNPRO_C
20 #ifdef __SUNPRO_C
21 # define inline
21 # define inline
22 #endif
22 #endif
23
23
24 #ifdef _WIN32
24 #ifdef _WIN32
25 #ifdef _MSC_VER
25 #ifdef _MSC_VER
26 #define inline __inline
26 #define inline __inline
27 typedef unsigned long uint32_t;
27 typedef unsigned long uint32_t;
28 #else
28 #else
29 #include <stdint.h>
29 #include <stdint.h>
30 #endif
30 #endif
31 static uint32_t htonl(uint32_t x)
31 static uint32_t htonl(uint32_t x)
32 {
32 {
33 return ((x & 0x000000ffUL) << 24) |
33 return ((x & 0x000000ffUL) << 24) |
34 ((x & 0x0000ff00UL) << 8) |
34 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x00ff0000UL) >> 8) |
35 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0xff000000UL) >> 24);
36 ((x & 0xff000000UL) >> 24);
37 }
37 }
38 #else
38 #else
39 #include <sys/types.h>
39 #include <sys/types.h>
40 #include <arpa/inet.h>
40 #include <arpa/inet.h>
41 #include <stdint.h>
41 #include <inttypes.h>
42 #endif
42 #endif
43
43
44 struct line {
44 struct line {
45 int h, len, n, e;
45 int h, len, n, e;
46 const char *l;
46 const char *l;
47 };
47 };
48
48
49 struct pos {
49 struct pos {
50 int pos, len;
50 int pos, len;
51 };
51 };
52
52
53 struct hunk {
53 struct hunk {
54 int a1, a2, b1, b2;
54 int a1, a2, b1, b2;
55 };
55 };
56
56
57 struct hunklist {
57 struct hunklist {
58 struct hunk *base, *head;
58 struct hunk *base, *head;
59 };
59 };
60
60
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
61 static inline uint32_t rol32(uint32_t word, unsigned int shift)
62 {
62 {
63 return (word << shift) | (word >> (32 - shift));
63 return (word << shift) | (word >> (32 - shift));
64 }
64 }
65
65
66 int splitlines(const char *a, int len, struct line **lr)
66 int splitlines(const char *a, int len, struct line **lr)
67 {
67 {
68 int h, i;
68 int h, i;
69 const char *p, *b = a;
69 const char *p, *b = a;
70 struct line *l;
70 struct line *l;
71
71
72 /* count the lines */
72 /* count the lines */
73 i = 1; /* extra line for sentinel */
73 i = 1; /* extra line for sentinel */
74 for (p = a; p < a + len; p++)
74 for (p = a; p < a + len; p++)
75 if (*p == '\n' || p == a + len - 1)
75 if (*p == '\n' || p == a + len - 1)
76 i++;
76 i++;
77
77
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
78 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
79 if (!l)
79 if (!l)
80 return -1;
80 return -1;
81
81
82 /* build the line array and calculate hashes */
82 /* build the line array and calculate hashes */
83 h = 0;
83 h = 0;
84 for (p = a; p < a + len; p++) {
84 for (p = a; p < a + len; p++) {
85 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
85 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
86 if (*p == '\n' || p == a + len - 1) {
86 if (*p == '\n' || p == a + len - 1) {
87 l->len = p - b + 1;
87 l->len = p - b + 1;
88 l->h = h * l->len;
88 l->h = h * l->len;
89 l->l = b;
89 l->l = b;
90 l->n = -1;
90 l->n = -1;
91 l++;
91 l++;
92 b = p + 1;
92 b = p + 1;
93 h = 0;
93 h = 0;
94 }
94 }
95 }
95 }
96
96
97 /* set up a sentinel */
97 /* set up a sentinel */
98 l->h = l->len = 0;
98 l->h = l->len = 0;
99 l->l = a + len;
99 l->l = a + len;
100 return i - 1;
100 return i - 1;
101 }
101 }
102
102
103 int inline cmp(struct line *a, struct line *b)
103 int inline cmp(struct line *a, struct line *b)
104 {
104 {
105 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
105 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
106 }
106 }
107
107
108 static int equatelines(struct line *a, int an, struct line *b, int bn)
108 static int equatelines(struct line *a, int an, struct line *b, int bn)
109 {
109 {
110 int i, j, buckets = 1, t;
110 int i, j, buckets = 1, t;
111 struct pos *h;
111 struct pos *h;
112
112
113 /* build a hash table of the next highest power of 2 */
113 /* build a hash table of the next highest power of 2 */
114 while (buckets < bn + 1)
114 while (buckets < bn + 1)
115 buckets *= 2;
115 buckets *= 2;
116
116
117 h = (struct pos *)malloc(buckets * sizeof(struct pos));
117 h = (struct pos *)malloc(buckets * sizeof(struct pos));
118 buckets = buckets - 1;
118 buckets = buckets - 1;
119 if (!h)
119 if (!h)
120 return 0;
120 return 0;
121
121
122 /* clear the hash table */
122 /* clear the hash table */
123 for (i = 0; i <= buckets; i++) {
123 for (i = 0; i <= buckets; i++) {
124 h[i].pos = -1;
124 h[i].pos = -1;
125 h[i].len = 0;
125 h[i].len = 0;
126 }
126 }
127
127
128 /* add lines to the hash table chains */
128 /* add lines to the hash table chains */
129 for (i = bn - 1; i >= 0; i--) {
129 for (i = bn - 1; i >= 0; i--) {
130 /* find the equivalence class */
130 /* find the equivalence class */
131 for (j = b[i].h & buckets; h[j].pos != -1;
131 for (j = b[i].h & buckets; h[j].pos != -1;
132 j = (j + 1) & buckets)
132 j = (j + 1) & buckets)
133 if (!cmp(b + i, b + h[j].pos))
133 if (!cmp(b + i, b + h[j].pos))
134 break;
134 break;
135
135
136 /* add to the head of the equivalence class */
136 /* add to the head of the equivalence class */
137 b[i].n = h[j].pos;
137 b[i].n = h[j].pos;
138 b[i].e = j;
138 b[i].e = j;
139 h[j].pos = i;
139 h[j].pos = i;
140 h[j].len++; /* keep track of popularity */
140 h[j].len++; /* keep track of popularity */
141 }
141 }
142
142
143 /* compute popularity threshold */
143 /* compute popularity threshold */
144 t = (bn >= 200) ? bn / 100 : bn + 1;
144 t = (bn >= 200) ? bn / 100 : bn + 1;
145
145
146 /* match items in a to their equivalence class in b */
146 /* match items in a to their equivalence class in b */
147 for (i = 0; i < an; i++) {
147 for (i = 0; i < an; i++) {
148 /* find the equivalence class */
148 /* find the equivalence class */
149 for (j = a[i].h & buckets; h[j].pos != -1;
149 for (j = a[i].h & buckets; h[j].pos != -1;
150 j = (j + 1) & buckets)
150 j = (j + 1) & buckets)
151 if (!cmp(a + i, b + h[j].pos))
151 if (!cmp(a + i, b + h[j].pos))
152 break;
152 break;
153
153
154 a[i].e = j; /* use equivalence class for quick compare */
154 a[i].e = j; /* use equivalence class for quick compare */
155 if (h[j].len <= t)
155 if (h[j].len <= t)
156 a[i].n = h[j].pos; /* point to head of match list */
156 a[i].n = h[j].pos; /* point to head of match list */
157 else
157 else
158 a[i].n = -1; /* too popular */
158 a[i].n = -1; /* too popular */
159 }
159 }
160
160
161 /* discard hash tables */
161 /* discard hash tables */
162 free(h);
162 free(h);
163 return 1;
163 return 1;
164 }
164 }
165
165
166 static int longest_match(struct line *a, struct line *b, struct pos *pos,
166 static int longest_match(struct line *a, struct line *b, struct pos *pos,
167 int a1, int a2, int b1, int b2, int *omi, int *omj)
167 int a1, int a2, int b1, int b2, int *omi, int *omj)
168 {
168 {
169 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
169 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
170
170
171 for (i = a1; i < a2; i++) {
171 for (i = a1; i < a2; i++) {
172 /* skip things before the current block */
172 /* skip things before the current block */
173 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
173 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
174 ;
174 ;
175
175
176 /* loop through all lines match a[i] in b */
176 /* loop through all lines match a[i] in b */
177 for (; j != -1 && j < b2; j = b[j].n) {
177 for (; j != -1 && j < b2; j = b[j].n) {
178 /* does this extend an earlier match? */
178 /* does this extend an earlier match? */
179 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
179 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
180 k = pos[j - 1].len + 1;
180 k = pos[j - 1].len + 1;
181 else
181 else
182 k = 1;
182 k = 1;
183 pos[j].pos = i;
183 pos[j].pos = i;
184 pos[j].len = k;
184 pos[j].len = k;
185
185
186 /* best match so far? */
186 /* best match so far? */
187 if (k > mk) {
187 if (k > mk) {
188 mi = i;
188 mi = i;
189 mj = j;
189 mj = j;
190 mk = k;
190 mk = k;
191 }
191 }
192 }
192 }
193 }
193 }
194
194
195 if (mk) {
195 if (mk) {
196 mi = mi - mk + 1;
196 mi = mi - mk + 1;
197 mj = mj - mk + 1;
197 mj = mj - mk + 1;
198 }
198 }
199
199
200 /* expand match to include neighboring popular lines */
200 /* expand match to include neighboring popular lines */
201 while (mi - mb > a1 && mj - mb > b1 &&
201 while (mi - mb > a1 && mj - mb > b1 &&
202 a[mi - mb - 1].e == b[mj - mb - 1].e)
202 a[mi - mb - 1].e == b[mj - mb - 1].e)
203 mb++;
203 mb++;
204 while (mi + mk < a2 && mj + mk < b2 &&
204 while (mi + mk < a2 && mj + mk < b2 &&
205 a[mi + mk].e == b[mj + mk].e)
205 a[mi + mk].e == b[mj + mk].e)
206 mk++;
206 mk++;
207
207
208 *omi = mi - mb;
208 *omi = mi - mb;
209 *omj = mj - mb;
209 *omj = mj - mb;
210 return mk + mb;
210 return mk + mb;
211 }
211 }
212
212
213 static void recurse(struct line *a, struct line *b, struct pos *pos,
213 static void recurse(struct line *a, struct line *b, struct pos *pos,
214 int a1, int a2, int b1, int b2, struct hunklist *l)
214 int a1, int a2, int b1, int b2, struct hunklist *l)
215 {
215 {
216 int i, j, k;
216 int i, j, k;
217
217
218 /* find the longest match in this chunk */
218 /* find the longest match in this chunk */
219 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
219 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
220 if (!k)
220 if (!k)
221 return;
221 return;
222
222
223 /* and recurse on the remaining chunks on either side */
223 /* and recurse on the remaining chunks on either side */
224 recurse(a, b, pos, a1, i, b1, j, l);
224 recurse(a, b, pos, a1, i, b1, j, l);
225 l->head->a1 = i;
225 l->head->a1 = i;
226 l->head->a2 = i + k;
226 l->head->a2 = i + k;
227 l->head->b1 = j;
227 l->head->b1 = j;
228 l->head->b2 = j + k;
228 l->head->b2 = j + k;
229 l->head++;
229 l->head++;
230 recurse(a, b, pos, i + k, a2, j + k, b2, l);
230 recurse(a, b, pos, i + k, a2, j + k, b2, l);
231 }
231 }
232
232
233 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
233 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
234 {
234 {
235 struct hunklist l;
235 struct hunklist l;
236 struct pos *pos;
236 struct pos *pos;
237 int t;
237 int t;
238
238
239 /* allocate and fill arrays */
239 /* allocate and fill arrays */
240 t = equatelines(a, an, b, bn);
240 t = equatelines(a, an, b, bn);
241 pos = (struct pos *)calloc(bn, sizeof(struct pos));
241 pos = (struct pos *)calloc(bn, sizeof(struct pos));
242 /* we can't have more matches than lines in the shorter file */
242 /* we can't have more matches than lines in the shorter file */
243 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
243 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
244 ((an<bn ? an:bn) + 1));
244 ((an<bn ? an:bn) + 1));
245
245
246 if (pos && l.base && t) {
246 if (pos && l.base && t) {
247 /* generate the matching block list */
247 /* generate the matching block list */
248 recurse(a, b, pos, 0, an, 0, bn, &l);
248 recurse(a, b, pos, 0, an, 0, bn, &l);
249 l.head->a1 = an;
249 l.head->a1 = an;
250 l.head->b1 = bn;
250 l.head->b1 = bn;
251 l.head++;
251 l.head++;
252 }
252 }
253
253
254 free(pos);
254 free(pos);
255 return l;
255 return l;
256 }
256 }
257
257
258 static PyObject *blocks(PyObject *self, PyObject *args)
258 static PyObject *blocks(PyObject *self, PyObject *args)
259 {
259 {
260 PyObject *sa, *sb, *rl = NULL, *m;
260 PyObject *sa, *sb, *rl = NULL, *m;
261 struct line *a, *b;
261 struct line *a, *b;
262 struct hunklist l = {NULL, NULL};
262 struct hunklist l = {NULL, NULL};
263 struct hunk *h;
263 struct hunk *h;
264 int an, bn, pos = 0;
264 int an, bn, pos = 0;
265
265
266 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
266 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
267 return NULL;
267 return NULL;
268
268
269 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
269 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
270 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
270 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
271 if (!a || !b)
271 if (!a || !b)
272 goto nomem;
272 goto nomem;
273
273
274 l = diff(a, an, b, bn);
274 l = diff(a, an, b, bn);
275 rl = PyList_New(l.head - l.base);
275 rl = PyList_New(l.head - l.base);
276 if (!l.head || !rl)
276 if (!l.head || !rl)
277 goto nomem;
277 goto nomem;
278
278
279 for (h = l.base; h != l.head; h++) {
279 for (h = l.base; h != l.head; h++) {
280 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
280 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
281 PyList_SetItem(rl, pos, m);
281 PyList_SetItem(rl, pos, m);
282 pos++;
282 pos++;
283 }
283 }
284
284
285 nomem:
285 nomem:
286 free(a);
286 free(a);
287 free(b);
287 free(b);
288 free(l.base);
288 free(l.base);
289 return rl ? rl : PyErr_NoMemory();
289 return rl ? rl : PyErr_NoMemory();
290 }
290 }
291
291
292 static PyObject *bdiff(PyObject *self, PyObject *args)
292 static PyObject *bdiff(PyObject *self, PyObject *args)
293 {
293 {
294 PyObject *sa, *sb, *result = NULL;
294 PyObject *sa, *sb, *result = NULL;
295 struct line *al, *bl;
295 struct line *al, *bl;
296 struct hunklist l = {NULL, NULL};
296 struct hunklist l = {NULL, NULL};
297 struct hunk *h;
297 struct hunk *h;
298 char encode[12], *rb;
298 char encode[12], *rb;
299 int an, bn, len = 0, la = 0, lb = 0;
299 int an, bn, len = 0, la = 0, lb = 0;
300
300
301 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
301 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
302 return NULL;
302 return NULL;
303
303
304 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
304 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
305 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
305 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
306 if (!al || !bl)
306 if (!al || !bl)
307 goto nomem;
307 goto nomem;
308
308
309 l = diff(al, an, bl, bn);
309 l = diff(al, an, bl, bn);
310 if (!l.head)
310 if (!l.head)
311 goto nomem;
311 goto nomem;
312
312
313 /* calculate length of output */
313 /* calculate length of output */
314 for (h = l.base; h != l.head; h++) {
314 for (h = l.base; h != l.head; h++) {
315 if (h->a1 != la || h->b1 != lb)
315 if (h->a1 != la || h->b1 != lb)
316 len += 12 + bl[h->b1].l - bl[lb].l;
316 len += 12 + bl[h->b1].l - bl[lb].l;
317 la = h->a2;
317 la = h->a2;
318 lb = h->b2;
318 lb = h->b2;
319 }
319 }
320
320
321 result = PyString_FromStringAndSize(NULL, len);
321 result = PyString_FromStringAndSize(NULL, len);
322 if (!result)
322 if (!result)
323 goto nomem;
323 goto nomem;
324
324
325 /* build binary patch */
325 /* build binary patch */
326 rb = PyString_AsString(result);
326 rb = PyString_AsString(result);
327 la = lb = 0;
327 la = lb = 0;
328
328
329 for (h = l.base; h != l.head; h++) {
329 for (h = l.base; h != l.head; h++) {
330 if (h->a1 != la || h->b1 != lb) {
330 if (h->a1 != la || h->b1 != lb) {
331 len = bl[h->b1].l - bl[lb].l;
331 len = bl[h->b1].l - bl[lb].l;
332 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
332 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
333 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
333 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
334 *(uint32_t *)(encode + 8) = htonl(len);
334 *(uint32_t *)(encode + 8) = htonl(len);
335 memcpy(rb, encode, 12);
335 memcpy(rb, encode, 12);
336 memcpy(rb + 12, bl[lb].l, len);
336 memcpy(rb + 12, bl[lb].l, len);
337 rb += 12 + len;
337 rb += 12 + len;
338 }
338 }
339 la = h->a2;
339 la = h->a2;
340 lb = h->b2;
340 lb = h->b2;
341 }
341 }
342
342
343 nomem:
343 nomem:
344 free(al);
344 free(al);
345 free(bl);
345 free(bl);
346 free(l.base);
346 free(l.base);
347 return result ? result : PyErr_NoMemory();
347 return result ? result : PyErr_NoMemory();
348 }
348 }
349
349
350 static char mdiff_doc[] = "Efficient binary diff.";
350 static char mdiff_doc[] = "Efficient binary diff.";
351
351
352 static PyMethodDef methods[] = {
352 static PyMethodDef methods[] = {
353 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
353 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
354 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
354 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
355 {NULL, NULL}
355 {NULL, NULL}
356 };
356 };
357
357
358 PyMODINIT_FUNC initbdiff(void)
358 PyMODINIT_FUNC initbdiff(void)
359 {
359 {
360 Py_InitModule3("bdiff", methods, mdiff_doc);
360 Py_InitModule3("bdiff", methods, mdiff_doc);
361 }
361 }
362
362
@@ -1,408 +1,408
1 /*
1 /*
2 mpatch.c - efficient binary patching for Mercurial
2 mpatch.c - efficient binary patching for Mercurial
3
3
4 This implements a patch algorithm that's O(m + nlog n) where m is the
4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 size of the output and n is the number of patches.
5 size of the output and n is the number of patches.
6
6
7 Given a list of binary patches, it unpacks each into a hunk list,
7 Given a list of binary patches, it unpacks each into a hunk list,
8 then combines the hunk lists with a treewise recursion to form a
8 then combines the hunk lists with a treewise recursion to form a
9 single hunk list. This hunk list is then applied to the original
9 single hunk list. This hunk list is then applied to the original
10 text.
10 text.
11
11
12 The text (or binary) fragments are copied directly from their source
12 The text (or binary) fragments are copied directly from their source
13 Python objects into a preallocated output string to avoid the
13 Python objects into a preallocated output string to avoid the
14 allocation of intermediate Python objects. Working memory is about 2x
14 allocation of intermediate Python objects. Working memory is about 2x
15 the total number of hunks.
15 the total number of hunks.
16
16
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18
18
19 This software may be used and distributed according to the terms
19 This software may be used and distributed according to the terms
20 of the GNU General Public License, incorporated herein by reference.
20 of the GNU General Public License, incorporated herein by reference.
21 */
21 */
22
22
23 #include <Python.h>
23 #include <Python.h>
24 #include <stdlib.h>
24 #include <stdlib.h>
25 #include <string.h>
25 #include <string.h>
26
26
27 #ifdef _WIN32
27 #ifdef _WIN32
28 # ifdef _MSC_VER
28 # ifdef _MSC_VER
29 /* msvc 6.0 has problems */
29 /* msvc 6.0 has problems */
30 # define inline __inline
30 # define inline __inline
31 typedef unsigned long uint32_t;
31 typedef unsigned long uint32_t;
32 # else
32 # else
33 # include <stdint.h>
33 # include <stdint.h>
34 # endif
34 # endif
35 static uint32_t ntohl(uint32_t x)
35 static uint32_t ntohl(uint32_t x)
36 {
36 {
37 return ((x & 0x000000ffUL) << 24) |
37 return ((x & 0x000000ffUL) << 24) |
38 ((x & 0x0000ff00UL) << 8) |
38 ((x & 0x0000ff00UL) << 8) |
39 ((x & 0x00ff0000UL) >> 8) |
39 ((x & 0x00ff0000UL) >> 8) |
40 ((x & 0xff000000UL) >> 24);
40 ((x & 0xff000000UL) >> 24);
41 }
41 }
42 #else
42 #else
43 /* not windows */
43 /* not windows */
44 # include <sys/types.h>
44 # include <sys/types.h>
45 # include <arpa/inet.h>
45 # include <arpa/inet.h>
46 # include <stdint.h>
46 # include <inttypes.h>
47 #endif
47 #endif
48
48
49 static char mpatch_doc[] = "Efficient binary patching.";
49 static char mpatch_doc[] = "Efficient binary patching.";
50 static PyObject *mpatch_Error;
50 static PyObject *mpatch_Error;
51
51
52 struct frag {
52 struct frag {
53 int start, end, len;
53 int start, end, len;
54 char *data;
54 char *data;
55 };
55 };
56
56
57 struct flist {
57 struct flist {
58 struct frag *base, *head, *tail;
58 struct frag *base, *head, *tail;
59 };
59 };
60
60
61 static struct flist *lalloc(int size)
61 static struct flist *lalloc(int size)
62 {
62 {
63 struct flist *a = NULL;
63 struct flist *a = NULL;
64
64
65 a = (struct flist *)malloc(sizeof(struct flist));
65 a = (struct flist *)malloc(sizeof(struct flist));
66 if (a) {
66 if (a) {
67 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
67 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
68 if (a->base) {
68 if (a->base) {
69 a->head = a->tail = a->base;
69 a->head = a->tail = a->base;
70 return a;
70 return a;
71 }
71 }
72 free(a);
72 free(a);
73 a = NULL;
73 a = NULL;
74 }
74 }
75 if (!PyErr_Occurred())
75 if (!PyErr_Occurred())
76 PyErr_NoMemory();
76 PyErr_NoMemory();
77 return NULL;
77 return NULL;
78 }
78 }
79
79
80 static void lfree(struct flist *a)
80 static void lfree(struct flist *a)
81 {
81 {
82 if (a) {
82 if (a) {
83 free(a->base);
83 free(a->base);
84 free(a);
84 free(a);
85 }
85 }
86 }
86 }
87
87
88 static int lsize(struct flist *a)
88 static int lsize(struct flist *a)
89 {
89 {
90 return a->tail - a->head;
90 return a->tail - a->head;
91 }
91 }
92
92
93 /* move hunks in source that are less cut to dest, compensating
93 /* move hunks in source that are less cut to dest, compensating
94 for changes in offset. the last hunk may be split if necessary.
94 for changes in offset. the last hunk may be split if necessary.
95 */
95 */
96 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
96 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
97 {
97 {
98 struct frag *d = dest->tail, *s = src->head;
98 struct frag *d = dest->tail, *s = src->head;
99 int postend, c, l;
99 int postend, c, l;
100
100
101 while (s != src->tail) {
101 while (s != src->tail) {
102 if (s->start + offset >= cut)
102 if (s->start + offset >= cut)
103 break; /* we've gone far enough */
103 break; /* we've gone far enough */
104
104
105 postend = offset + s->start + s->len;
105 postend = offset + s->start + s->len;
106 if (postend <= cut) {
106 if (postend <= cut) {
107 /* save this hunk */
107 /* save this hunk */
108 offset += s->start + s->len - s->end;
108 offset += s->start + s->len - s->end;
109 *d++ = *s++;
109 *d++ = *s++;
110 }
110 }
111 else {
111 else {
112 /* break up this hunk */
112 /* break up this hunk */
113 c = cut - offset;
113 c = cut - offset;
114 if (s->end < c)
114 if (s->end < c)
115 c = s->end;
115 c = s->end;
116 l = cut - offset - s->start;
116 l = cut - offset - s->start;
117 if (s->len < l)
117 if (s->len < l)
118 l = s->len;
118 l = s->len;
119
119
120 offset += s->start + l - c;
120 offset += s->start + l - c;
121
121
122 d->start = s->start;
122 d->start = s->start;
123 d->end = c;
123 d->end = c;
124 d->len = l;
124 d->len = l;
125 d->data = s->data;
125 d->data = s->data;
126 d++;
126 d++;
127 s->start = c;
127 s->start = c;
128 s->len = s->len - l;
128 s->len = s->len - l;
129 s->data = s->data + l;
129 s->data = s->data + l;
130
130
131 break;
131 break;
132 }
132 }
133 }
133 }
134
134
135 dest->tail = d;
135 dest->tail = d;
136 src->head = s;
136 src->head = s;
137 return offset;
137 return offset;
138 }
138 }
139
139
140 /* like gather, but with no output list */
140 /* like gather, but with no output list */
141 static int discard(struct flist *src, int cut, int offset)
141 static int discard(struct flist *src, int cut, int offset)
142 {
142 {
143 struct frag *s = src->head;
143 struct frag *s = src->head;
144 int postend, c, l;
144 int postend, c, l;
145
145
146 while (s != src->tail) {
146 while (s != src->tail) {
147 if (s->start + offset >= cut)
147 if (s->start + offset >= cut)
148 break;
148 break;
149
149
150 postend = offset + s->start + s->len;
150 postend = offset + s->start + s->len;
151 if (postend <= cut) {
151 if (postend <= cut) {
152 offset += s->start + s->len - s->end;
152 offset += s->start + s->len - s->end;
153 s++;
153 s++;
154 }
154 }
155 else {
155 else {
156 c = cut - offset;
156 c = cut - offset;
157 if (s->end < c)
157 if (s->end < c)
158 c = s->end;
158 c = s->end;
159 l = cut - offset - s->start;
159 l = cut - offset - s->start;
160 if (s->len < l)
160 if (s->len < l)
161 l = s->len;
161 l = s->len;
162
162
163 offset += s->start + l - c;
163 offset += s->start + l - c;
164 s->start = c;
164 s->start = c;
165 s->len = s->len - l;
165 s->len = s->len - l;
166 s->data = s->data + l;
166 s->data = s->data + l;
167
167
168 break;
168 break;
169 }
169 }
170 }
170 }
171
171
172 src->head = s;
172 src->head = s;
173 return offset;
173 return offset;
174 }
174 }
175
175
176 /* combine hunk lists a and b, while adjusting b for offset changes in a/
176 /* combine hunk lists a and b, while adjusting b for offset changes in a/
177 this deletes a and b and returns the resultant list. */
177 this deletes a and b and returns the resultant list. */
178 static struct flist *combine(struct flist *a, struct flist *b)
178 static struct flist *combine(struct flist *a, struct flist *b)
179 {
179 {
180 struct flist *c = NULL;
180 struct flist *c = NULL;
181 struct frag *bh, *ct;
181 struct frag *bh, *ct;
182 int offset = 0, post;
182 int offset = 0, post;
183
183
184 if (a && b)
184 if (a && b)
185 c = lalloc((lsize(a) + lsize(b)) * 2);
185 c = lalloc((lsize(a) + lsize(b)) * 2);
186
186
187 if (c) {
187 if (c) {
188
188
189 for (bh = b->head; bh != b->tail; bh++) {
189 for (bh = b->head; bh != b->tail; bh++) {
190 /* save old hunks */
190 /* save old hunks */
191 offset = gather(c, a, bh->start, offset);
191 offset = gather(c, a, bh->start, offset);
192
192
193 /* discard replaced hunks */
193 /* discard replaced hunks */
194 post = discard(a, bh->end, offset);
194 post = discard(a, bh->end, offset);
195
195
196 /* insert new hunk */
196 /* insert new hunk */
197 ct = c->tail;
197 ct = c->tail;
198 ct->start = bh->start - offset;
198 ct->start = bh->start - offset;
199 ct->end = bh->end - post;
199 ct->end = bh->end - post;
200 ct->len = bh->len;
200 ct->len = bh->len;
201 ct->data = bh->data;
201 ct->data = bh->data;
202 c->tail++;
202 c->tail++;
203 offset = post;
203 offset = post;
204 }
204 }
205
205
206 /* hold on to tail from a */
206 /* hold on to tail from a */
207 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
207 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
208 c->tail += lsize(a);
208 c->tail += lsize(a);
209 }
209 }
210
210
211 lfree(a);
211 lfree(a);
212 lfree(b);
212 lfree(b);
213 return c;
213 return c;
214 }
214 }
215
215
216 /* decode a binary patch into a hunk list */
216 /* decode a binary patch into a hunk list */
217 static struct flist *decode(char *bin, int len)
217 static struct flist *decode(char *bin, int len)
218 {
218 {
219 struct flist *l;
219 struct flist *l;
220 struct frag *lt;
220 struct frag *lt;
221 char *end = bin + len;
221 char *end = bin + len;
222 char decode[12]; /* for dealing with alignment issues */
222 char decode[12]; /* for dealing with alignment issues */
223
223
224 /* assume worst case size, we won't have many of these lists */
224 /* assume worst case size, we won't have many of these lists */
225 l = lalloc(len / 12);
225 l = lalloc(len / 12);
226 if (!l)
226 if (!l)
227 return NULL;
227 return NULL;
228
228
229 lt = l->tail;
229 lt = l->tail;
230
230
231 while (bin < end) {
231 while (bin < end) {
232 memcpy(decode, bin, 12);
232 memcpy(decode, bin, 12);
233 lt->start = ntohl(*(uint32_t *)decode);
233 lt->start = ntohl(*(uint32_t *)decode);
234 lt->end = ntohl(*(uint32_t *)(decode + 4));
234 lt->end = ntohl(*(uint32_t *)(decode + 4));
235 lt->len = ntohl(*(uint32_t *)(decode + 8));
235 lt->len = ntohl(*(uint32_t *)(decode + 8));
236 lt->data = bin + 12;
236 lt->data = bin + 12;
237 bin += 12 + lt->len;
237 bin += 12 + lt->len;
238 lt++;
238 lt++;
239 }
239 }
240
240
241 if (bin != end) {
241 if (bin != end) {
242 if (!PyErr_Occurred())
242 if (!PyErr_Occurred())
243 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
243 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
244 lfree(l);
244 lfree(l);
245 return NULL;
245 return NULL;
246 }
246 }
247
247
248 l->tail = lt;
248 l->tail = lt;
249 return l;
249 return l;
250 }
250 }
251
251
252 /* calculate the size of resultant text */
252 /* calculate the size of resultant text */
253 static int calcsize(int len, struct flist *l)
253 static int calcsize(int len, struct flist *l)
254 {
254 {
255 int outlen = 0, last = 0;
255 int outlen = 0, last = 0;
256 struct frag *f = l->head;
256 struct frag *f = l->head;
257
257
258 while (f != l->tail) {
258 while (f != l->tail) {
259 if (f->start < last || f->end > len) {
259 if (f->start < last || f->end > len) {
260 if (!PyErr_Occurred())
260 if (!PyErr_Occurred())
261 PyErr_SetString(mpatch_Error,
261 PyErr_SetString(mpatch_Error,
262 "invalid patch");
262 "invalid patch");
263 return -1;
263 return -1;
264 }
264 }
265 outlen += f->start - last;
265 outlen += f->start - last;
266 last = f->end;
266 last = f->end;
267 outlen += f->len;
267 outlen += f->len;
268 f++;
268 f++;
269 }
269 }
270
270
271 outlen += len - last;
271 outlen += len - last;
272 return outlen;
272 return outlen;
273 }
273 }
274
274
275 static int apply(char *buf, char *orig, int len, struct flist *l)
275 static int apply(char *buf, char *orig, int len, struct flist *l)
276 {
276 {
277 struct frag *f = l->head;
277 struct frag *f = l->head;
278 int last = 0;
278 int last = 0;
279 char *p = buf;
279 char *p = buf;
280
280
281 while (f != l->tail) {
281 while (f != l->tail) {
282 if (f->start < last || f->end > len) {
282 if (f->start < last || f->end > len) {
283 if (!PyErr_Occurred())
283 if (!PyErr_Occurred())
284 PyErr_SetString(mpatch_Error,
284 PyErr_SetString(mpatch_Error,
285 "invalid patch");
285 "invalid patch");
286 return 0;
286 return 0;
287 }
287 }
288 memcpy(p, orig + last, f->start - last);
288 memcpy(p, orig + last, f->start - last);
289 p += f->start - last;
289 p += f->start - last;
290 memcpy(p, f->data, f->len);
290 memcpy(p, f->data, f->len);
291 last = f->end;
291 last = f->end;
292 p += f->len;
292 p += f->len;
293 f++;
293 f++;
294 }
294 }
295 memcpy(p, orig + last, len - last);
295 memcpy(p, orig + last, len - last);
296 return 1;
296 return 1;
297 }
297 }
298
298
299 /* recursively generate a patch of all bins between start and end */
299 /* recursively generate a patch of all bins between start and end */
300 static struct flist *fold(PyObject *bins, int start, int end)
300 static struct flist *fold(PyObject *bins, int start, int end)
301 {
301 {
302 int len;
302 int len;
303
303
304 if (start + 1 == end) {
304 if (start + 1 == end) {
305 /* trivial case, output a decoded list */
305 /* trivial case, output a decoded list */
306 PyObject *tmp = PyList_GetItem(bins, start);
306 PyObject *tmp = PyList_GetItem(bins, start);
307 if (!tmp)
307 if (!tmp)
308 return NULL;
308 return NULL;
309 return decode(PyString_AsString(tmp), PyString_Size(tmp));
309 return decode(PyString_AsString(tmp), PyString_Size(tmp));
310 }
310 }
311
311
312 /* divide and conquer, memory management is elsewhere */
312 /* divide and conquer, memory management is elsewhere */
313 len = (end - start) / 2;
313 len = (end - start) / 2;
314 return combine(fold(bins, start, start + len),
314 return combine(fold(bins, start, start + len),
315 fold(bins, start + len, end));
315 fold(bins, start + len, end));
316 }
316 }
317
317
318 static PyObject *
318 static PyObject *
319 patches(PyObject *self, PyObject *args)
319 patches(PyObject *self, PyObject *args)
320 {
320 {
321 PyObject *text, *bins, *result;
321 PyObject *text, *bins, *result;
322 struct flist *patch;
322 struct flist *patch;
323 char *in, *out;
323 char *in, *out;
324 int len, outlen;
324 int len, outlen;
325
325
326 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
326 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
327 return NULL;
327 return NULL;
328
328
329 len = PyList_Size(bins);
329 len = PyList_Size(bins);
330 if (!len) {
330 if (!len) {
331 /* nothing to do */
331 /* nothing to do */
332 Py_INCREF(text);
332 Py_INCREF(text);
333 return text;
333 return text;
334 }
334 }
335
335
336 patch = fold(bins, 0, len);
336 patch = fold(bins, 0, len);
337 if (!patch)
337 if (!patch)
338 return NULL;
338 return NULL;
339
339
340 outlen = calcsize(PyString_Size(text), patch);
340 outlen = calcsize(PyString_Size(text), patch);
341 if (outlen < 0) {
341 if (outlen < 0) {
342 result = NULL;
342 result = NULL;
343 goto cleanup;
343 goto cleanup;
344 }
344 }
345 result = PyString_FromStringAndSize(NULL, outlen);
345 result = PyString_FromStringAndSize(NULL, outlen);
346 if (!result) {
346 if (!result) {
347 result = NULL;
347 result = NULL;
348 goto cleanup;
348 goto cleanup;
349 }
349 }
350 in = PyString_AsString(text);
350 in = PyString_AsString(text);
351 out = PyString_AsString(result);
351 out = PyString_AsString(result);
352 if (!apply(out, in, PyString_Size(text), patch)) {
352 if (!apply(out, in, PyString_Size(text), patch)) {
353 Py_DECREF(result);
353 Py_DECREF(result);
354 result = NULL;
354 result = NULL;
355 }
355 }
356 cleanup:
356 cleanup:
357 lfree(patch);
357 lfree(patch);
358 return result;
358 return result;
359 }
359 }
360
360
361 /* calculate size of a patched file directly */
361 /* calculate size of a patched file directly */
362 static PyObject *
362 static PyObject *
363 patchedsize(PyObject *self, PyObject *args)
363 patchedsize(PyObject *self, PyObject *args)
364 {
364 {
365 long orig, start, end, len, outlen = 0, last = 0;
365 long orig, start, end, len, outlen = 0, last = 0;
366 int patchlen;
366 int patchlen;
367 char *bin, *binend;
367 char *bin, *binend;
368 char decode[12]; /* for dealing with alignment issues */
368 char decode[12]; /* for dealing with alignment issues */
369
369
370 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
370 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
371 return NULL;
371 return NULL;
372
372
373 binend = bin + patchlen;
373 binend = bin + patchlen;
374
374
375 while (bin < binend) {
375 while (bin < binend) {
376 memcpy(decode, bin, 12);
376 memcpy(decode, bin, 12);
377 start = ntohl(*(uint32_t *)decode);
377 start = ntohl(*(uint32_t *)decode);
378 end = ntohl(*(uint32_t *)(decode + 4));
378 end = ntohl(*(uint32_t *)(decode + 4));
379 len = ntohl(*(uint32_t *)(decode + 8));
379 len = ntohl(*(uint32_t *)(decode + 8));
380 bin += 12 + len;
380 bin += 12 + len;
381 outlen += start - last;
381 outlen += start - last;
382 last = end;
382 last = end;
383 outlen += len;
383 outlen += len;
384 }
384 }
385
385
386 if (bin != binend) {
386 if (bin != binend) {
387 if (!PyErr_Occurred())
387 if (!PyErr_Occurred())
388 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
388 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
389 return NULL;
389 return NULL;
390 }
390 }
391
391
392 outlen += orig - last;
392 outlen += orig - last;
393 return Py_BuildValue("l", outlen);
393 return Py_BuildValue("l", outlen);
394 }
394 }
395
395
396 static PyMethodDef methods[] = {
396 static PyMethodDef methods[] = {
397 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
397 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
398 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
398 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
399 {NULL, NULL}
399 {NULL, NULL}
400 };
400 };
401
401
402 PyMODINIT_FUNC
402 PyMODINIT_FUNC
403 initmpatch(void)
403 initmpatch(void)
404 {
404 {
405 Py_InitModule3("mpatch", methods, mpatch_doc);
405 Py_InitModule3("mpatch", methods, mpatch_doc);
406 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
406 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
407 }
407 }
408
408
General Comments 0
You need to be logged in to leave comments. Login now