##// END OF EJS Templates
[PATCH] use <arpa/inet.h> instead of <netinet/in.h> for ntohl/htonl...
mpm@selenic.com -
r597:e530637e default
parent child Browse files
Show More
@@ -1,349 +1,349 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #include <Python.h>
13 13 #include <stdlib.h>
14 14 #include <string.h>
15 15 #ifdef _WIN32
16 16 #ifdef _MSC_VER
17 17 #define inline __inline
18 18 typedef unsigned long uint32_t;
19 19 #else
20 20 #include <stdint.h>
21 21 #endif
22 22 static uint32_t htonl(uint32_t x)
23 23 {
24 24 return ((x & 0x000000ffUL) << 24) |
25 25 ((x & 0x0000ff00UL) << 8) |
26 26 ((x & 0x00ff0000UL) >> 8) |
27 27 ((x & 0xff000000UL) >> 24);
28 28 }
29 29 #else
30 30 #include <sys/types.h>
31 #include <netinet/in.h>
31 #include <arpa/inet.h>
32 32 #endif
33 33
34 34 struct line {
35 35 int h, len, n, e;
36 36 const char *l;
37 37 };
38 38
39 39 struct pos {
40 40 int pos, len;
41 41 };
42 42
43 43 struct hunk {
44 44 int a1, a2, b1, b2;
45 45 };
46 46
47 47 struct hunklist {
48 48 struct hunk *base, *head;
49 49 };
50 50
51 51 static __inline uint32_t rol32(uint32_t word, unsigned int shift)
52 52 {
53 53 return (word << shift) | (word >> (32 - shift));
54 54 }
55 55
56 56 int splitlines(const char *a, int len, struct line **lr)
57 57 {
58 58 int h, i;
59 59 const char *p, *b = a;
60 60 struct line *l;
61 61
62 62 /* count the lines */
63 63 i = 1; /* extra line for sentinel */
64 64 for (p = a; p < a + len; p++)
65 65 if (*p == '\n' || p == a + len - 1)
66 66 i++;
67 67
68 68 *lr = l = malloc(sizeof(struct line) * i);
69 69 if (!l)
70 70 return -1;
71 71
72 72 /* build the line array and calculate hashes */
73 73 h = 0;
74 74 for (p = a; p < a + len; p++) {
75 75 h = *p + rol32(h, 7); /* a simple hash from GNU diff */
76 76 if (*p == '\n' || p == a + len - 1) {
77 77 l->len = p - b + 1;
78 78 l->h = h * l->len;
79 79 l->l = b;
80 80 l->n = -1;
81 81 l++;
82 82 b = p + 1;
83 83 h = 0;
84 84 }
85 85 }
86 86
87 87 /* set up a sentinel */
88 88 l->h = l->len = 0;
89 89 l->l = a + len;
90 90 return i - 1;
91 91 }
92 92
93 93 int inline cmp(struct line *a, struct line *b)
94 94 {
95 95 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
96 96 }
97 97
98 98 static int equatelines(struct line *a, int an, struct line *b, int bn)
99 99 {
100 100 int i, j, buckets = 1, t;
101 101 struct pos *h;
102 102
103 103 /* build a hash table of the next highest power of 2 */
104 104 while (buckets < bn + 1)
105 105 buckets *= 2;
106 106
107 107 h = malloc(buckets * sizeof(struct pos));
108 108 buckets = buckets - 1;
109 109 if (!h)
110 110 return 0;
111 111
112 112 /* clear the hash table */
113 113 for (i = 0; i <= buckets; i++) {
114 114 h[i].pos = -1;
115 115 h[i].len = 0;
116 116 }
117 117
118 118 /* add lines to the hash table chains */
119 119 for (i = bn - 1; i >= 0; i--) {
120 120 /* find the equivalence class */
121 121 for (j = b[i].h & buckets; h[j].pos != -1;
122 122 j = (j + 1) & buckets)
123 123 if (!cmp(b + i, b + h[j].pos))
124 124 break;
125 125
126 126 /* add to the head of the equivalence class */
127 127 b[i].n = h[j].pos;
128 128 b[i].e = j;
129 129 h[j].pos = i;
130 130 h[j].len++; /* keep track of popularity */
131 131 }
132 132
133 133 /* compute popularity threshold */
134 134 t = (bn >= 200) ? bn / 100 : bn + 1;
135 135
136 136 /* match items in a to their equivalence class in b */
137 137 for (i = 0; i < an; i++) {
138 138 /* find the equivalence class */
139 139 for (j = a[i].h & buckets; h[j].pos != -1;
140 140 j = (j + 1) & buckets)
141 141 if (!cmp(a + i, b + h[j].pos))
142 142 break;
143 143
144 144 a[i].e = j; /* use equivalence class for quick compare */
145 145 if(h[j].len <= t)
146 146 a[i].n = h[j].pos; /* point to head of match list */
147 147 else
148 148 a[i].n = -1; /* too popular */
149 149 }
150 150
151 151 /* discard hash tables */
152 152 free(h);
153 153 return 1;
154 154 }
155 155
156 156 static int longest_match(struct line *a, struct line *b, struct pos *pos,
157 157 int a1, int a2, int b1, int b2, int *omi, int *omj)
158 158 {
159 159 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
160 160
161 161 for (i = a1; i < a2; i++) {
162 162 /* skip things before the current block */
163 163 for (j = a[i].n; j != -1 && j < b1; j = b[j].n)
164 164 ;
165 165
166 166 /* loop through all lines match a[i] in b */
167 167 for (; j != -1 && j < b2; j = b[j].n) {
168 168 /* does this extend an earlier match? */
169 169 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
170 170 k = pos[j - 1].len + 1;
171 171 else
172 172 k = 1;
173 173 pos[j].pos = i;
174 174 pos[j].len = k;
175 175
176 176 /* best match so far? */
177 177 if (k > mk) {
178 178 mi = i;
179 179 mj = j;
180 180 mk = k;
181 181 }
182 182 }
183 183 }
184 184
185 185 if (mk) {
186 186 mi = mi - mk + 1;
187 187 mj = mj - mk + 1;
188 188 }
189 189
190 190 /* expand match to include neighboring popular lines */
191 191 while (mi - mb > a1 && mj - mb > b1 &&
192 192 a[mi - mb - 1].e == b[mj - mb - 1].e)
193 193 mb++;
194 194 while (mi + mk < a2 && mj + mk < b2 &&
195 195 a[mi + mk].e == b[mj + mk].e)
196 196 mk++;
197 197
198 198 *omi = mi - mb;
199 199 *omj = mj - mb;
200 200 return mk + mb;
201 201 }
202 202
203 203 static void recurse(struct line *a, struct line *b, struct pos *pos,
204 204 int a1, int a2, int b1, int b2, struct hunklist *l)
205 205 {
206 206 int i, j, k;
207 207
208 208 /* find the longest match in this chunk */
209 209 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
210 210 if (!k)
211 211 return;
212 212
213 213 /* and recurse on the remaining chunks on either side */
214 214 recurse(a, b, pos, a1, i, b1, j, l);
215 215 l->head->a1 = i;
216 216 l->head->a2 = i + k;
217 217 l->head->b1 = j;
218 218 l->head->b2 = j + k;
219 219 l->head++;
220 220 recurse(a, b, pos, i + k, a2, j + k, b2, l);
221 221 }
222 222
223 223 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
224 224 {
225 225 struct hunklist l;
226 226 struct pos *pos;
227 227 int t;
228 228
229 229 /* allocate and fill arrays */
230 230 t = equatelines(a, an, b, bn);
231 231 pos = calloc(bn, sizeof(struct pos));
232 232 l.head = l.base = malloc(sizeof(struct hunk) * ((an + bn) / 4 + 2));
233 233
234 234 if (pos && l.base && t) {
235 235 /* generate the matching block list */
236 236 recurse(a, b, pos, 0, an, 0, bn, &l);
237 237 l.head->a1 = an;
238 238 l.head->b1 = bn;
239 239 l.head++;
240 240 }
241 241
242 242 free(pos);
243 243 return l;
244 244 }
245 245
246 246 static PyObject *blocks(PyObject *self, PyObject *args)
247 247 {
248 248 PyObject *sa, *sb, *rl = NULL, *m;
249 249 struct line *a, *b;
250 250 struct hunklist l;
251 251 struct hunk *h;
252 252 int an, bn, pos = 0;
253 253
254 254 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
255 255 return NULL;
256 256
257 257 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a);
258 258 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b);
259 259 if (!a || !b)
260 260 goto nomem;
261 261
262 262 l = diff(a, an, b, bn);
263 263 rl = PyList_New(l.head - l.base);
264 264 if (!l.head || !rl)
265 265 goto nomem;
266 266
267 267 for(h = l.base; h != l.head; h++) {
268 268 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
269 269 PyList_SetItem(rl, pos, m);
270 270 pos++;
271 271 }
272 272
273 273 nomem:
274 274 free(a);
275 275 free(b);
276 276 free(l.base);
277 277 return rl ? rl : PyErr_NoMemory();
278 278 }
279 279
280 280 static PyObject *bdiff(PyObject *self, PyObject *args)
281 281 {
282 282 PyObject *sa, *sb, *result = NULL;
283 283 struct line *al, *bl;
284 284 struct hunklist l;
285 285 struct hunk *h;
286 286 char encode[12], *rb;
287 287 int an, bn, len = 0, la = 0, lb = 0;
288 288
289 289 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
290 290 return NULL;
291 291
292 292 an = splitlines(PyString_AsString(sa), PyString_Size(sa), &al);
293 293 bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &bl);
294 294 if (!al || !bl)
295 295 goto nomem;
296 296
297 297 l = diff(al, an, bl, bn);
298 298 if (!l.head)
299 299 goto nomem;
300 300
301 301 /* calculate length of output */
302 302 for(h = l.base; h != l.head; h++) {
303 303 if (h->a1 != la || h->b1 != lb)
304 304 len += 12 + bl[h->b1].l - bl[lb].l;
305 305 la = h->a2;
306 306 lb = h->b2;
307 307 }
308 308
309 309 result = PyString_FromStringAndSize(NULL, len);
310 310 if (!result)
311 311 goto nomem;
312 312
313 313 /* build binary patch */
314 314 rb = PyString_AsString(result);
315 315 la = lb = 0;
316 316
317 317 for(h = l.base; h != l.head; h++) {
318 318 if (h->a1 != la || h->b1 != lb) {
319 319 len = bl[h->b1].l - bl[lb].l;
320 320 *(uint32_t *)(encode) = htonl(al[la].l - al->l);
321 321 *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l);
322 322 *(uint32_t *)(encode + 8) = htonl(len);
323 323 memcpy(rb, encode, 12);
324 324 memcpy(rb + 12, bl[lb].l, len);
325 325 rb += 12 + len;
326 326 }
327 327 la = h->a2;
328 328 lb = h->b2;
329 329 }
330 330
331 331 nomem:
332 332 free(al);
333 333 free(bl);
334 334 free(l.base);
335 335 return result ? result : PyErr_NoMemory();
336 336 }
337 337
338 338 static char mdiff_doc[] = "Efficient binary diff.";
339 339
340 340 static PyMethodDef methods[] = {
341 341 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
342 342 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
343 343 {NULL, NULL}
344 344 };
345 345
346 346 PyMODINIT_FUNC initbdiff(void)
347 347 {
348 348 Py_InitModule3("bdiff", methods, mdiff_doc);
349 349 }
@@ -1,331 +1,331 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #include <Python.h>
24 24 #include <stdlib.h>
25 25 #include <string.h>
26 26 #ifdef _WIN32
27 27 #ifdef _MSC_VER
28 28 #define inline __inline
29 29 typedef unsigned long uint32_t;
30 30 #else
31 31 #include <stdint.h>
32 32 #endif
33 33 static uint32_t ntohl(uint32_t x)
34 34 {
35 35 return ((x & 0x000000ffUL) << 24) |
36 36 ((x & 0x0000ff00UL) << 8) |
37 37 ((x & 0x00ff0000UL) >> 8) |
38 38 ((x & 0xff000000UL) >> 24);
39 39 }
40 40 #else
41 41 #include <sys/types.h>
42 #include <netinet/in.h>
42 #include <arpa/inet.h>
43 43 #endif
44 44
45 45 static char mpatch_doc[] = "Efficient binary patching.";
46 46
47 47 struct frag {
48 48 int start, end, len;
49 49 char *data;
50 50 };
51 51
52 52 struct flist {
53 53 struct frag *base, *head, *tail;
54 54 };
55 55
56 56 static struct flist *lalloc(int size)
57 57 {
58 58 struct flist *a = NULL;
59 59
60 60 a = malloc(sizeof(struct flist));
61 61 if (a) {
62 62 a->base = malloc(sizeof(struct frag) * size);
63 63 if (!a->base) {
64 64 free(a);
65 65 a = NULL;
66 66 } else
67 67 a->head = a->tail = a->base;
68 68 }
69 69 return a;
70 70 }
71 71
72 72 static void lfree(struct flist *a)
73 73 {
74 74 if (a) {
75 75 free(a->base);
76 76 free(a);
77 77 }
78 78 }
79 79
80 80 static int lsize(struct flist *a)
81 81 {
82 82 return a->tail - a->head;
83 83 }
84 84
85 85 /* move hunks in source that are less cut to dest, compensating
86 86 for changes in offset. the last hunk may be split if necessary.
87 87 */
88 88 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
89 89 {
90 90 struct frag *d = dest->tail, *s = src->head;
91 91 int postend, c, l;
92 92
93 93 while (s != src->tail) {
94 94 if (s->start + offset >= cut)
95 95 break; /* we've gone far enough */
96 96
97 97 postend = offset + s->start + s->len;
98 98 if (postend <= cut) {
99 99 /* save this hunk */
100 100 offset += s->start + s->len - s->end;
101 101 *d++ = *s++;
102 102 }
103 103 else {
104 104 /* break up this hunk */
105 105 c = cut - offset;
106 106 if (s->end < c)
107 107 c = s->end;
108 108 l = cut - offset - s->start;
109 109 if (s->len < l)
110 110 l = s->len;
111 111
112 112 offset += s->start + l - c;
113 113
114 114 d->start = s->start;
115 115 d->end = c;
116 116 d->len = l;
117 117 d->data = s->data;
118 118 d++;
119 119 s->start = c;
120 120 s->len = s->len - l;
121 121 s->data = s->data + l;
122 122
123 123 break;
124 124 }
125 125 }
126 126
127 127 dest->tail = d;
128 128 src->head = s;
129 129 return offset;
130 130 }
131 131
132 132 /* like gather, but with no output list */
133 133 static int discard(struct flist *src, int cut, int offset)
134 134 {
135 135 struct frag *s = src->head;
136 136 int postend, c, l;
137 137
138 138 while (s != src->tail) {
139 139 if (s->start + offset >= cut)
140 140 break;
141 141
142 142 postend = offset + s->start + s->len;
143 143 if (postend <= cut) {
144 144 offset += s->start + s->len - s->end;
145 145 s++;
146 146 }
147 147 else {
148 148 c = cut - offset;
149 149 if (s->end < c)
150 150 c = s->end;
151 151 l = cut - offset - s->start;
152 152 if (s->len < l)
153 153 l = s->len;
154 154
155 155 offset += s->start + l - c;
156 156 s->start = c;
157 157 s->len = s->len - l;
158 158 s->data = s->data + l;
159 159
160 160 break;
161 161 }
162 162 }
163 163
164 164 src->head = s;
165 165 return offset;
166 166 }
167 167
168 168 /* combine hunk lists a and b, while adjusting b for offset changes in a/
169 169 this deletes a and b and returns the resultant list. */
170 170 static struct flist *combine(struct flist *a, struct flist *b)
171 171 {
172 172 struct flist *c = NULL;
173 173 struct frag *bh, *ct;
174 174 int offset = 0, post;
175 175
176 176 if (a && b)
177 177 c = lalloc((lsize(a) + lsize(b)) * 2);
178 178
179 179 if (c) {
180 180
181 181 for (bh = b->head; bh != b->tail; bh++) {
182 182 /* save old hunks */
183 183 offset = gather(c, a, bh->start, offset);
184 184
185 185 /* discard replaced hunks */
186 186 post = discard(a, bh->end, offset);
187 187
188 188 /* insert new hunk */
189 189 ct = c->tail;
190 190 ct->start = bh->start - offset;
191 191 ct->end = bh->end - post;
192 192 ct->len = bh->len;
193 193 ct->data = bh->data;
194 194 c->tail++;
195 195 offset = post;
196 196 }
197 197
198 198 /* hold on to tail from a */
199 199 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
200 200 c->tail += lsize(a);
201 201 }
202 202
203 203 lfree(a);
204 204 lfree(b);
205 205 return c;
206 206 }
207 207
208 208 /* decode a binary patch into a hunk list */
209 209 static struct flist *decode(char *bin, int len)
210 210 {
211 211 struct flist *l;
212 212 struct frag *lt;
213 213 char *end = bin + len;
214 214 char decode[12]; /* for dealing with alignment issues */
215 215
216 216 /* assume worst case size, we won't have many of these lists */
217 217 l = lalloc(len / 12);
218 218 lt = l->tail;
219 219
220 220 while (bin < end) {
221 221 memcpy(decode, bin, 12);
222 222 lt->start = ntohl(*(uint32_t *)decode);
223 223 lt->end = ntohl(*(uint32_t *)(decode + 4));
224 224 lt->len = ntohl(*(uint32_t *)(decode + 8));
225 225 lt->data = bin + 12;
226 226 bin += 12 + lt->len;
227 227 lt++;
228 228 }
229 229
230 230 l->tail = lt;
231 231 return l;
232 232 }
233 233
234 234 /* calculate the size of resultant text */
235 235 static int calcsize(int len, struct flist *l)
236 236 {
237 237 int outlen = 0, last = 0;
238 238 struct frag *f = l->head;
239 239
240 240 while (f != l->tail) {
241 241 outlen += f->start - last;
242 242 last = f->end;
243 243 outlen += f->len;
244 244 f++;
245 245 }
246 246
247 247 outlen += len - last;
248 248 return outlen;
249 249 }
250 250
251 251 static void apply(char *buf, char *orig, int len, struct flist *l)
252 252 {
253 253 struct frag *f = l->head;
254 254 int last = 0;
255 255 char *p = buf;
256 256
257 257 while (f != l->tail) {
258 258 memcpy(p, orig + last, f->start - last);
259 259 p += f->start - last;
260 260 memcpy(p, f->data, f->len);
261 261 last = f->end;
262 262 p += f->len;
263 263 f++;
264 264 }
265 265 memcpy(p, orig + last, len - last);
266 266 }
267 267
268 268 /* recursively generate a patch of all bins between start and end */
269 269 static struct flist *fold(PyObject *bins, int start, int end)
270 270 {
271 271 int len;
272 272
273 273 if (start + 1 == end) {
274 274 /* trivial case, output a decoded list */
275 275 PyObject *tmp = PyList_GetItem(bins, start);
276 276 if (!tmp)
277 277 return NULL;
278 278 return decode(PyString_AsString(tmp), PyString_Size(tmp));
279 279 }
280 280
281 281 /* divide and conquer, memory management is elsewhere */
282 282 len = (end - start) / 2;
283 283 return combine(fold(bins, start, start + len),
284 284 fold(bins, start + len, end));
285 285 }
286 286
287 287 static PyObject *
288 288 patches(PyObject *self, PyObject *args)
289 289 {
290 290 PyObject *text, *bins, *result;
291 291 struct flist *patch;
292 292 char *in, *out;
293 293 int len, outlen;
294 294
295 295 if (!PyArg_ParseTuple(args, "SO:mpatch", &text, &bins))
296 296 return NULL;
297 297
298 298 len = PyList_Size(bins);
299 299 if (!len) {
300 300 /* nothing to do */
301 301 Py_INCREF(text);
302 302 return text;
303 303 }
304 304
305 305 patch = fold(bins, 0, len);
306 306 if (!patch)
307 307 return PyErr_NoMemory();
308 308
309 309 outlen = calcsize(PyString_Size(text), patch);
310 310 result = PyString_FromStringAndSize(NULL, outlen);
311 311 if (result) {
312 312 in = PyString_AsString(text);
313 313 out = PyString_AsString(result);
314 314 apply(out, in, PyString_Size(text), patch);
315 315 }
316 316
317 317 lfree(patch);
318 318 return result;
319 319 }
320 320
321 321 static PyMethodDef methods[] = {
322 322 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
323 323 {NULL, NULL}
324 324 };
325 325
326 326 PyMODINIT_FUNC
327 327 initmpatch(void)
328 328 {
329 329 Py_InitModule3("mpatch", methods, mpatch_doc);
330 330 }
331 331
General Comments 0
You need to be logged in to leave comments. Login now