##// END OF EJS Templates
bdiff: fix malloc(0) issue in fixws()...
Jim Hague -
r16071:8134ec86 stable
parent child Browse files
Show More
@@ -1,499 +1,499 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <limits.h>
15 #include <limits.h>
16
16
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
18 #define inline
18 #define inline
19 #endif
19 #endif
20
20
21 #ifdef __linux
21 #ifdef __linux
22 #define inline __inline
22 #define inline __inline
23 #endif
23 #endif
24
24
25 #ifdef _WIN32
25 #ifdef _WIN32
26 #ifdef _MSC_VER
26 #ifdef _MSC_VER
27 #define inline __inline
27 #define inline __inline
28 typedef unsigned long uint32_t;
28 typedef unsigned long uint32_t;
29 #else
29 #else
30 #include <stdint.h>
30 #include <stdint.h>
31 #endif
31 #endif
/*
 * Fallback htonl() for builds where _WIN32 is defined: reverse the
 * order of the four bytes of x and return the result.
 */
static uint32_t htonl(uint32_t x)
{
	/* move each byte to its mirrored position, then recombine */
	uint32_t lowest_to_top = (x & 0x000000ffUL) << 24;
	uint32_t low_to_high = (x & 0x0000ff00UL) << 8;
	uint32_t high_to_low = (x & 0x00ff0000UL) >> 8;
	uint32_t top_to_lowest = (x & 0xff000000UL) >> 24;

	return lowest_to_top | low_to_high | high_to_low | top_to_lowest;
}
39 #else
39 #else
40 #include <sys/types.h>
40 #include <sys/types.h>
41 #if defined __BEOS__ && !defined __HAIKU__
41 #if defined __BEOS__ && !defined __HAIKU__
42 #include <ByteOrder.h>
42 #include <ByteOrder.h>
43 #else
43 #else
44 #include <arpa/inet.h>
44 #include <arpa/inet.h>
45 #endif
45 #endif
46 #include <inttypes.h>
46 #include <inttypes.h>
47 #endif
47 #endif
48
48
49 #include "util.h"
49 #include "util.h"
50
50
/* One line of an input buffer, as produced by splitlines(). */
struct line {
	int hash;      /* hash of the line's bytes */
	int len;       /* length in bytes, including any trailing '\n' */
	int n;         /* index of the next matching line in b, or INT_MAX */
	int e;         /* equivalence class (hash table slot) of the line */
	const char *l; /* start of the line inside the original buffer */
};
55
55
/*
 * Pair of integers used in two roles: as a hash table slot in
 * equatelines() (pos = first line of the class, len = number of
 * members) and as per-line match state in longest_match()
 * (pos = matching index in a, len = length of the run ending there).
 */
struct pos {
	int pos;
	int len;
};
59
59
struct hunk;

/* A matching block: lines a1..a2 of a equal lines b1..b2 of b. */
struct hunk {
	int a1;            /* first matching line in a */
	int a2;            /* one past the last matching line in a */
	int b1;            /* first matching line in b */
	int b2;            /* one past the last matching line in b */
	struct hunk *next; /* next hunk in the singly linked list */
};
65
65
66 static int splitlines(const char *a, int len, struct line **lr)
66 static int splitlines(const char *a, int len, struct line **lr)
67 {
67 {
68 unsigned hash;
68 unsigned hash;
69 int i;
69 int i;
70 const char *p, *b = a;
70 const char *p, *b = a;
71 const char * const plast = a + len - 1;
71 const char * const plast = a + len - 1;
72 struct line *l;
72 struct line *l;
73
73
74 /* count the lines */
74 /* count the lines */
75 i = 1; /* extra line for sentinel */
75 i = 1; /* extra line for sentinel */
76 for (p = a; p < a + len; p++)
76 for (p = a; p < a + len; p++)
77 if (*p == '\n' || p == plast)
77 if (*p == '\n' || p == plast)
78 i++;
78 i++;
79
79
80 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
80 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
81 if (!l)
81 if (!l)
82 return -1;
82 return -1;
83
83
84 /* build the line array and calculate hashes */
84 /* build the line array and calculate hashes */
85 hash = 0;
85 hash = 0;
86 for (p = a; p < a + len; p++) {
86 for (p = a; p < a + len; p++) {
87 /* Leonid Yuriev's hash */
87 /* Leonid Yuriev's hash */
88 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
88 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
89
89
90 if (*p == '\n' || p == plast) {
90 if (*p == '\n' || p == plast) {
91 l->hash = hash;
91 l->hash = hash;
92 hash = 0;
92 hash = 0;
93 l->len = p - b + 1;
93 l->len = p - b + 1;
94 l->l = b;
94 l->l = b;
95 l->n = INT_MAX;
95 l->n = INT_MAX;
96 l++;
96 l++;
97 b = p + 1;
97 b = p + 1;
98 }
98 }
99 }
99 }
100
100
101 /* set up a sentinel */
101 /* set up a sentinel */
102 l->hash = 0;
102 l->hash = 0;
103 l->len = 0;
103 l->len = 0;
104 l->l = a + len;
104 l->l = a + len;
105 return i - 1;
105 return i - 1;
106 }
106 }
107
107
108 static inline int cmp(struct line *a, struct line *b)
108 static inline int cmp(struct line *a, struct line *b)
109 {
109 {
110 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
110 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
111 }
111 }
112
112
113 static int equatelines(struct line *a, int an, struct line *b, int bn)
113 static int equatelines(struct line *a, int an, struct line *b, int bn)
114 {
114 {
115 int i, j, buckets = 1, t, scale;
115 int i, j, buckets = 1, t, scale;
116 struct pos *h = NULL;
116 struct pos *h = NULL;
117
117
118 /* build a hash table of the next highest power of 2 */
118 /* build a hash table of the next highest power of 2 */
119 while (buckets < bn + 1)
119 while (buckets < bn + 1)
120 buckets *= 2;
120 buckets *= 2;
121
121
122 /* try to allocate a large hash table to avoid collisions */
122 /* try to allocate a large hash table to avoid collisions */
123 for (scale = 4; scale; scale /= 2) {
123 for (scale = 4; scale; scale /= 2) {
124 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
124 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
125 if (h)
125 if (h)
126 break;
126 break;
127 }
127 }
128
128
129 if (!h)
129 if (!h)
130 return 0;
130 return 0;
131
131
132 buckets = buckets * scale - 1;
132 buckets = buckets * scale - 1;
133
133
134 /* clear the hash table */
134 /* clear the hash table */
135 for (i = 0; i <= buckets; i++) {
135 for (i = 0; i <= buckets; i++) {
136 h[i].pos = INT_MAX;
136 h[i].pos = INT_MAX;
137 h[i].len = 0;
137 h[i].len = 0;
138 }
138 }
139
139
140 /* add lines to the hash table chains */
140 /* add lines to the hash table chains */
141 for (i = bn - 1; i >= 0; i--) {
141 for (i = bn - 1; i >= 0; i--) {
142 /* find the equivalence class */
142 /* find the equivalence class */
143 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
143 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
144 j = (j + 1) & buckets)
144 j = (j + 1) & buckets)
145 if (!cmp(b + i, b + h[j].pos))
145 if (!cmp(b + i, b + h[j].pos))
146 break;
146 break;
147
147
148 /* add to the head of the equivalence class */
148 /* add to the head of the equivalence class */
149 b[i].n = h[j].pos;
149 b[i].n = h[j].pos;
150 b[i].e = j;
150 b[i].e = j;
151 h[j].pos = i;
151 h[j].pos = i;
152 h[j].len++; /* keep track of popularity */
152 h[j].len++; /* keep track of popularity */
153 }
153 }
154
154
155 /* compute popularity threshold */
155 /* compute popularity threshold */
156 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
156 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
157
157
158 /* match items in a to their equivalence class in b */
158 /* match items in a to their equivalence class in b */
159 for (i = 0; i < an; i++) {
159 for (i = 0; i < an; i++) {
160 /* find the equivalence class */
160 /* find the equivalence class */
161 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
161 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
162 j = (j + 1) & buckets)
162 j = (j + 1) & buckets)
163 if (!cmp(a + i, b + h[j].pos))
163 if (!cmp(a + i, b + h[j].pos))
164 break;
164 break;
165
165
166 a[i].e = j; /* use equivalence class for quick compare */
166 a[i].e = j; /* use equivalence class for quick compare */
167 if (h[j].len <= t)
167 if (h[j].len <= t)
168 a[i].n = h[j].pos; /* point to head of match list */
168 a[i].n = h[j].pos; /* point to head of match list */
169 else
169 else
170 a[i].n = INT_MAX; /* too popular */
170 a[i].n = INT_MAX; /* too popular */
171 }
171 }
172
172
173 /* discard hash tables */
173 /* discard hash tables */
174 free(h);
174 free(h);
175 return 1;
175 return 1;
176 }
176 }
177
177
178 static int longest_match(struct line *a, struct line *b, struct pos *pos,
178 static int longest_match(struct line *a, struct line *b, struct pos *pos,
179 int a1, int a2, int b1, int b2, int *omi, int *omj)
179 int a1, int a2, int b1, int b2, int *omi, int *omj)
180 {
180 {
181 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
181 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
182
182
183 for (i = a1; i < a2; i++) {
183 for (i = a1; i < a2; i++) {
184 /* skip things before the current block */
184 /* skip things before the current block */
185 for (j = a[i].n; j < b1; j = b[j].n)
185 for (j = a[i].n; j < b1; j = b[j].n)
186 ;
186 ;
187
187
188 /* loop through all lines match a[i] in b */
188 /* loop through all lines match a[i] in b */
189 for (; j < b2; j = b[j].n) {
189 for (; j < b2; j = b[j].n) {
190 /* does this extend an earlier match? */
190 /* does this extend an earlier match? */
191 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
191 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
192 k = pos[j - 1].len + 1;
192 k = pos[j - 1].len + 1;
193 else
193 else
194 k = 1;
194 k = 1;
195 pos[j].pos = i;
195 pos[j].pos = i;
196 pos[j].len = k;
196 pos[j].len = k;
197
197
198 /* best match so far? */
198 /* best match so far? */
199 if (k > mk) {
199 if (k > mk) {
200 mi = i;
200 mi = i;
201 mj = j;
201 mj = j;
202 mk = k;
202 mk = k;
203 }
203 }
204 }
204 }
205 }
205 }
206
206
207 if (mk) {
207 if (mk) {
208 mi = mi - mk + 1;
208 mi = mi - mk + 1;
209 mj = mj - mk + 1;
209 mj = mj - mk + 1;
210 }
210 }
211
211
212 /* expand match to include neighboring popular lines */
212 /* expand match to include neighboring popular lines */
213 while (mi - mb > a1 && mj - mb > b1 &&
213 while (mi - mb > a1 && mj - mb > b1 &&
214 a[mi - mb - 1].e == b[mj - mb - 1].e)
214 a[mi - mb - 1].e == b[mj - mb - 1].e)
215 mb++;
215 mb++;
216 while (mi + mk < a2 && mj + mk < b2 &&
216 while (mi + mk < a2 && mj + mk < b2 &&
217 a[mi + mk].e == b[mj + mk].e)
217 a[mi + mk].e == b[mj + mk].e)
218 mk++;
218 mk++;
219
219
220 *omi = mi - mb;
220 *omi = mi - mb;
221 *omj = mj - mb;
221 *omj = mj - mb;
222
222
223 return mk + mb;
223 return mk + mb;
224 }
224 }
225
225
226 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
226 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
227 int a1, int a2, int b1, int b2, struct hunk *l)
227 int a1, int a2, int b1, int b2, struct hunk *l)
228 {
228 {
229 int i, j, k;
229 int i, j, k;
230
230
231 while (1) {
231 while (1) {
232 /* find the longest match in this chunk */
232 /* find the longest match in this chunk */
233 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
233 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
234 if (!k)
234 if (!k)
235 return l;
235 return l;
236
236
237 /* and recurse on the remaining chunks on either side */
237 /* and recurse on the remaining chunks on either side */
238 l = recurse(a, b, pos, a1, i, b1, j, l);
238 l = recurse(a, b, pos, a1, i, b1, j, l);
239 if (!l)
239 if (!l)
240 return NULL;
240 return NULL;
241
241
242 l->next = (struct hunk *)malloc(sizeof(struct hunk));
242 l->next = (struct hunk *)malloc(sizeof(struct hunk));
243 if (!l->next)
243 if (!l->next)
244 return NULL;
244 return NULL;
245
245
246 l = l->next;
246 l = l->next;
247 l->a1 = i;
247 l->a1 = i;
248 l->a2 = i + k;
248 l->a2 = i + k;
249 l->b1 = j;
249 l->b1 = j;
250 l->b2 = j + k;
250 l->b2 = j + k;
251 l->next = NULL;
251 l->next = NULL;
252
252
253 /* tail-recursion didn't happen, so do equivalent iteration */
253 /* tail-recursion didn't happen, so do equivalent iteration */
254 a1 = i + k;
254 a1 = i + k;
255 b1 = j + k;
255 b1 = j + k;
256 }
256 }
257 }
257 }
258
258
259 static int diff(struct line *a, int an, struct line *b, int bn,
259 static int diff(struct line *a, int an, struct line *b, int bn,
260 struct hunk *base)
260 struct hunk *base)
261 {
261 {
262 struct hunk *curr;
262 struct hunk *curr;
263 struct pos *pos;
263 struct pos *pos;
264 int t, count = 0;
264 int t, count = 0;
265
265
266 /* allocate and fill arrays */
266 /* allocate and fill arrays */
267 t = equatelines(a, an, b, bn);
267 t = equatelines(a, an, b, bn);
268 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
268 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
269
269
270 if (pos && t) {
270 if (pos && t) {
271 /* generate the matching block list */
271 /* generate the matching block list */
272
272
273 curr = recurse(a, b, pos, 0, an, 0, bn, base);
273 curr = recurse(a, b, pos, 0, an, 0, bn, base);
274 if (!curr)
274 if (!curr)
275 return -1;
275 return -1;
276
276
277 /* sentinel end hunk */
277 /* sentinel end hunk */
278 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
278 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
279 if (!curr->next)
279 if (!curr->next)
280 return -1;
280 return -1;
281 curr = curr->next;
281 curr = curr->next;
282 curr->a1 = curr->a2 = an;
282 curr->a1 = curr->a2 = an;
283 curr->b1 = curr->b2 = bn;
283 curr->b1 = curr->b2 = bn;
284 curr->next = NULL;
284 curr->next = NULL;
285 }
285 }
286
286
287 free(pos);
287 free(pos);
288
288
289 /* normalize the hunk list, try to push each hunk towards the end */
289 /* normalize the hunk list, try to push each hunk towards the end */
290 for (curr = base->next; curr; curr = curr->next) {
290 for (curr = base->next; curr; curr = curr->next) {
291 struct hunk *next = curr->next;
291 struct hunk *next = curr->next;
292 int shift = 0;
292 int shift = 0;
293
293
294 if (!next)
294 if (!next)
295 break;
295 break;
296
296
297 if (curr->a2 == next->a1)
297 if (curr->a2 == next->a1)
298 while (curr->a2 + shift < an && curr->b2 + shift < bn
298 while (curr->a2 + shift < an && curr->b2 + shift < bn
299 && !cmp(a + curr->a2 + shift,
299 && !cmp(a + curr->a2 + shift,
300 b + curr->b2 + shift))
300 b + curr->b2 + shift))
301 shift++;
301 shift++;
302 else if (curr->b2 == next->b1)
302 else if (curr->b2 == next->b1)
303 while (curr->b2 + shift < bn && curr->a2 + shift < an
303 while (curr->b2 + shift < bn && curr->a2 + shift < an
304 && !cmp(b + curr->b2 + shift,
304 && !cmp(b + curr->b2 + shift,
305 a + curr->a2 + shift))
305 a + curr->a2 + shift))
306 shift++;
306 shift++;
307 if (!shift)
307 if (!shift)
308 continue;
308 continue;
309 curr->b2 += shift;
309 curr->b2 += shift;
310 next->b1 += shift;
310 next->b1 += shift;
311 curr->a2 += shift;
311 curr->a2 += shift;
312 next->a1 += shift;
312 next->a1 += shift;
313 }
313 }
314
314
315 for (curr = base->next; curr; curr = curr->next)
315 for (curr = base->next; curr; curr = curr->next)
316 count++;
316 count++;
317 return count;
317 return count;
318 }
318 }
319
319
320 static void freehunks(struct hunk *l)
320 static void freehunks(struct hunk *l)
321 {
321 {
322 struct hunk *n;
322 struct hunk *n;
323 for (; l; l = n) {
323 for (; l; l = n) {
324 n = l->next;
324 n = l->next;
325 free(l);
325 free(l);
326 }
326 }
327 }
327 }
328
328
329 static PyObject *blocks(PyObject *self, PyObject *args)
329 static PyObject *blocks(PyObject *self, PyObject *args)
330 {
330 {
331 PyObject *sa, *sb, *rl = NULL, *m;
331 PyObject *sa, *sb, *rl = NULL, *m;
332 struct line *a, *b;
332 struct line *a, *b;
333 struct hunk l, *h;
333 struct hunk l, *h;
334 int an, bn, count, pos = 0;
334 int an, bn, count, pos = 0;
335
335
336 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
336 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
337 return NULL;
337 return NULL;
338
338
339 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
339 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
340 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
340 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
341
341
342 if (!a || !b)
342 if (!a || !b)
343 goto nomem;
343 goto nomem;
344
344
345 l.next = NULL;
345 l.next = NULL;
346 count = diff(a, an, b, bn, &l);
346 count = diff(a, an, b, bn, &l);
347 if (count < 0)
347 if (count < 0)
348 goto nomem;
348 goto nomem;
349
349
350 rl = PyList_New(count);
350 rl = PyList_New(count);
351 if (!rl)
351 if (!rl)
352 goto nomem;
352 goto nomem;
353
353
354 for (h = l.next; h; h = h->next) {
354 for (h = l.next; h; h = h->next) {
355 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
355 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
356 PyList_SetItem(rl, pos, m);
356 PyList_SetItem(rl, pos, m);
357 pos++;
357 pos++;
358 }
358 }
359
359
360 nomem:
360 nomem:
361 free(a);
361 free(a);
362 free(b);
362 free(b);
363 freehunks(l.next);
363 freehunks(l.next);
364 return rl ? rl : PyErr_NoMemory();
364 return rl ? rl : PyErr_NoMemory();
365 }
365 }
366
366
367 static PyObject *bdiff(PyObject *self, PyObject *args)
367 static PyObject *bdiff(PyObject *self, PyObject *args)
368 {
368 {
369 char *sa, *sb, *rb;
369 char *sa, *sb, *rb;
370 PyObject *result = NULL;
370 PyObject *result = NULL;
371 struct line *al, *bl;
371 struct line *al, *bl;
372 struct hunk l, *h;
372 struct hunk l, *h;
373 uint32_t encode[3];
373 uint32_t encode[3];
374 int an, bn, len = 0, la, lb, count;
374 int an, bn, len = 0, la, lb, count;
375
375
376 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
376 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
377 return NULL;
377 return NULL;
378
378
379 an = splitlines(sa, la, &al);
379 an = splitlines(sa, la, &al);
380 bn = splitlines(sb, lb, &bl);
380 bn = splitlines(sb, lb, &bl);
381 if (!al || !bl)
381 if (!al || !bl)
382 goto nomem;
382 goto nomem;
383
383
384 l.next = NULL;
384 l.next = NULL;
385 count = diff(al, an, bl, bn, &l);
385 count = diff(al, an, bl, bn, &l);
386 if (count < 0)
386 if (count < 0)
387 goto nomem;
387 goto nomem;
388
388
389 /* calculate length of output */
389 /* calculate length of output */
390 la = lb = 0;
390 la = lb = 0;
391 for (h = l.next; h; h = h->next) {
391 for (h = l.next; h; h = h->next) {
392 if (h->a1 != la || h->b1 != lb)
392 if (h->a1 != la || h->b1 != lb)
393 len += 12 + bl[h->b1].l - bl[lb].l;
393 len += 12 + bl[h->b1].l - bl[lb].l;
394 la = h->a2;
394 la = h->a2;
395 lb = h->b2;
395 lb = h->b2;
396 }
396 }
397
397
398 result = PyBytes_FromStringAndSize(NULL, len);
398 result = PyBytes_FromStringAndSize(NULL, len);
399
399
400 if (!result)
400 if (!result)
401 goto nomem;
401 goto nomem;
402
402
403 /* build binary patch */
403 /* build binary patch */
404 rb = PyBytes_AsString(result);
404 rb = PyBytes_AsString(result);
405 la = lb = 0;
405 la = lb = 0;
406
406
407 for (h = l.next; h; h = h->next) {
407 for (h = l.next; h; h = h->next) {
408 if (h->a1 != la || h->b1 != lb) {
408 if (h->a1 != la || h->b1 != lb) {
409 len = bl[h->b1].l - bl[lb].l;
409 len = bl[h->b1].l - bl[lb].l;
410 encode[0] = htonl(al[la].l - al->l);
410 encode[0] = htonl(al[la].l - al->l);
411 encode[1] = htonl(al[h->a1].l - al->l);
411 encode[1] = htonl(al[h->a1].l - al->l);
412 encode[2] = htonl(len);
412 encode[2] = htonl(len);
413 memcpy(rb, encode, 12);
413 memcpy(rb, encode, 12);
414 memcpy(rb + 12, bl[lb].l, len);
414 memcpy(rb + 12, bl[lb].l, len);
415 rb += 12 + len;
415 rb += 12 + len;
416 }
416 }
417 la = h->a2;
417 la = h->a2;
418 lb = h->b2;
418 lb = h->b2;
419 }
419 }
420
420
421 nomem:
421 nomem:
422 free(al);
422 free(al);
423 free(bl);
423 free(bl);
424 freehunks(l.next);
424 freehunks(l.next);
425 return result ? result : PyErr_NoMemory();
425 return result ? result : PyErr_NoMemory();
426 }
426 }
427
427
428 /*
428 /*
429 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
429 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
430 * reduce whitespace sequences to a single space and trim remaining whitespace
430 * reduce whitespace sequences to a single space and trim remaining whitespace
431 * from end of lines.
431 * from end of lines.
432 */
432 */
433 static PyObject *fixws(PyObject *self, PyObject *args)
433 static PyObject *fixws(PyObject *self, PyObject *args)
434 {
434 {
435 PyObject *s, *result = NULL;
435 PyObject *s, *result = NULL;
436 char allws, c;
436 char allws, c;
437 const char *r;
437 const char *r;
438 int i, rlen, wlen = 0;
438 int i, rlen, wlen = 0;
439 char *w;
439 char *w;
440
440
441 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
441 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
442 return NULL;
442 return NULL;
443 r = PyBytes_AsString(s);
443 r = PyBytes_AsString(s);
444 rlen = PyBytes_Size(s);
444 rlen = PyBytes_Size(s);
445
445
446 w = (char *)malloc(rlen);
446 w = (char *)malloc(rlen ? rlen : 1);
447 if (!w)
447 if (!w)
448 goto nomem;
448 goto nomem;
449
449
450 for (i = 0; i != rlen; i++) {
450 for (i = 0; i != rlen; i++) {
451 c = r[i];
451 c = r[i];
452 if (c == ' ' || c == '\t' || c == '\r') {
452 if (c == ' ' || c == '\t' || c == '\r') {
453 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
453 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
454 w[wlen++] = ' ';
454 w[wlen++] = ' ';
455 } else if (c == '\n' && !allws
455 } else if (c == '\n' && !allws
456 && wlen > 0 && w[wlen - 1] == ' ') {
456 && wlen > 0 && w[wlen - 1] == ' ') {
457 w[wlen - 1] = '\n';
457 w[wlen - 1] = '\n';
458 } else {
458 } else {
459 w[wlen++] = c;
459 w[wlen++] = c;
460 }
460 }
461 }
461 }
462
462
463 result = PyBytes_FromStringAndSize(w, wlen);
463 result = PyBytes_FromStringAndSize(w, wlen);
464
464
465 nomem:
465 nomem:
466 free(w);
466 free(w);
467 return result ? result : PyErr_NoMemory();
467 return result ? result : PyErr_NoMemory();
468 }
468 }
469
469
470
470
471 static char mdiff_doc[] = "Efficient binary diff.";
471 static char mdiff_doc[] = "Efficient binary diff.";
472
472
473 static PyMethodDef methods[] = {
473 static PyMethodDef methods[] = {
474 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
474 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
475 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
475 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
476 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
476 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
477 {NULL, NULL}
477 {NULL, NULL}
478 };
478 };
479
479
480 #ifdef IS_PY3K
480 #ifdef IS_PY3K
481 static struct PyModuleDef bdiff_module = {
481 static struct PyModuleDef bdiff_module = {
482 PyModuleDef_HEAD_INIT,
482 PyModuleDef_HEAD_INIT,
483 "bdiff",
483 "bdiff",
484 mdiff_doc,
484 mdiff_doc,
485 -1,
485 -1,
486 methods
486 methods
487 };
487 };
488
488
489 PyMODINIT_FUNC PyInit_bdiff(void)
489 PyMODINIT_FUNC PyInit_bdiff(void)
490 {
490 {
491 return PyModule_Create(&bdiff_module);
491 return PyModule_Create(&bdiff_module);
492 }
492 }
493 #else
493 #else
494 PyMODINIT_FUNC initbdiff(void)
494 PyMODINIT_FUNC initbdiff(void)
495 {
495 {
496 Py_InitModule3("bdiff", methods, mdiff_doc);
496 Py_InitModule3("bdiff", methods, mdiff_doc);
497 }
497 }
498 #endif
498 #endif
499
499
General Comments 0
You need to be logged in to leave comments. Login now