##// END OF EJS Templates
mdiff: replace wscleanup() regexps with C loops...
Patrick Mezard -
r15530:eeac5e17 default
parent child Browse files
Show More
@@ -1,455 +1,499 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #include <Python.h>
12 #include <Python.h>
13 #include <stdlib.h>
13 #include <stdlib.h>
14 #include <string.h>
14 #include <string.h>
15 #include <limits.h>
15 #include <limits.h>
16
16
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
17 #if defined __hpux || defined __SUNPRO_C || defined _AIX
18 #define inline
18 #define inline
19 #endif
19 #endif
20
20
21 #ifdef __linux
21 #ifdef __linux
22 #define inline __inline
22 #define inline __inline
23 #endif
23 #endif
24
24
25 #ifdef _WIN32
25 #ifdef _WIN32
26 #ifdef _MSC_VER
26 #ifdef _MSC_VER
27 #define inline __inline
27 #define inline __inline
28 typedef unsigned long uint32_t;
28 typedef unsigned long uint32_t;
29 #else
29 #else
30 #include <stdint.h>
30 #include <stdint.h>
31 #endif
31 #endif
32 static uint32_t htonl(uint32_t x)
32 static uint32_t htonl(uint32_t x)
33 {
33 {
34 return ((x & 0x000000ffUL) << 24) |
34 return ((x & 0x000000ffUL) << 24) |
35 ((x & 0x0000ff00UL) << 8) |
35 ((x & 0x0000ff00UL) << 8) |
36 ((x & 0x00ff0000UL) >> 8) |
36 ((x & 0x00ff0000UL) >> 8) |
37 ((x & 0xff000000UL) >> 24);
37 ((x & 0xff000000UL) >> 24);
38 }
38 }
39 #else
39 #else
40 #include <sys/types.h>
40 #include <sys/types.h>
41 #if defined __BEOS__ && !defined __HAIKU__
41 #if defined __BEOS__ && !defined __HAIKU__
42 #include <ByteOrder.h>
42 #include <ByteOrder.h>
43 #else
43 #else
44 #include <arpa/inet.h>
44 #include <arpa/inet.h>
45 #endif
45 #endif
46 #include <inttypes.h>
46 #include <inttypes.h>
47 #endif
47 #endif
48
48
49 #include "util.h"
49 #include "util.h"
50
50
51 struct line {
51 struct line {
52 int hash, len, n, e;
52 int hash, len, n, e;
53 const char *l;
53 const char *l;
54 };
54 };
55
55
56 struct pos {
56 struct pos {
57 int pos, len;
57 int pos, len;
58 };
58 };
59
59
60 struct hunk;
60 struct hunk;
61 struct hunk {
61 struct hunk {
62 int a1, a2, b1, b2;
62 int a1, a2, b1, b2;
63 struct hunk *next;
63 struct hunk *next;
64 };
64 };
65
65
66 static int splitlines(const char *a, int len, struct line **lr)
66 static int splitlines(const char *a, int len, struct line **lr)
67 {
67 {
68 unsigned hash;
68 unsigned hash;
69 int i;
69 int i;
70 const char *p, *b = a;
70 const char *p, *b = a;
71 const char * const plast = a + len - 1;
71 const char * const plast = a + len - 1;
72 struct line *l;
72 struct line *l;
73
73
74 /* count the lines */
74 /* count the lines */
75 i = 1; /* extra line for sentinel */
75 i = 1; /* extra line for sentinel */
76 for (p = a; p < a + len; p++)
76 for (p = a; p < a + len; p++)
77 if (*p == '\n' || p == plast)
77 if (*p == '\n' || p == plast)
78 i++;
78 i++;
79
79
80 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
80 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
81 if (!l)
81 if (!l)
82 return -1;
82 return -1;
83
83
84 /* build the line array and calculate hashes */
84 /* build the line array and calculate hashes */
85 hash = 0;
85 hash = 0;
86 for (p = a; p < a + len; p++) {
86 for (p = a; p < a + len; p++) {
87 /* Leonid Yuriev's hash */
87 /* Leonid Yuriev's hash */
88 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
88 hash = (hash * 1664525) + (unsigned char)*p + 1013904223;
89
89
90 if (*p == '\n' || p == plast) {
90 if (*p == '\n' || p == plast) {
91 l->hash = hash;
91 l->hash = hash;
92 hash = 0;
92 hash = 0;
93 l->len = p - b + 1;
93 l->len = p - b + 1;
94 l->l = b;
94 l->l = b;
95 l->n = INT_MAX;
95 l->n = INT_MAX;
96 l++;
96 l++;
97 b = p + 1;
97 b = p + 1;
98 }
98 }
99 }
99 }
100
100
101 /* set up a sentinel */
101 /* set up a sentinel */
102 l->hash = 0;
102 l->hash = 0;
103 l->len = 0;
103 l->len = 0;
104 l->l = a + len;
104 l->l = a + len;
105 return i - 1;
105 return i - 1;
106 }
106 }
107
107
108 static inline int cmp(struct line *a, struct line *b)
108 static inline int cmp(struct line *a, struct line *b)
109 {
109 {
110 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
110 return a->hash != b->hash || a->len != b->len || memcmp(a->l, b->l, a->len);
111 }
111 }
112
112
113 static int equatelines(struct line *a, int an, struct line *b, int bn)
113 static int equatelines(struct line *a, int an, struct line *b, int bn)
114 {
114 {
115 int i, j, buckets = 1, t, scale;
115 int i, j, buckets = 1, t, scale;
116 struct pos *h = NULL;
116 struct pos *h = NULL;
117
117
118 /* build a hash table of the next highest power of 2 */
118 /* build a hash table of the next highest power of 2 */
119 while (buckets < bn + 1)
119 while (buckets < bn + 1)
120 buckets *= 2;
120 buckets *= 2;
121
121
122 /* try to allocate a large hash table to avoid collisions */
122 /* try to allocate a large hash table to avoid collisions */
123 for (scale = 4; scale; scale /= 2) {
123 for (scale = 4; scale; scale /= 2) {
124 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
124 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
125 if (h)
125 if (h)
126 break;
126 break;
127 }
127 }
128
128
129 if (!h)
129 if (!h)
130 return 0;
130 return 0;
131
131
132 buckets = buckets * scale - 1;
132 buckets = buckets * scale - 1;
133
133
134 /* clear the hash table */
134 /* clear the hash table */
135 for (i = 0; i <= buckets; i++) {
135 for (i = 0; i <= buckets; i++) {
136 h[i].pos = INT_MAX;
136 h[i].pos = INT_MAX;
137 h[i].len = 0;
137 h[i].len = 0;
138 }
138 }
139
139
140 /* add lines to the hash table chains */
140 /* add lines to the hash table chains */
141 for (i = bn - 1; i >= 0; i--) {
141 for (i = bn - 1; i >= 0; i--) {
142 /* find the equivalence class */
142 /* find the equivalence class */
143 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
143 for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
144 j = (j + 1) & buckets)
144 j = (j + 1) & buckets)
145 if (!cmp(b + i, b + h[j].pos))
145 if (!cmp(b + i, b + h[j].pos))
146 break;
146 break;
147
147
148 /* add to the head of the equivalence class */
148 /* add to the head of the equivalence class */
149 b[i].n = h[j].pos;
149 b[i].n = h[j].pos;
150 b[i].e = j;
150 b[i].e = j;
151 h[j].pos = i;
151 h[j].pos = i;
152 h[j].len++; /* keep track of popularity */
152 h[j].len++; /* keep track of popularity */
153 }
153 }
154
154
155 /* compute popularity threshold */
155 /* compute popularity threshold */
156 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
156 t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);
157
157
158 /* match items in a to their equivalence class in b */
158 /* match items in a to their equivalence class in b */
159 for (i = 0; i < an; i++) {
159 for (i = 0; i < an; i++) {
160 /* find the equivalence class */
160 /* find the equivalence class */
161 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
161 for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
162 j = (j + 1) & buckets)
162 j = (j + 1) & buckets)
163 if (!cmp(a + i, b + h[j].pos))
163 if (!cmp(a + i, b + h[j].pos))
164 break;
164 break;
165
165
166 a[i].e = j; /* use equivalence class for quick compare */
166 a[i].e = j; /* use equivalence class for quick compare */
167 if (h[j].len <= t)
167 if (h[j].len <= t)
168 a[i].n = h[j].pos; /* point to head of match list */
168 a[i].n = h[j].pos; /* point to head of match list */
169 else
169 else
170 a[i].n = INT_MAX; /* too popular */
170 a[i].n = INT_MAX; /* too popular */
171 }
171 }
172
172
173 /* discard hash tables */
173 /* discard hash tables */
174 free(h);
174 free(h);
175 return 1;
175 return 1;
176 }
176 }
177
177
178 static int longest_match(struct line *a, struct line *b, struct pos *pos,
178 static int longest_match(struct line *a, struct line *b, struct pos *pos,
179 int a1, int a2, int b1, int b2, int *omi, int *omj)
179 int a1, int a2, int b1, int b2, int *omi, int *omj)
180 {
180 {
181 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
181 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
182
182
183 for (i = a1; i < a2; i++) {
183 for (i = a1; i < a2; i++) {
184 /* skip things before the current block */
184 /* skip things before the current block */
185 for (j = a[i].n; j < b1; j = b[j].n)
185 for (j = a[i].n; j < b1; j = b[j].n)
186 ;
186 ;
187
187
188 /* loop through all lines match a[i] in b */
188 /* loop through all lines match a[i] in b */
189 for (; j < b2; j = b[j].n) {
189 for (; j < b2; j = b[j].n) {
190 /* does this extend an earlier match? */
190 /* does this extend an earlier match? */
191 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
191 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
192 k = pos[j - 1].len + 1;
192 k = pos[j - 1].len + 1;
193 else
193 else
194 k = 1;
194 k = 1;
195 pos[j].pos = i;
195 pos[j].pos = i;
196 pos[j].len = k;
196 pos[j].len = k;
197
197
198 /* best match so far? */
198 /* best match so far? */
199 if (k > mk) {
199 if (k > mk) {
200 mi = i;
200 mi = i;
201 mj = j;
201 mj = j;
202 mk = k;
202 mk = k;
203 }
203 }
204 }
204 }
205 }
205 }
206
206
207 if (mk) {
207 if (mk) {
208 mi = mi - mk + 1;
208 mi = mi - mk + 1;
209 mj = mj - mk + 1;
209 mj = mj - mk + 1;
210 }
210 }
211
211
212 /* expand match to include neighboring popular lines */
212 /* expand match to include neighboring popular lines */
213 while (mi - mb > a1 && mj - mb > b1 &&
213 while (mi - mb > a1 && mj - mb > b1 &&
214 a[mi - mb - 1].e == b[mj - mb - 1].e)
214 a[mi - mb - 1].e == b[mj - mb - 1].e)
215 mb++;
215 mb++;
216 while (mi + mk < a2 && mj + mk < b2 &&
216 while (mi + mk < a2 && mj + mk < b2 &&
217 a[mi + mk].e == b[mj + mk].e)
217 a[mi + mk].e == b[mj + mk].e)
218 mk++;
218 mk++;
219
219
220 *omi = mi - mb;
220 *omi = mi - mb;
221 *omj = mj - mb;
221 *omj = mj - mb;
222
222
223 return mk + mb;
223 return mk + mb;
224 }
224 }
225
225
226 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
226 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
227 int a1, int a2, int b1, int b2, struct hunk *l)
227 int a1, int a2, int b1, int b2, struct hunk *l)
228 {
228 {
229 int i, j, k;
229 int i, j, k;
230
230
231 while (1) {
231 while (1) {
232 /* find the longest match in this chunk */
232 /* find the longest match in this chunk */
233 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
233 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
234 if (!k)
234 if (!k)
235 return l;
235 return l;
236
236
237 /* and recurse on the remaining chunks on either side */
237 /* and recurse on the remaining chunks on either side */
238 l = recurse(a, b, pos, a1, i, b1, j, l);
238 l = recurse(a, b, pos, a1, i, b1, j, l);
239 if (!l)
239 if (!l)
240 return NULL;
240 return NULL;
241
241
242 l->next = (struct hunk *)malloc(sizeof(struct hunk));
242 l->next = (struct hunk *)malloc(sizeof(struct hunk));
243 if (!l->next)
243 if (!l->next)
244 return NULL;
244 return NULL;
245
245
246 l = l->next;
246 l = l->next;
247 l->a1 = i;
247 l->a1 = i;
248 l->a2 = i + k;
248 l->a2 = i + k;
249 l->b1 = j;
249 l->b1 = j;
250 l->b2 = j + k;
250 l->b2 = j + k;
251 l->next = NULL;
251 l->next = NULL;
252
252
253 /* tail-recursion didn't happen, so do equivalent iteration */
253 /* tail-recursion didn't happen, so do equivalent iteration */
254 a1 = i + k;
254 a1 = i + k;
255 b1 = j + k;
255 b1 = j + k;
256 }
256 }
257 }
257 }
258
258
259 static int diff(struct line *a, int an, struct line *b, int bn,
259 static int diff(struct line *a, int an, struct line *b, int bn,
260 struct hunk *base)
260 struct hunk *base)
261 {
261 {
262 struct hunk *curr;
262 struct hunk *curr;
263 struct pos *pos;
263 struct pos *pos;
264 int t, count = 0;
264 int t, count = 0;
265
265
266 /* allocate and fill arrays */
266 /* allocate and fill arrays */
267 t = equatelines(a, an, b, bn);
267 t = equatelines(a, an, b, bn);
268 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
268 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
269
269
270 if (pos && t) {
270 if (pos && t) {
271 /* generate the matching block list */
271 /* generate the matching block list */
272
272
273 curr = recurse(a, b, pos, 0, an, 0, bn, base);
273 curr = recurse(a, b, pos, 0, an, 0, bn, base);
274 if (!curr)
274 if (!curr)
275 return -1;
275 return -1;
276
276
277 /* sentinel end hunk */
277 /* sentinel end hunk */
278 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
278 curr->next = (struct hunk *)malloc(sizeof(struct hunk));
279 if (!curr->next)
279 if (!curr->next)
280 return -1;
280 return -1;
281 curr = curr->next;
281 curr = curr->next;
282 curr->a1 = curr->a2 = an;
282 curr->a1 = curr->a2 = an;
283 curr->b1 = curr->b2 = bn;
283 curr->b1 = curr->b2 = bn;
284 curr->next = NULL;
284 curr->next = NULL;
285 }
285 }
286
286
287 free(pos);
287 free(pos);
288
288
289 /* normalize the hunk list, try to push each hunk towards the end */
289 /* normalize the hunk list, try to push each hunk towards the end */
290 for (curr = base->next; curr; curr = curr->next) {
290 for (curr = base->next; curr; curr = curr->next) {
291 struct hunk *next = curr->next;
291 struct hunk *next = curr->next;
292 int shift = 0;
292 int shift = 0;
293
293
294 if (!next)
294 if (!next)
295 break;
295 break;
296
296
297 if (curr->a2 == next->a1)
297 if (curr->a2 == next->a1)
298 while (curr->a2 + shift < an && curr->b2 + shift < bn
298 while (curr->a2 + shift < an && curr->b2 + shift < bn
299 && !cmp(a + curr->a2 + shift,
299 && !cmp(a + curr->a2 + shift,
300 b + curr->b2 + shift))
300 b + curr->b2 + shift))
301 shift++;
301 shift++;
302 else if (curr->b2 == next->b1)
302 else if (curr->b2 == next->b1)
303 while (curr->b2 + shift < bn && curr->a2 + shift < an
303 while (curr->b2 + shift < bn && curr->a2 + shift < an
304 && !cmp(b + curr->b2 + shift,
304 && !cmp(b + curr->b2 + shift,
305 a + curr->a2 + shift))
305 a + curr->a2 + shift))
306 shift++;
306 shift++;
307 if (!shift)
307 if (!shift)
308 continue;
308 continue;
309 curr->b2 += shift;
309 curr->b2 += shift;
310 next->b1 += shift;
310 next->b1 += shift;
311 curr->a2 += shift;
311 curr->a2 += shift;
312 next->a1 += shift;
312 next->a1 += shift;
313 }
313 }
314
314
315 for (curr = base->next; curr; curr = curr->next)
315 for (curr = base->next; curr; curr = curr->next)
316 count++;
316 count++;
317 return count;
317 return count;
318 }
318 }
319
319
320 static void freehunks(struct hunk *l)
320 static void freehunks(struct hunk *l)
321 {
321 {
322 struct hunk *n;
322 struct hunk *n;
323 for (; l; l = n) {
323 for (; l; l = n) {
324 n = l->next;
324 n = l->next;
325 free(l);
325 free(l);
326 }
326 }
327 }
327 }
328
328
329 static PyObject *blocks(PyObject *self, PyObject *args)
329 static PyObject *blocks(PyObject *self, PyObject *args)
330 {
330 {
331 PyObject *sa, *sb, *rl = NULL, *m;
331 PyObject *sa, *sb, *rl = NULL, *m;
332 struct line *a, *b;
332 struct line *a, *b;
333 struct hunk l, *h;
333 struct hunk l, *h;
334 int an, bn, count, pos = 0;
334 int an, bn, count, pos = 0;
335
335
336 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
336 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
337 return NULL;
337 return NULL;
338
338
339 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
339 an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
340 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
340 bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
341
341
342 if (!a || !b)
342 if (!a || !b)
343 goto nomem;
343 goto nomem;
344
344
345 l.next = NULL;
345 l.next = NULL;
346 count = diff(a, an, b, bn, &l);
346 count = diff(a, an, b, bn, &l);
347 if (count < 0)
347 if (count < 0)
348 goto nomem;
348 goto nomem;
349
349
350 rl = PyList_New(count);
350 rl = PyList_New(count);
351 if (!rl)
351 if (!rl)
352 goto nomem;
352 goto nomem;
353
353
354 for (h = l.next; h; h = h->next) {
354 for (h = l.next; h; h = h->next) {
355 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
355 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
356 PyList_SetItem(rl, pos, m);
356 PyList_SetItem(rl, pos, m);
357 pos++;
357 pos++;
358 }
358 }
359
359
360 nomem:
360 nomem:
361 free(a);
361 free(a);
362 free(b);
362 free(b);
363 freehunks(l.next);
363 freehunks(l.next);
364 return rl ? rl : PyErr_NoMemory();
364 return rl ? rl : PyErr_NoMemory();
365 }
365 }
366
366
367 static PyObject *bdiff(PyObject *self, PyObject *args)
367 static PyObject *bdiff(PyObject *self, PyObject *args)
368 {
368 {
369 char *sa, *sb, *rb;
369 char *sa, *sb, *rb;
370 PyObject *result = NULL;
370 PyObject *result = NULL;
371 struct line *al, *bl;
371 struct line *al, *bl;
372 struct hunk l, *h;
372 struct hunk l, *h;
373 uint32_t encode[3];
373 uint32_t encode[3];
374 int an, bn, len = 0, la, lb, count;
374 int an, bn, len = 0, la, lb, count;
375
375
376 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
376 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
377 return NULL;
377 return NULL;
378
378
379 an = splitlines(sa, la, &al);
379 an = splitlines(sa, la, &al);
380 bn = splitlines(sb, lb, &bl);
380 bn = splitlines(sb, lb, &bl);
381 if (!al || !bl)
381 if (!al || !bl)
382 goto nomem;
382 goto nomem;
383
383
384 l.next = NULL;
384 l.next = NULL;
385 count = diff(al, an, bl, bn, &l);
385 count = diff(al, an, bl, bn, &l);
386 if (count < 0)
386 if (count < 0)
387 goto nomem;
387 goto nomem;
388
388
389 /* calculate length of output */
389 /* calculate length of output */
390 la = lb = 0;
390 la = lb = 0;
391 for (h = l.next; h; h = h->next) {
391 for (h = l.next; h; h = h->next) {
392 if (h->a1 != la || h->b1 != lb)
392 if (h->a1 != la || h->b1 != lb)
393 len += 12 + bl[h->b1].l - bl[lb].l;
393 len += 12 + bl[h->b1].l - bl[lb].l;
394 la = h->a2;
394 la = h->a2;
395 lb = h->b2;
395 lb = h->b2;
396 }
396 }
397
397
398 result = PyBytes_FromStringAndSize(NULL, len);
398 result = PyBytes_FromStringAndSize(NULL, len);
399
399
400 if (!result)
400 if (!result)
401 goto nomem;
401 goto nomem;
402
402
403 /* build binary patch */
403 /* build binary patch */
404 rb = PyBytes_AsString(result);
404 rb = PyBytes_AsString(result);
405 la = lb = 0;
405 la = lb = 0;
406
406
407 for (h = l.next; h; h = h->next) {
407 for (h = l.next; h; h = h->next) {
408 if (h->a1 != la || h->b1 != lb) {
408 if (h->a1 != la || h->b1 != lb) {
409 len = bl[h->b1].l - bl[lb].l;
409 len = bl[h->b1].l - bl[lb].l;
410 encode[0] = htonl(al[la].l - al->l);
410 encode[0] = htonl(al[la].l - al->l);
411 encode[1] = htonl(al[h->a1].l - al->l);
411 encode[1] = htonl(al[h->a1].l - al->l);
412 encode[2] = htonl(len);
412 encode[2] = htonl(len);
413 memcpy(rb, encode, 12);
413 memcpy(rb, encode, 12);
414 memcpy(rb + 12, bl[lb].l, len);
414 memcpy(rb + 12, bl[lb].l, len);
415 rb += 12 + len;
415 rb += 12 + len;
416 }
416 }
417 la = h->a2;
417 la = h->a2;
418 lb = h->b2;
418 lb = h->b2;
419 }
419 }
420
420
421 nomem:
421 nomem:
422 free(al);
422 free(al);
423 free(bl);
423 free(bl);
424 freehunks(l.next);
424 freehunks(l.next);
425 return result ? result : PyErr_NoMemory();
425 return result ? result : PyErr_NoMemory();
426 }
426 }
427
427
428 /*
429 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
430 * reduce whitespace sequences to a single space and trim remaining whitespace
431 * from end of lines.
432 */
433 static PyObject *fixws(PyObject *self, PyObject *args)
434 {
435 PyObject *s, *result = NULL;
436 char allws, c;
437 const char *r;
438 int i, rlen, wlen = 0;
439 char *w;
440
441 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
442 return NULL;
443 r = PyBytes_AsString(s);
444 rlen = PyBytes_Size(s);
445
446 w = (char *)malloc(rlen);
447 if (!w)
448 goto nomem;
449
450 for (i = 0; i != rlen; i++) {
451 c = r[i];
452 if (c == ' ' || c == '\t' || c == '\r') {
453 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
454 w[wlen++] = ' ';
455 } else if (c == '\n' && !allws
456 && wlen > 0 && w[wlen - 1] == ' ') {
457 w[wlen - 1] = '\n';
458 } else {
459 w[wlen++] = c;
460 }
461 }
462
463 result = PyBytes_FromStringAndSize(w, wlen);
464
465 nomem:
466 free(w);
467 return result ? result : PyErr_NoMemory();
468 }
469
470
428 static char mdiff_doc[] = "Efficient binary diff.";
471 static char mdiff_doc[] = "Efficient binary diff.";
429
472
430 static PyMethodDef methods[] = {
473 static PyMethodDef methods[] = {
431 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
474 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
432 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
475 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
476 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
433 {NULL, NULL}
477 {NULL, NULL}
434 };
478 };
435
479
436 #ifdef IS_PY3K
480 #ifdef IS_PY3K
437 static struct PyModuleDef bdiff_module = {
481 static struct PyModuleDef bdiff_module = {
438 PyModuleDef_HEAD_INIT,
482 PyModuleDef_HEAD_INIT,
439 "bdiff",
483 "bdiff",
440 mdiff_doc,
484 mdiff_doc,
441 -1,
485 -1,
442 methods
486 methods
443 };
487 };
444
488
445 PyMODINIT_FUNC PyInit_bdiff(void)
489 PyMODINIT_FUNC PyInit_bdiff(void)
446 {
490 {
447 return PyModule_Create(&bdiff_module);
491 return PyModule_Create(&bdiff_module);
448 }
492 }
449 #else
493 #else
450 PyMODINIT_FUNC initbdiff(void)
494 PyMODINIT_FUNC initbdiff(void)
451 {
495 {
452 Py_InitModule3("bdiff", methods, mdiff_doc);
496 Py_InitModule3("bdiff", methods, mdiff_doc);
453 }
497 }
454 #endif
498 #endif
455
499
@@ -1,334 +1,333 b''
1 # mdiff.py - diff and patch routines for mercurial
1 # mdiff.py - diff and patch routines for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import bdiff, mpatch, util
9 import bdiff, mpatch, util
10 import re, struct
10 import re, struct
11
11
12 def splitnewlines(text):
12 def splitnewlines(text):
13 '''like str.splitlines, but only split on newlines.'''
13 '''like str.splitlines, but only split on newlines.'''
14 lines = [l + '\n' for l in text.split('\n')]
14 lines = [l + '\n' for l in text.split('\n')]
15 if lines:
15 if lines:
16 if lines[-1] == '\n':
16 if lines[-1] == '\n':
17 lines.pop()
17 lines.pop()
18 else:
18 else:
19 lines[-1] = lines[-1][:-1]
19 lines[-1] = lines[-1][:-1]
20 return lines
20 return lines
21
21
22 class diffopts(object):
22 class diffopts(object):
23 '''context is the number of context lines
23 '''context is the number of context lines
24 text treats all files as text
24 text treats all files as text
25 showfunc enables diff -p output
25 showfunc enables diff -p output
26 git enables the git extended patch format
26 git enables the git extended patch format
27 nodates removes dates from diff headers
27 nodates removes dates from diff headers
28 ignorews ignores all whitespace changes in the diff
28 ignorews ignores all whitespace changes in the diff
29 ignorewsamount ignores changes in the amount of whitespace
29 ignorewsamount ignores changes in the amount of whitespace
30 ignoreblanklines ignores changes whose lines are all blank
30 ignoreblanklines ignores changes whose lines are all blank
31 upgrade generates git diffs to avoid data loss
31 upgrade generates git diffs to avoid data loss
32 '''
32 '''
33
33
34 defaults = {
34 defaults = {
35 'context': 3,
35 'context': 3,
36 'text': False,
36 'text': False,
37 'showfunc': False,
37 'showfunc': False,
38 'git': False,
38 'git': False,
39 'nodates': False,
39 'nodates': False,
40 'ignorews': False,
40 'ignorews': False,
41 'ignorewsamount': False,
41 'ignorewsamount': False,
42 'ignoreblanklines': False,
42 'ignoreblanklines': False,
43 'upgrade': False,
43 'upgrade': False,
44 }
44 }
45
45
46 __slots__ = defaults.keys()
46 __slots__ = defaults.keys()
47
47
48 def __init__(self, **opts):
48 def __init__(self, **opts):
49 for k in self.__slots__:
49 for k in self.__slots__:
50 v = opts.get(k)
50 v = opts.get(k)
51 if v is None:
51 if v is None:
52 v = self.defaults[k]
52 v = self.defaults[k]
53 setattr(self, k, v)
53 setattr(self, k, v)
54
54
55 try:
55 try:
56 self.context = int(self.context)
56 self.context = int(self.context)
57 except ValueError:
57 except ValueError:
58 raise util.Abort(_('diff context lines count must be '
58 raise util.Abort(_('diff context lines count must be '
59 'an integer, not %r') % self.context)
59 'an integer, not %r') % self.context)
60
60
61 def copy(self, **kwargs):
61 def copy(self, **kwargs):
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
62 opts = dict((k, getattr(self, k)) for k in self.defaults)
63 opts.update(kwargs)
63 opts.update(kwargs)
64 return diffopts(**opts)
64 return diffopts(**opts)
65
65
66 defaultopts = diffopts()
66 defaultopts = diffopts()
67
67
68 def wsclean(opts, text, blank=True):
68 def wsclean(opts, text, blank=True):
69 if opts.ignorews:
69 if opts.ignorews:
70 text = re.sub('[ \t\r]+', '', text)
70 text = bdiff.fixws(text, 1)
71 elif opts.ignorewsamount:
71 elif opts.ignorewsamount:
72 text = re.sub('[ \t\r]+', ' ', text)
72 text = bdiff.fixws(text, 0)
73 text = text.replace(' \n', '\n')
74 if blank and opts.ignoreblanklines:
73 if blank and opts.ignoreblanklines:
75 text = re.sub('\n+', '\n', text).strip('\n')
74 text = re.sub('\n+', '\n', text).strip('\n')
76 return text
75 return text
77
76
78 def splitblock(base1, lines1, base2, lines2, opts):
77 def splitblock(base1, lines1, base2, lines2, opts):
79 # The input lines matches except for interwoven blank lines. We
78 # The input lines matches except for interwoven blank lines. We
80 # transform it into a sequence of matching blocks and blank blocks.
79 # transform it into a sequence of matching blocks and blank blocks.
81 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
80 lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
82 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
81 lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
83 s1, e1 = 0, len(lines1)
82 s1, e1 = 0, len(lines1)
84 s2, e2 = 0, len(lines2)
83 s2, e2 = 0, len(lines2)
85 while s1 < e1 or s2 < e2:
84 while s1 < e1 or s2 < e2:
86 i1, i2, btype = s1, s2, '='
85 i1, i2, btype = s1, s2, '='
87 if (i1 >= e1 or lines1[i1] == 0
86 if (i1 >= e1 or lines1[i1] == 0
88 or i2 >= e2 or lines2[i2] == 0):
87 or i2 >= e2 or lines2[i2] == 0):
89 # Consume the block of blank lines
88 # Consume the block of blank lines
90 btype = '~'
89 btype = '~'
91 while i1 < e1 and lines1[i1] == 0:
90 while i1 < e1 and lines1[i1] == 0:
92 i1 += 1
91 i1 += 1
93 while i2 < e2 and lines2[i2] == 0:
92 while i2 < e2 and lines2[i2] == 0:
94 i2 += 1
93 i2 += 1
95 else:
94 else:
96 # Consume the matching lines
95 # Consume the matching lines
97 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
96 while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1:
98 i1 += 1
97 i1 += 1
99 i2 += 1
98 i2 += 1
100 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
99 yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype
101 s1 = i1
100 s1 = i1
102 s2 = i2
101 s2 = i2
103
102
104 def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
103 def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
105 """Return (block, type) tuples, where block is an mdiff.blocks
104 """Return (block, type) tuples, where block is an mdiff.blocks
106 line entry. type is '=' for blocks matching exactly one another
105 line entry. type is '=' for blocks matching exactly one another
107 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
106 (bdiff blocks), '!' for non-matching blocks and '~' for blocks
108 matching only after having filtered blank lines. If refine is True,
107 matching only after having filtered blank lines. If refine is True,
109 then '~' blocks are refined and are only made of blank lines.
108 then '~' blocks are refined and are only made of blank lines.
110 line1 and line2 are text1 and text2 split with splitnewlines() if
109 line1 and line2 are text1 and text2 split with splitnewlines() if
111 they are already available.
110 they are already available.
112 """
111 """
113 if opts is None:
112 if opts is None:
114 opts = defaultopts
113 opts = defaultopts
115 if opts.ignorews or opts.ignorewsamount:
114 if opts.ignorews or opts.ignorewsamount:
116 text1 = wsclean(opts, text1, False)
115 text1 = wsclean(opts, text1, False)
117 text2 = wsclean(opts, text2, False)
116 text2 = wsclean(opts, text2, False)
118 diff = bdiff.blocks(text1, text2)
117 diff = bdiff.blocks(text1, text2)
119 for i, s1 in enumerate(diff):
118 for i, s1 in enumerate(diff):
120 # The first match is special.
119 # The first match is special.
121 # we've either found a match starting at line 0 or a match later
120 # we've either found a match starting at line 0 or a match later
122 # in the file. If it starts later, old and new below will both be
121 # in the file. If it starts later, old and new below will both be
123 # empty and we'll continue to the next match.
122 # empty and we'll continue to the next match.
124 if i > 0:
123 if i > 0:
125 s = diff[i - 1]
124 s = diff[i - 1]
126 else:
125 else:
127 s = [0, 0, 0, 0]
126 s = [0, 0, 0, 0]
128 s = [s[1], s1[0], s[3], s1[2]]
127 s = [s[1], s1[0], s[3], s1[2]]
129
128
130 # bdiff sometimes gives huge matches past eof, this check eats them,
129 # bdiff sometimes gives huge matches past eof, this check eats them,
131 # and deals with the special first match case described above
130 # and deals with the special first match case described above
132 if s[0] != s[1] or s[2] != s[3]:
131 if s[0] != s[1] or s[2] != s[3]:
133 type = '!'
132 type = '!'
134 if opts.ignoreblanklines:
133 if opts.ignoreblanklines:
135 if lines1 is None:
134 if lines1 is None:
136 lines1 = splitnewlines(text1)
135 lines1 = splitnewlines(text1)
137 if lines2 is None:
136 if lines2 is None:
138 lines2 = splitnewlines(text2)
137 lines2 = splitnewlines(text2)
139 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
138 old = wsclean(opts, "".join(lines1[s[0]:s[1]]))
140 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
139 new = wsclean(opts, "".join(lines2[s[2]:s[3]]))
141 if old == new:
140 if old == new:
142 type = '~'
141 type = '~'
143 yield s, type
142 yield s, type
144 yield s1, '='
143 yield s1, '='
145
144
146 def diffline(revs, a, b, opts):
145 def diffline(revs, a, b, opts):
147 parts = ['diff']
146 parts = ['diff']
148 if opts.git:
147 if opts.git:
149 parts.append('--git')
148 parts.append('--git')
150 if revs and not opts.git:
149 if revs and not opts.git:
151 parts.append(' '.join(["-r %s" % rev for rev in revs]))
150 parts.append(' '.join(["-r %s" % rev for rev in revs]))
152 if opts.git:
151 if opts.git:
153 parts.append('a/%s' % a)
152 parts.append('a/%s' % a)
154 parts.append('b/%s' % b)
153 parts.append('b/%s' % b)
155 else:
154 else:
156 parts.append(a)
155 parts.append(a)
157 return ' '.join(parts) + '\n'
156 return ' '.join(parts) + '\n'
158
157
159 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
158 def unidiff(a, ad, b, bd, fn1, fn2, r=None, opts=defaultopts):
160 def datetag(date, addtab=True):
159 def datetag(date, addtab=True):
161 if not opts.git and not opts.nodates:
160 if not opts.git and not opts.nodates:
162 return '\t%s\n' % date
161 return '\t%s\n' % date
163 if addtab and ' ' in fn1:
162 if addtab and ' ' in fn1:
164 return '\t\n'
163 return '\t\n'
165 return '\n'
164 return '\n'
166
165
167 if not a and not b:
166 if not a and not b:
168 return ""
167 return ""
169 epoch = util.datestr((0, 0))
168 epoch = util.datestr((0, 0))
170
169
171 fn1 = util.pconvert(fn1)
170 fn1 = util.pconvert(fn1)
172 fn2 = util.pconvert(fn2)
171 fn2 = util.pconvert(fn2)
173
172
174 if not opts.text and (util.binary(a) or util.binary(b)):
173 if not opts.text and (util.binary(a) or util.binary(b)):
175 if a and b and len(a) == len(b) and a == b:
174 if a and b and len(a) == len(b) and a == b:
176 return ""
175 return ""
177 l = ['Binary file %s has changed\n' % fn1]
176 l = ['Binary file %s has changed\n' % fn1]
178 elif not a:
177 elif not a:
179 b = splitnewlines(b)
178 b = splitnewlines(b)
180 if a is None:
179 if a is None:
181 l1 = '--- /dev/null%s' % datetag(epoch, False)
180 l1 = '--- /dev/null%s' % datetag(epoch, False)
182 else:
181 else:
183 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
182 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
184 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
183 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
185 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
184 l3 = "@@ -0,0 +1,%d @@\n" % len(b)
186 l = [l1, l2, l3] + ["+" + e for e in b]
185 l = [l1, l2, l3] + ["+" + e for e in b]
187 elif not b:
186 elif not b:
188 a = splitnewlines(a)
187 a = splitnewlines(a)
189 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
188 l1 = "--- %s%s" % ("a/" + fn1, datetag(ad))
190 if b is None:
189 if b is None:
191 l2 = '+++ /dev/null%s' % datetag(epoch, False)
190 l2 = '+++ /dev/null%s' % datetag(epoch, False)
192 else:
191 else:
193 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
192 l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd))
194 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
193 l3 = "@@ -1,%d +0,0 @@\n" % len(a)
195 l = [l1, l2, l3] + ["-" + e for e in a]
194 l = [l1, l2, l3] + ["-" + e for e in a]
196 else:
195 else:
197 al = splitnewlines(a)
196 al = splitnewlines(a)
198 bl = splitnewlines(b)
197 bl = splitnewlines(b)
199 l = list(_unidiff(a, b, al, bl, opts=opts))
198 l = list(_unidiff(a, b, al, bl, opts=opts))
200 if not l:
199 if not l:
201 return ""
200 return ""
202
201
203 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
202 l.insert(0, "--- a/%s%s" % (fn1, datetag(ad)))
204 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
203 l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd)))
205
204
206 for ln in xrange(len(l)):
205 for ln in xrange(len(l)):
207 if l[ln][-1] != '\n':
206 if l[ln][-1] != '\n':
208 l[ln] += "\n\ No newline at end of file\n"
207 l[ln] += "\n\ No newline at end of file\n"
209
208
210 if r:
209 if r:
211 l.insert(0, diffline(r, fn1, fn2, opts))
210 l.insert(0, diffline(r, fn1, fn2, opts))
212
211
213 return "".join(l)
212 return "".join(l)
214
213
215 # creates a headerless unified diff
214 # creates a headerless unified diff
216 # t1 and t2 are the text to be diffed
215 # t1 and t2 are the text to be diffed
217 # l1 and l2 are the text broken up into lines
216 # l1 and l2 are the text broken up into lines
218 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
217 def _unidiff(t1, t2, l1, l2, opts=defaultopts):
219 def contextend(l, len):
218 def contextend(l, len):
220 ret = l + opts.context
219 ret = l + opts.context
221 if ret > len:
220 if ret > len:
222 ret = len
221 ret = len
223 return ret
222 return ret
224
223
225 def contextstart(l):
224 def contextstart(l):
226 ret = l - opts.context
225 ret = l - opts.context
227 if ret < 0:
226 if ret < 0:
228 return 0
227 return 0
229 return ret
228 return ret
230
229
231 lastfunc = [0, '']
230 lastfunc = [0, '']
232 def yieldhunk(hunk):
231 def yieldhunk(hunk):
233 (astart, a2, bstart, b2, delta) = hunk
232 (astart, a2, bstart, b2, delta) = hunk
234 aend = contextend(a2, len(l1))
233 aend = contextend(a2, len(l1))
235 alen = aend - astart
234 alen = aend - astart
236 blen = b2 - bstart + aend - a2
235 blen = b2 - bstart + aend - a2
237
236
238 func = ""
237 func = ""
239 if opts.showfunc:
238 if opts.showfunc:
240 lastpos, func = lastfunc
239 lastpos, func = lastfunc
241 # walk backwards from the start of the context up to the start of
240 # walk backwards from the start of the context up to the start of
242 # the previous hunk context until we find a line starting with an
241 # the previous hunk context until we find a line starting with an
243 # alphanumeric char.
242 # alphanumeric char.
244 for i in xrange(astart - 1, lastpos - 1, -1):
243 for i in xrange(astart - 1, lastpos - 1, -1):
245 if l1[i][0].isalnum():
244 if l1[i][0].isalnum():
246 func = ' ' + l1[i].rstrip()[:40]
245 func = ' ' + l1[i].rstrip()[:40]
247 lastfunc[1] = func
246 lastfunc[1] = func
248 break
247 break
249 # by recording this hunk's starting point as the next place to
248 # by recording this hunk's starting point as the next place to
250 # start looking for function lines, we avoid reading any line in
249 # start looking for function lines, we avoid reading any line in
251 # the file more than once.
250 # the file more than once.
252 lastfunc[0] = astart
251 lastfunc[0] = astart
253
252
254 # zero-length hunk ranges report their start line as one less
253 # zero-length hunk ranges report their start line as one less
255 if alen:
254 if alen:
256 astart += 1
255 astart += 1
257 if blen:
256 if blen:
258 bstart += 1
257 bstart += 1
259
258
260 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
259 yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen,
261 bstart, blen, func)
260 bstart, blen, func)
262 for x in delta:
261 for x in delta:
263 yield x
262 yield x
264 for x in xrange(a2, aend):
263 for x in xrange(a2, aend):
265 yield ' ' + l1[x]
264 yield ' ' + l1[x]
266
265
267 # bdiff.blocks gives us the matching sequences in the files. The loop
266 # bdiff.blocks gives us the matching sequences in the files. The loop
268 # below finds the spaces between those matching sequences and translates
267 # below finds the spaces between those matching sequences and translates
269 # them into diff output.
268 # them into diff output.
270 #
269 #
271 hunk = None
270 hunk = None
272 for s, stype in allblocks(t1, t2, opts, l1, l2):
271 for s, stype in allblocks(t1, t2, opts, l1, l2):
273 if stype != '!':
272 if stype != '!':
274 continue
273 continue
275 delta = []
274 delta = []
276 a1, a2, b1, b2 = s
275 a1, a2, b1, b2 = s
277 old = l1[a1:a2]
276 old = l1[a1:a2]
278 new = l2[b1:b2]
277 new = l2[b1:b2]
279
278
280 astart = contextstart(a1)
279 astart = contextstart(a1)
281 bstart = contextstart(b1)
280 bstart = contextstart(b1)
282 prev = None
281 prev = None
283 if hunk:
282 if hunk:
284 # join with the previous hunk if it falls inside the context
283 # join with the previous hunk if it falls inside the context
285 if astart < hunk[1] + opts.context + 1:
284 if astart < hunk[1] + opts.context + 1:
286 prev = hunk
285 prev = hunk
287 astart = hunk[1]
286 astart = hunk[1]
288 bstart = hunk[3]
287 bstart = hunk[3]
289 else:
288 else:
290 for x in yieldhunk(hunk):
289 for x in yieldhunk(hunk):
291 yield x
290 yield x
292 if prev:
291 if prev:
293 # we've joined the previous hunk, record the new ending points.
292 # we've joined the previous hunk, record the new ending points.
294 hunk[1] = a2
293 hunk[1] = a2
295 hunk[3] = b2
294 hunk[3] = b2
296 delta = hunk[4]
295 delta = hunk[4]
297 else:
296 else:
298 # create a new hunk
297 # create a new hunk
299 hunk = [astart, a2, bstart, b2, delta]
298 hunk = [astart, a2, bstart, b2, delta]
300
299
301 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
300 delta[len(delta):] = [' ' + x for x in l1[astart:a1]]
302 delta[len(delta):] = ['-' + x for x in old]
301 delta[len(delta):] = ['-' + x for x in old]
303 delta[len(delta):] = ['+' + x for x in new]
302 delta[len(delta):] = ['+' + x for x in new]
304
303
305 if hunk:
304 if hunk:
306 for x in yieldhunk(hunk):
305 for x in yieldhunk(hunk):
307 yield x
306 yield x
308
307
309 def patchtext(bin):
308 def patchtext(bin):
310 pos = 0
309 pos = 0
311 t = []
310 t = []
312 while pos < len(bin):
311 while pos < len(bin):
313 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
312 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
314 pos += 12
313 pos += 12
315 t.append(bin[pos:pos + l])
314 t.append(bin[pos:pos + l])
316 pos += l
315 pos += l
317 return "".join(t)
316 return "".join(t)
318
317
319 def patch(a, bin):
318 def patch(a, bin):
320 if len(a) == 0:
319 if len(a) == 0:
321 # skip over trivial delta header
320 # skip over trivial delta header
322 return buffer(bin, 12)
321 return buffer(bin, 12)
323 return mpatch.patches(a, [bin])
322 return mpatch.patches(a, [bin])
324
323
325 # similar to difflib.SequenceMatcher.get_matching_blocks
324 # similar to difflib.SequenceMatcher.get_matching_blocks
326 def get_matching_blocks(a, b):
325 def get_matching_blocks(a, b):
327 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
326 return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
328
327
329 def trivialdiffheader(length):
328 def trivialdiffheader(length):
330 return struct.pack(">lll", 0, 0, length)
329 return struct.pack(">lll", 0, 0, length)
331
330
332 patches = mpatch.patches
331 patches = mpatch.patches
333 patchedsize = mpatch.patchedsize
332 patchedsize = mpatch.patchedsize
334 textdiff = bdiff.bdiff
333 textdiff = bdiff.bdiff
@@ -1,80 +1,87 b''
1 # bdiff.py - Python implementation of bdiff.c
1 # bdiff.py - Python implementation of bdiff.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import struct, difflib
8 import struct, difflib, re
9
9
10 def splitnewlines(text):
10 def splitnewlines(text):
11 '''like str.splitlines, but only split on newlines.'''
11 '''like str.splitlines, but only split on newlines.'''
12 lines = [l + '\n' for l in text.split('\n')]
12 lines = [l + '\n' for l in text.split('\n')]
13 if lines:
13 if lines:
14 if lines[-1] == '\n':
14 if lines[-1] == '\n':
15 lines.pop()
15 lines.pop()
16 else:
16 else:
17 lines[-1] = lines[-1][:-1]
17 lines[-1] = lines[-1][:-1]
18 return lines
18 return lines
19
19
20 def _normalizeblocks(a, b, blocks):
20 def _normalizeblocks(a, b, blocks):
21 prev = None
21 prev = None
22 r = []
22 r = []
23 for curr in blocks:
23 for curr in blocks:
24 if prev is None:
24 if prev is None:
25 prev = curr
25 prev = curr
26 continue
26 continue
27 shift = 0
27 shift = 0
28
28
29 a1, b1, l1 = prev
29 a1, b1, l1 = prev
30 a1end = a1 + l1
30 a1end = a1 + l1
31 b1end = b1 + l1
31 b1end = b1 + l1
32
32
33 a2, b2, l2 = curr
33 a2, b2, l2 = curr
34 a2end = a2 + l2
34 a2end = a2 + l2
35 b2end = b2 + l2
35 b2end = b2 + l2
36 if a1end == a2:
36 if a1end == a2:
37 while (a1end + shift < a2end and
37 while (a1end + shift < a2end and
38 a[a1end + shift] == b[b1end + shift]):
38 a[a1end + shift] == b[b1end + shift]):
39 shift += 1
39 shift += 1
40 elif b1end == b2:
40 elif b1end == b2:
41 while (b1end + shift < b2end and
41 while (b1end + shift < b2end and
42 a[a1end + shift] == b[b1end + shift]):
42 a[a1end + shift] == b[b1end + shift]):
43 shift += 1
43 shift += 1
44 r.append((a1, b1, l1 + shift))
44 r.append((a1, b1, l1 + shift))
45 prev = a2 + shift, b2 + shift, l2 - shift
45 prev = a2 + shift, b2 + shift, l2 - shift
46 r.append(prev)
46 r.append(prev)
47 return r
47 return r
48
48
49 def bdiff(a, b):
49 def bdiff(a, b):
50 a = str(a).splitlines(True)
50 a = str(a).splitlines(True)
51 b = str(b).splitlines(True)
51 b = str(b).splitlines(True)
52
52
53 if not a:
53 if not a:
54 s = "".join(b)
54 s = "".join(b)
55 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
55 return s and (struct.pack(">lll", 0, 0, len(s)) + s)
56
56
57 bin = []
57 bin = []
58 p = [0]
58 p = [0]
59 for i in a: p.append(p[-1] + len(i))
59 for i in a: p.append(p[-1] + len(i))
60
60
61 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
61 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
62 d = _normalizeblocks(a, b, d)
62 d = _normalizeblocks(a, b, d)
63 la = 0
63 la = 0
64 lb = 0
64 lb = 0
65 for am, bm, size in d:
65 for am, bm, size in d:
66 s = "".join(b[lb:bm])
66 s = "".join(b[lb:bm])
67 if am > la or s:
67 if am > la or s:
68 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
68 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
69 la = am + size
69 la = am + size
70 lb = bm + size
70 lb = bm + size
71
71
72 return "".join(bin)
72 return "".join(bin)
73
73
74 def blocks(a, b):
74 def blocks(a, b):
75 an = splitnewlines(a)
75 an = splitnewlines(a)
76 bn = splitnewlines(b)
76 bn = splitnewlines(b)
77 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
77 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
78 d = _normalizeblocks(an, bn, d)
78 d = _normalizeblocks(an, bn, d)
79 return [(i, i + n, j, j + n) for (i, j, n) in d]
79 return [(i, i + n, j, j + n) for (i, j, n) in d]
80
80
81 def fixws(text, allws):
82 if allws:
83 text = re.sub('[ \t\r]+', '', text)
84 else:
85 text = re.sub('[ \t\r]+', ' ', text)
86 text = text.replace(' \n', '\n')
87 return text
@@ -1,52 +1,66 b''
1 import struct
1 import struct
2 from mercurial import bdiff, mpatch
2 from mercurial import bdiff, mpatch
3
3
4 def test1(a, b):
4 def test1(a, b):
5 d = bdiff.bdiff(a, b)
5 d = bdiff.bdiff(a, b)
6 c = a
6 c = a
7 if d:
7 if d:
8 c = mpatch.patches(a, [d])
8 c = mpatch.patches(a, [d])
9 if c != b:
9 if c != b:
10 print "***", repr(a), repr(b)
10 print "***", repr(a), repr(b)
11 print "bad:"
11 print "bad:"
12 print repr(c)[:200]
12 print repr(c)[:200]
13 print repr(d)
13 print repr(d)
14
14
15 def test(a, b):
15 def test(a, b):
16 print "***", repr(a), repr(b)
16 print "***", repr(a), repr(b)
17 test1(a, b)
17 test1(a, b)
18 test1(b, a)
18 test1(b, a)
19
19
20 test("a\nc\n\n\n\n", "a\nb\n\n\n")
20 test("a\nc\n\n\n\n", "a\nb\n\n\n")
21 test("a\nb\nc\n", "a\nc\n")
21 test("a\nb\nc\n", "a\nc\n")
22 test("", "")
22 test("", "")
23 test("a\nb\nc", "a\nb\nc")
23 test("a\nb\nc", "a\nb\nc")
24 test("a\nb\nc\nd\n", "a\nd\n")
24 test("a\nb\nc\nd\n", "a\nd\n")
25 test("a\nb\nc\nd\n", "a\nc\ne\n")
25 test("a\nb\nc\nd\n", "a\nc\ne\n")
26 test("a\nb\nc\n", "a\nc\n")
26 test("a\nb\nc\n", "a\nc\n")
27 test("a\n", "c\na\nb\n")
27 test("a\n", "c\na\nb\n")
28 test("a\n", "")
28 test("a\n", "")
29 test("a\n", "b\nc\n")
29 test("a\n", "b\nc\n")
30 test("a\n", "c\na\n")
30 test("a\n", "c\na\n")
31 test("", "adjfkjdjksdhfksj")
31 test("", "adjfkjdjksdhfksj")
32 test("", "ab")
32 test("", "ab")
33 test("", "abc")
33 test("", "abc")
34 test("a", "a")
34 test("a", "a")
35 test("ab", "ab")
35 test("ab", "ab")
36 test("abc", "abc")
36 test("abc", "abc")
37 test("a\n", "a\n")
37 test("a\n", "a\n")
38 test("a\nb", "a\nb")
38 test("a\nb", "a\nb")
39
39
40 #issue1295
40 #issue1295
41 def showdiff(a, b):
41 def showdiff(a, b):
42 bin = bdiff.bdiff(a, b)
42 bin = bdiff.bdiff(a, b)
43 pos = 0
43 pos = 0
44 while pos < len(bin):
44 while pos < len(bin):
45 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
45 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
46 pos += 12
46 pos += 12
47 print p1, p2, repr(bin[pos:pos + l])
47 print p1, p2, repr(bin[pos:pos + l])
48 pos += l
48 pos += l
49 showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\nx\n\nz\n")
49 showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\nx\n\nz\n")
50 showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n")
50 showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n")
51
51
52 print "done"
52 print "done"
53
54 def testfixws(a, b, allws):
55 c = bdiff.fixws(a, allws)
56 if c != b:
57 print "*** fixws", repr(a), repr(b), allws
58 print "got:"
59 print repr(c)
60
61 testfixws(" \ta\r b\t\n", "ab\n", 1)
62 testfixws(" \ta\r b\t\n", " a b\n", 0)
63 testfixws("", "", 1)
64 testfixws("", "", 0)
65
66 print "done"
@@ -1,23 +1,24 b''
1 *** 'a\nc\n\n\n\n' 'a\nb\n\n\n'
1 *** 'a\nc\n\n\n\n' 'a\nb\n\n\n'
2 *** 'a\nb\nc\n' 'a\nc\n'
2 *** 'a\nb\nc\n' 'a\nc\n'
3 *** '' ''
3 *** '' ''
4 *** 'a\nb\nc' 'a\nb\nc'
4 *** 'a\nb\nc' 'a\nb\nc'
5 *** 'a\nb\nc\nd\n' 'a\nd\n'
5 *** 'a\nb\nc\nd\n' 'a\nd\n'
6 *** 'a\nb\nc\nd\n' 'a\nc\ne\n'
6 *** 'a\nb\nc\nd\n' 'a\nc\ne\n'
7 *** 'a\nb\nc\n' 'a\nc\n'
7 *** 'a\nb\nc\n' 'a\nc\n'
8 *** 'a\n' 'c\na\nb\n'
8 *** 'a\n' 'c\na\nb\n'
9 *** 'a\n' ''
9 *** 'a\n' ''
10 *** 'a\n' 'b\nc\n'
10 *** 'a\n' 'b\nc\n'
11 *** 'a\n' 'c\na\n'
11 *** 'a\n' 'c\na\n'
12 *** '' 'adjfkjdjksdhfksj'
12 *** '' 'adjfkjdjksdhfksj'
13 *** '' 'ab'
13 *** '' 'ab'
14 *** '' 'abc'
14 *** '' 'abc'
15 *** 'a' 'a'
15 *** 'a' 'a'
16 *** 'ab' 'ab'
16 *** 'ab' 'ab'
17 *** 'abc' 'abc'
17 *** 'abc' 'abc'
18 *** 'a\n' 'a\n'
18 *** 'a\n' 'a\n'
19 *** 'a\nb' 'a\nb'
19 *** 'a\nb' 'a\nb'
20 6 6 'y\n\n'
20 6 6 'y\n\n'
21 6 6 'y\n\n'
21 6 6 'y\n\n'
22 9 9 'y\n\n'
22 9 9 'y\n\n'
23 done
23 done
24 done
General Comments 0
You need to be logged in to leave comments. Login now