##// END OF EJS Templates
bdiff: one more safe call of bdiff_freehunks(NULL)
Yuya Nishihara -
r38328:93b812d5 @41 default
parent child Browse files
Show More
@@ -1,342 +1,340 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "thirdparty/xdiff/xdiff.h"
20 #include "thirdparty/xdiff/xdiff.h"
21 #include "util.h"
21 #include "util.h"
22
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
29
29
30 l.next = NULL;
30 l.next = NULL;
31
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
33 return NULL;
34
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
37
38 if (!a || !b)
38 if (!a || !b)
39 goto nomem;
39 goto nomem;
40
40
41 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
42 if (count < 0)
43 goto nomem;
43 goto nomem;
44
44
45 rl = PyList_New(count);
45 rl = PyList_New(count);
46 if (!rl)
46 if (!rl)
47 goto nomem;
47 goto nomem;
48
48
49 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
52 pos++;
52 pos++;
53 }
53 }
54
54
55 nomem:
55 nomem:
56 free(a);
56 free(a);
57 free(b);
57 free(b);
58 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
60 }
60 }
61
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
63 {
64 Py_buffer ba, bb;
64 Py_buffer ba, bb;
65 char *rb, *ia, *ib;
65 char *rb, *ia, *ib;
66 PyObject *result = NULL;
66 PyObject *result = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
68 struct bdiff_hunk l, *h;
68 struct bdiff_hunk l, *h;
69 int an, bn, count;
69 int an, bn, count;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 PyThreadState *_save = NULL;
71 PyThreadState *_save = NULL;
72
72
73 l.next = NULL;
73 l.next = NULL;
74
74
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 return NULL;
76 return NULL;
77
77
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 goto cleanup;
80 goto cleanup;
81 }
81 }
82
82
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 goto cleanup;
85 goto cleanup;
86 }
86 }
87
87
88 la = ba.len;
88 la = ba.len;
89 lb = bb.len;
89 lb = bb.len;
90
90
91 if (la > UINT_MAX || lb > UINT_MAX) {
91 if (la > UINT_MAX || lb > UINT_MAX) {
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 goto cleanup;
93 goto cleanup;
94 }
94 }
95
95
96 _save = PyEval_SaveThread();
96 _save = PyEval_SaveThread();
97
97
98 lmax = la > lb ? lb : la;
98 lmax = la > lb ? lb : la;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 ++li, ++ia, ++ib) {
100 ++li, ++ia, ++ib) {
101 if (*ia == '\n')
101 if (*ia == '\n')
102 lcommon = li + 1;
102 lcommon = li + 1;
103 }
103 }
104 /* we can almost add: if (li == lmax) lcommon = li; */
104 /* we can almost add: if (li == lmax) lcommon = li; */
105
105
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 if (!al || !bl) {
108 if (!al || !bl) {
109 PyErr_NoMemory();
109 PyErr_NoMemory();
110 goto cleanup;
110 goto cleanup;
111 }
111 }
112
112
113 count = bdiff_diff(al, an, bl, bn, &l);
113 count = bdiff_diff(al, an, bl, bn, &l);
114 if (count < 0) {
114 if (count < 0) {
115 PyErr_NoMemory();
115 PyErr_NoMemory();
116 goto cleanup;
116 goto cleanup;
117 }
117 }
118
118
119 /* calculate length of output */
119 /* calculate length of output */
120 la = lb = 0;
120 la = lb = 0;
121 for (h = l.next; h; h = h->next) {
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb)
122 if (h->a1 != la || h->b1 != lb)
123 len += 12 + bl[h->b1].l - bl[lb].l;
123 len += 12 + bl[h->b1].l - bl[lb].l;
124 la = h->a2;
124 la = h->a2;
125 lb = h->b2;
125 lb = h->b2;
126 }
126 }
127 PyEval_RestoreThread(_save);
127 PyEval_RestoreThread(_save);
128 _save = NULL;
128 _save = NULL;
129
129
130 result = PyBytes_FromStringAndSize(NULL, len);
130 result = PyBytes_FromStringAndSize(NULL, len);
131
131
132 if (!result)
132 if (!result)
133 goto cleanup;
133 goto cleanup;
134
134
135 /* build binary patch */
135 /* build binary patch */
136 rb = PyBytes_AsString(result);
136 rb = PyBytes_AsString(result);
137 la = lb = 0;
137 la = lb = 0;
138
138
139 for (h = l.next; h; h = h->next) {
139 for (h = l.next; h; h = h->next) {
140 if (h->a1 != la || h->b1 != lb) {
140 if (h->a1 != la || h->b1 != lb) {
141 len = bl[h->b1].l - bl[lb].l;
141 len = bl[h->b1].l - bl[lb].l;
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 rb + 4);
144 rb + 4);
145 putbe32((uint32_t)len, rb + 8);
145 putbe32((uint32_t)len, rb + 8);
146 memcpy(rb + 12, bl[lb].l, len);
146 memcpy(rb + 12, bl[lb].l, len);
147 rb += 12 + len;
147 rb += 12 + len;
148 }
148 }
149 la = h->a2;
149 la = h->a2;
150 lb = h->b2;
150 lb = h->b2;
151 }
151 }
152
152
153 cleanup:
153 cleanup:
154 if (_save)
154 if (_save)
155 PyEval_RestoreThread(_save);
155 PyEval_RestoreThread(_save);
156 PyBuffer_Release(&ba);
156 PyBuffer_Release(&ba);
157 PyBuffer_Release(&bb);
157 PyBuffer_Release(&bb);
158 free(al);
158 free(al);
159 free(bl);
159 free(bl);
160 if (l.next) {
160 bdiff_freehunks(l.next);
161 bdiff_freehunks(l.next);
162 }
163 return result;
161 return result;
164 }
162 }
165
163
166 /*
164 /*
167 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
165 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
168 * reduce whitespace sequences to a single space and trim remaining whitespace
166 * reduce whitespace sequences to a single space and trim remaining whitespace
169 * from end of lines.
167 * from end of lines.
170 */
168 */
171 static PyObject *fixws(PyObject *self, PyObject *args)
169 static PyObject *fixws(PyObject *self, PyObject *args)
172 {
170 {
173 PyObject *s, *result = NULL;
171 PyObject *s, *result = NULL;
174 char allws, c;
172 char allws, c;
175 const char *r;
173 const char *r;
176 Py_ssize_t i, rlen, wlen = 0;
174 Py_ssize_t i, rlen, wlen = 0;
177 char *w;
175 char *w;
178
176
179 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
177 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
180 return NULL;
178 return NULL;
181 r = PyBytes_AsString(s);
179 r = PyBytes_AsString(s);
182 rlen = PyBytes_Size(s);
180 rlen = PyBytes_Size(s);
183
181
184 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
182 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
185 if (!w)
183 if (!w)
186 goto nomem;
184 goto nomem;
187
185
188 for (i = 0; i != rlen; i++) {
186 for (i = 0; i != rlen; i++) {
189 c = r[i];
187 c = r[i];
190 if (c == ' ' || c == '\t' || c == '\r') {
188 if (c == ' ' || c == '\t' || c == '\r') {
191 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
189 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
192 w[wlen++] = ' ';
190 w[wlen++] = ' ';
193 } else if (c == '\n' && !allws && wlen > 0 &&
191 } else if (c == '\n' && !allws && wlen > 0 &&
194 w[wlen - 1] == ' ') {
192 w[wlen - 1] == ' ') {
195 w[wlen - 1] = '\n';
193 w[wlen - 1] = '\n';
196 } else {
194 } else {
197 w[wlen++] = c;
195 w[wlen++] = c;
198 }
196 }
199 }
197 }
200
198
201 result = PyBytes_FromStringAndSize(w, wlen);
199 result = PyBytes_FromStringAndSize(w, wlen);
202
200
203 nomem:
201 nomem:
204 PyMem_Free(w);
202 PyMem_Free(w);
205 return result ? result : PyErr_NoMemory();
203 return result ? result : PyErr_NoMemory();
206 }
204 }
207
205
208 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
206 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
209 const char *source, Py_ssize_t len)
207 const char *source, Py_ssize_t len)
210 {
208 {
211 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
209 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
212 if (sliced == NULL)
210 if (sliced == NULL)
213 return false;
211 return false;
214 PyList_SET_ITEM(list, destidx, sliced);
212 PyList_SET_ITEM(list, destidx, sliced);
215 return true;
213 return true;
216 }
214 }
217
215
218 static PyObject *splitnewlines(PyObject *self, PyObject *args)
216 static PyObject *splitnewlines(PyObject *self, PyObject *args)
219 {
217 {
220 const char *text;
218 const char *text;
221 Py_ssize_t nelts = 0, size, i, start = 0;
219 Py_ssize_t nelts = 0, size, i, start = 0;
222 PyObject *result = NULL;
220 PyObject *result = NULL;
223
221
224 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
222 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
225 goto abort;
223 goto abort;
226 }
224 }
227 if (!size) {
225 if (!size) {
228 return PyList_New(0);
226 return PyList_New(0);
229 }
227 }
230 /* This loops to size-1 because if the last byte is a newline,
228 /* This loops to size-1 because if the last byte is a newline,
231 * we don't want to perform a split there. */
229 * we don't want to perform a split there. */
232 for (i = 0; i < size - 1; ++i) {
230 for (i = 0; i < size - 1; ++i) {
233 if (text[i] == '\n') {
231 if (text[i] == '\n') {
234 ++nelts;
232 ++nelts;
235 }
233 }
236 }
234 }
237 if ((result = PyList_New(nelts + 1)) == NULL)
235 if ((result = PyList_New(nelts + 1)) == NULL)
238 goto abort;
236 goto abort;
239 nelts = 0;
237 nelts = 0;
240 for (i = 0; i < size - 1; ++i) {
238 for (i = 0; i < size - 1; ++i) {
241 if (text[i] == '\n') {
239 if (text[i] == '\n') {
242 if (!sliceintolist(result, nelts++, text + start,
240 if (!sliceintolist(result, nelts++, text + start,
243 i - start + 1))
241 i - start + 1))
244 goto abort;
242 goto abort;
245 start = i + 1;
243 start = i + 1;
246 }
244 }
247 }
245 }
248 if (!sliceintolist(result, nelts++, text + start, size - start))
246 if (!sliceintolist(result, nelts++, text + start, size - start))
249 goto abort;
247 goto abort;
250 return result;
248 return result;
251 abort:
249 abort:
252 Py_XDECREF(result);
250 Py_XDECREF(result);
253 return NULL;
251 return NULL;
254 }
252 }
255
253
256 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
254 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
257 void *priv)
255 void *priv)
258 {
256 {
259 PyObject *rl = (PyObject *)priv;
257 PyObject *rl = (PyObject *)priv;
260 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
258 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
261 if (!m)
259 if (!m)
262 return -1;
260 return -1;
263 if (PyList_Append(rl, m) != 0) {
261 if (PyList_Append(rl, m) != 0) {
264 Py_DECREF(m);
262 Py_DECREF(m);
265 return -1;
263 return -1;
266 }
264 }
267 return 0;
265 return 0;
268 }
266 }
269
267
270 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
268 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
271 {
269 {
272 Py_ssize_t la, lb;
270 Py_ssize_t la, lb;
273 mmfile_t a, b;
271 mmfile_t a, b;
274 PyObject *rl;
272 PyObject *rl;
275
273
276 xpparam_t xpp = {
274 xpparam_t xpp = {
277 XDF_INDENT_HEURISTIC, /* flags */
275 XDF_INDENT_HEURISTIC, /* flags */
278 };
276 };
279 xdemitconf_t xecfg = {
277 xdemitconf_t xecfg = {
280 XDL_EMIT_BDIFFHUNK, /* flags */
278 XDL_EMIT_BDIFFHUNK, /* flags */
281 hunk_consumer, /* hunk_consume_func */
279 hunk_consumer, /* hunk_consume_func */
282 };
280 };
283 xdemitcb_t ecb = {
281 xdemitcb_t ecb = {
284 NULL, /* priv */
282 NULL, /* priv */
285 };
283 };
286
284
287 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
285 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
288 &lb))
286 &lb))
289 return NULL;
287 return NULL;
290
288
291 a.size = la;
289 a.size = la;
292 b.size = lb;
290 b.size = lb;
293
291
294 rl = PyList_New(0);
292 rl = PyList_New(0);
295 if (!rl)
293 if (!rl)
296 return PyErr_NoMemory();
294 return PyErr_NoMemory();
297
295
298 ecb.priv = rl;
296 ecb.priv = rl;
299
297
300 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
298 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
301 Py_DECREF(rl);
299 Py_DECREF(rl);
302 return PyErr_NoMemory();
300 return PyErr_NoMemory();
303 }
301 }
304
302
305 return rl;
303 return rl;
306 }
304 }
307
305
308 static char mdiff_doc[] = "Efficient binary diff.";
306 static char mdiff_doc[] = "Efficient binary diff.";
309
307
310 static PyMethodDef methods[] = {
308 static PyMethodDef methods[] = {
311 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
309 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
312 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
310 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
313 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
311 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
314 {"splitnewlines", splitnewlines, METH_VARARGS,
312 {"splitnewlines", splitnewlines, METH_VARARGS,
315 "like str.splitlines, but only split on newlines\n"},
313 "like str.splitlines, but only split on newlines\n"},
316 {"xdiffblocks", xdiffblocks, METH_VARARGS,
314 {"xdiffblocks", xdiffblocks, METH_VARARGS,
317 "find a list of matching lines using xdiff algorithm\n"},
315 "find a list of matching lines using xdiff algorithm\n"},
318 {NULL, NULL},
316 {NULL, NULL},
319 };
317 };
320
318
321 static const int version = 3;
319 static const int version = 3;
322
320
323 #ifdef IS_PY3K
321 #ifdef IS_PY3K
324 static struct PyModuleDef bdiff_module = {
322 static struct PyModuleDef bdiff_module = {
325 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
323 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
326 };
324 };
327
325
328 PyMODINIT_FUNC PyInit_bdiff(void)
326 PyMODINIT_FUNC PyInit_bdiff(void)
329 {
327 {
330 PyObject *m;
328 PyObject *m;
331 m = PyModule_Create(&bdiff_module);
329 m = PyModule_Create(&bdiff_module);
332 PyModule_AddIntConstant(m, "version", version);
330 PyModule_AddIntConstant(m, "version", version);
333 return m;
331 return m;
334 }
332 }
335 #else
333 #else
336 PyMODINIT_FUNC initbdiff(void)
334 PyMODINIT_FUNC initbdiff(void)
337 {
335 {
338 PyObject *m;
336 PyObject *m;
339 m = Py_InitModule3("bdiff", methods, mdiff_doc);
337 m = Py_InitModule3("bdiff", methods, mdiff_doc);
340 PyModule_AddIntConstant(m, "version", version);
338 PyModule_AddIntConstant(m, "version", version);
341 }
339 }
342 #endif
340 #endif
General Comments 0
You need to be logged in to leave comments. Login now