##// END OF EJS Templates
xdiff: fix leak in hunk_consumer()...
Yuya Nishihara -
r39484:ad76032d stable
parent child Browse files
Show More
@@ -1,340 +1,339 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14 #include <limits.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bdiff.h"
19 19 #include "bitmanipulation.h"
20 20 #include "thirdparty/xdiff/xdiff.h"
21 21 #include "util.h"
22 22
23 23 static PyObject *blocks(PyObject *self, PyObject *args)
24 24 {
25 25 PyObject *sa, *sb, *rl = NULL, *m;
26 26 struct bdiff_line *a, *b;
27 27 struct bdiff_hunk l, *h;
28 28 int an, bn, count, pos = 0;
29 29
30 30 l.next = NULL;
31 31
32 32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 33 return NULL;
34 34
35 35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37 37
38 38 if (!a || !b)
39 39 goto nomem;
40 40
41 41 count = bdiff_diff(a, an, b, bn, &l);
42 42 if (count < 0)
43 43 goto nomem;
44 44
45 45 rl = PyList_New(count);
46 46 if (!rl)
47 47 goto nomem;
48 48
49 49 for (h = l.next; h; h = h->next) {
50 50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 51 PyList_SetItem(rl, pos, m);
52 52 pos++;
53 53 }
54 54
55 55 nomem:
56 56 free(a);
57 57 free(b);
58 58 bdiff_freehunks(l.next);
59 59 return rl ? rl : PyErr_NoMemory();
60 60 }
61 61
62 62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 63 {
64 64 Py_buffer ba, bb;
65 65 char *rb, *ia, *ib;
66 66 PyObject *result = NULL;
67 67 struct bdiff_line *al = NULL, *bl = NULL;
68 68 struct bdiff_hunk l, *h;
69 69 int an, bn, count;
70 70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 71 PyThreadState *_save = NULL;
72 72
73 73 l.next = NULL;
74 74
75 75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 76 return NULL;
77 77
78 78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 80 goto cleanup;
81 81 }
82 82
83 83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 85 goto cleanup;
86 86 }
87 87
88 88 la = ba.len;
89 89 lb = bb.len;
90 90
91 91 if (la > UINT_MAX || lb > UINT_MAX) {
92 92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 93 goto cleanup;
94 94 }
95 95
96 96 _save = PyEval_SaveThread();
97 97
98 98 lmax = la > lb ? lb : la;
99 99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 100 ++li, ++ia, ++ib) {
101 101 if (*ia == '\n')
102 102 lcommon = li + 1;
103 103 }
104 104 /* we can almost add: if (li == lmax) lcommon = li; */
105 105
106 106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 108 if (!al || !bl) {
109 109 PyErr_NoMemory();
110 110 goto cleanup;
111 111 }
112 112
113 113 count = bdiff_diff(al, an, bl, bn, &l);
114 114 if (count < 0) {
115 115 PyErr_NoMemory();
116 116 goto cleanup;
117 117 }
118 118
119 119 /* calculate length of output */
120 120 la = lb = 0;
121 121 for (h = l.next; h; h = h->next) {
122 122 if (h->a1 != la || h->b1 != lb)
123 123 len += 12 + bl[h->b1].l - bl[lb].l;
124 124 la = h->a2;
125 125 lb = h->b2;
126 126 }
127 127 PyEval_RestoreThread(_save);
128 128 _save = NULL;
129 129
130 130 result = PyBytes_FromStringAndSize(NULL, len);
131 131
132 132 if (!result)
133 133 goto cleanup;
134 134
135 135 /* build binary patch */
136 136 rb = PyBytes_AsString(result);
137 137 la = lb = 0;
138 138
139 139 for (h = l.next; h; h = h->next) {
140 140 if (h->a1 != la || h->b1 != lb) {
141 141 len = bl[h->b1].l - bl[lb].l;
142 142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 144 rb + 4);
145 145 putbe32((uint32_t)len, rb + 8);
146 146 memcpy(rb + 12, bl[lb].l, len);
147 147 rb += 12 + len;
148 148 }
149 149 la = h->a2;
150 150 lb = h->b2;
151 151 }
152 152
153 153 cleanup:
154 154 if (_save)
155 155 PyEval_RestoreThread(_save);
156 156 PyBuffer_Release(&ba);
157 157 PyBuffer_Release(&bb);
158 158 free(al);
159 159 free(bl);
160 160 bdiff_freehunks(l.next);
161 161 return result;
162 162 }
163 163
164 164 /*
165 165 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
166 166 * reduce whitespace sequences to a single space and trim remaining whitespace
167 167 * from end of lines.
168 168 */
169 169 static PyObject *fixws(PyObject *self, PyObject *args)
170 170 {
171 171 PyObject *s, *result = NULL;
172 172 char allws, c;
173 173 const char *r;
174 174 Py_ssize_t i, rlen, wlen = 0;
175 175 char *w;
176 176
177 177 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
178 178 return NULL;
179 179 r = PyBytes_AsString(s);
180 180 rlen = PyBytes_Size(s);
181 181
182 182 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
183 183 if (!w)
184 184 goto nomem;
185 185
186 186 for (i = 0; i != rlen; i++) {
187 187 c = r[i];
188 188 if (c == ' ' || c == '\t' || c == '\r') {
189 189 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
190 190 w[wlen++] = ' ';
191 191 } else if (c == '\n' && !allws && wlen > 0 &&
192 192 w[wlen - 1] == ' ') {
193 193 w[wlen - 1] = '\n';
194 194 } else {
195 195 w[wlen++] = c;
196 196 }
197 197 }
198 198
199 199 result = PyBytes_FromStringAndSize(w, wlen);
200 200
201 201 nomem:
202 202 PyMem_Free(w);
203 203 return result ? result : PyErr_NoMemory();
204 204 }
205 205
206 206 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
207 207 const char *source, Py_ssize_t len)
208 208 {
209 209 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
210 210 if (sliced == NULL)
211 211 return false;
212 212 PyList_SET_ITEM(list, destidx, sliced);
213 213 return true;
214 214 }
215 215
216 216 static PyObject *splitnewlines(PyObject *self, PyObject *args)
217 217 {
218 218 const char *text;
219 219 Py_ssize_t nelts = 0, size, i, start = 0;
220 220 PyObject *result = NULL;
221 221
222 222 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
223 223 goto abort;
224 224 }
225 225 if (!size) {
226 226 return PyList_New(0);
227 227 }
228 228 /* This loops to size-1 because if the last byte is a newline,
229 229 * we don't want to perform a split there. */
230 230 for (i = 0; i < size - 1; ++i) {
231 231 if (text[i] == '\n') {
232 232 ++nelts;
233 233 }
234 234 }
235 235 if ((result = PyList_New(nelts + 1)) == NULL)
236 236 goto abort;
237 237 nelts = 0;
238 238 for (i = 0; i < size - 1; ++i) {
239 239 if (text[i] == '\n') {
240 240 if (!sliceintolist(result, nelts++, text + start,
241 241 i - start + 1))
242 242 goto abort;
243 243 start = i + 1;
244 244 }
245 245 }
246 246 if (!sliceintolist(result, nelts++, text + start, size - start))
247 247 goto abort;
248 248 return result;
249 249 abort:
250 250 Py_XDECREF(result);
251 251 return NULL;
252 252 }
253 253
254 254 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
255 255 void *priv)
256 256 {
257 257 PyObject *rl = (PyObject *)priv;
258 258 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
259 int r;
259 260 if (!m)
260 261 return -1;
261 if (PyList_Append(rl, m) != 0) {
262 Py_DECREF(m);
263 return -1;
264 }
265 return 0;
262 r = PyList_Append(rl, m);
263 Py_DECREF(m);
264 return r;
266 265 }
267 266
268 267 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
269 268 {
270 269 Py_ssize_t la, lb;
271 270 mmfile_t a, b;
272 271 PyObject *rl;
273 272
274 273 xpparam_t xpp = {
275 274 XDF_INDENT_HEURISTIC, /* flags */
276 275 };
277 276 xdemitconf_t xecfg = {
278 277 XDL_EMIT_BDIFFHUNK, /* flags */
279 278 hunk_consumer, /* hunk_consume_func */
280 279 };
281 280 xdemitcb_t ecb = {
282 281 NULL, /* priv */
283 282 };
284 283
285 284 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
286 285 &lb))
287 286 return NULL;
288 287
289 288 a.size = la;
290 289 b.size = lb;
291 290
292 291 rl = PyList_New(0);
293 292 if (!rl)
294 293 return PyErr_NoMemory();
295 294
296 295 ecb.priv = rl;
297 296
298 297 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
299 298 Py_DECREF(rl);
300 299 return PyErr_NoMemory();
301 300 }
302 301
303 302 return rl;
304 303 }
305 304
306 305 static char mdiff_doc[] = "Efficient binary diff.";
307 306
308 307 static PyMethodDef methods[] = {
309 308 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
310 309 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
311 310 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
312 311 {"splitnewlines", splitnewlines, METH_VARARGS,
313 312 "like str.splitlines, but only split on newlines\n"},
314 313 {"xdiffblocks", xdiffblocks, METH_VARARGS,
315 314 "find a list of matching lines using xdiff algorithm\n"},
316 315 {NULL, NULL},
317 316 };
318 317
319 318 static const int version = 3;
320 319
321 320 #ifdef IS_PY3K
322 321 static struct PyModuleDef bdiff_module = {
323 322 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
324 323 };
325 324
326 325 PyMODINIT_FUNC PyInit_bdiff(void)
327 326 {
328 327 PyObject *m;
329 328 m = PyModule_Create(&bdiff_module);
330 329 PyModule_AddIntConstant(m, "version", version);
331 330 return m;
332 331 }
333 332 #else
334 333 PyMODINIT_FUNC initbdiff(void)
335 334 {
336 335 PyObject *m;
337 336 m = Py_InitModule3("bdiff", methods, mdiff_doc);
338 337 PyModule_AddIntConstant(m, "version", version);
339 338 }
340 339 #endif
General Comments 0
You need to be logged in to leave comments. Login now