##// END OF EJS Templates
bdiff: convert more longs to int64_t...
Matt Harbison -
r36934:1b9f6440 default
parent child Browse files
Show More
@@ -1,345 +1,346 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "thirdparty/xdiff/xdiff.h"
20 #include "thirdparty/xdiff/xdiff.h"
21 #include "util.h"
21 #include "util.h"
22
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
29
29
30 l.next = NULL;
30 l.next = NULL;
31
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
33 return NULL;
34
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
37
38 if (!a || !b)
38 if (!a || !b)
39 goto nomem;
39 goto nomem;
40
40
41 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
42 if (count < 0)
43 goto nomem;
43 goto nomem;
44
44
45 rl = PyList_New(count);
45 rl = PyList_New(count);
46 if (!rl)
46 if (!rl)
47 goto nomem;
47 goto nomem;
48
48
49 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
52 pos++;
52 pos++;
53 }
53 }
54
54
55 nomem:
55 nomem:
56 free(a);
56 free(a);
57 free(b);
57 free(b);
58 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
60 }
60 }
61
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
63 {
64 Py_buffer ba, bb;
64 Py_buffer ba, bb;
65 char *rb, *ia, *ib;
65 char *rb, *ia, *ib;
66 PyObject *result = NULL;
66 PyObject *result = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
68 struct bdiff_hunk l, *h;
68 struct bdiff_hunk l, *h;
69 int an, bn, count;
69 int an, bn, count;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 PyThreadState *_save = NULL;
71 PyThreadState *_save = NULL;
72
72
73 l.next = NULL;
73 l.next = NULL;
74
74
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 return NULL;
76 return NULL;
77
77
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 goto cleanup;
80 goto cleanup;
81 }
81 }
82
82
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 goto cleanup;
85 goto cleanup;
86 }
86 }
87
87
88 la = ba.len;
88 la = ba.len;
89 lb = bb.len;
89 lb = bb.len;
90
90
91 if (la > UINT_MAX || lb > UINT_MAX) {
91 if (la > UINT_MAX || lb > UINT_MAX) {
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 goto cleanup;
93 goto cleanup;
94 }
94 }
95
95
96 _save = PyEval_SaveThread();
96 _save = PyEval_SaveThread();
97
97
98 lmax = la > lb ? lb : la;
98 lmax = la > lb ? lb : la;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 ++li, ++ia, ++ib) {
100 ++li, ++ia, ++ib) {
101 if (*ia == '\n')
101 if (*ia == '\n')
102 lcommon = li + 1;
102 lcommon = li + 1;
103 }
103 }
104 /* we can almost add: if (li == lmax) lcommon = li; */
104 /* we can almost add: if (li == lmax) lcommon = li; */
105
105
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 if (!al || !bl) {
108 if (!al || !bl) {
109 PyErr_NoMemory();
109 PyErr_NoMemory();
110 goto cleanup;
110 goto cleanup;
111 }
111 }
112
112
113 count = bdiff_diff(al, an, bl, bn, &l);
113 count = bdiff_diff(al, an, bl, bn, &l);
114 if (count < 0) {
114 if (count < 0) {
115 PyErr_NoMemory();
115 PyErr_NoMemory();
116 goto cleanup;
116 goto cleanup;
117 }
117 }
118
118
119 /* calculate length of output */
119 /* calculate length of output */
120 la = lb = 0;
120 la = lb = 0;
121 for (h = l.next; h; h = h->next) {
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb)
122 if (h->a1 != la || h->b1 != lb)
123 len += 12 + bl[h->b1].l - bl[lb].l;
123 len += 12 + bl[h->b1].l - bl[lb].l;
124 la = h->a2;
124 la = h->a2;
125 lb = h->b2;
125 lb = h->b2;
126 }
126 }
127 PyEval_RestoreThread(_save);
127 PyEval_RestoreThread(_save);
128 _save = NULL;
128 _save = NULL;
129
129
130 result = PyBytes_FromStringAndSize(NULL, len);
130 result = PyBytes_FromStringAndSize(NULL, len);
131
131
132 if (!result)
132 if (!result)
133 goto cleanup;
133 goto cleanup;
134
134
135 /* build binary patch */
135 /* build binary patch */
136 rb = PyBytes_AsString(result);
136 rb = PyBytes_AsString(result);
137 la = lb = 0;
137 la = lb = 0;
138
138
139 for (h = l.next; h; h = h->next) {
139 for (h = l.next; h; h = h->next) {
140 if (h->a1 != la || h->b1 != lb) {
140 if (h->a1 != la || h->b1 != lb) {
141 len = bl[h->b1].l - bl[lb].l;
141 len = bl[h->b1].l - bl[lb].l;
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 rb + 4);
144 rb + 4);
145 putbe32((uint32_t)len, rb + 8);
145 putbe32((uint32_t)len, rb + 8);
146 memcpy(rb + 12, bl[lb].l, len);
146 memcpy(rb + 12, bl[lb].l, len);
147 rb += 12 + len;
147 rb += 12 + len;
148 }
148 }
149 la = h->a2;
149 la = h->a2;
150 lb = h->b2;
150 lb = h->b2;
151 }
151 }
152
152
153 cleanup:
153 cleanup:
154 if (_save)
154 if (_save)
155 PyEval_RestoreThread(_save);
155 PyEval_RestoreThread(_save);
156 PyBuffer_Release(&ba);
156 PyBuffer_Release(&ba);
157 PyBuffer_Release(&bb);
157 PyBuffer_Release(&bb);
158 if (al) {
158 if (al) {
159 free(al);
159 free(al);
160 }
160 }
161 if (bl) {
161 if (bl) {
162 free(bl);
162 free(bl);
163 }
163 }
164 if (l.next) {
164 if (l.next) {
165 bdiff_freehunks(l.next);
165 bdiff_freehunks(l.next);
166 }
166 }
167 return result;
167 return result;
168 }
168 }
169
169
170 /*
170 /*
171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
172 * reduce whitespace sequences to a single space and trim remaining whitespace
172 * reduce whitespace sequences to a single space and trim remaining whitespace
173 * from end of lines.
173 * from end of lines.
174 */
174 */
175 static PyObject *fixws(PyObject *self, PyObject *args)
175 static PyObject *fixws(PyObject *self, PyObject *args)
176 {
176 {
177 PyObject *s, *result = NULL;
177 PyObject *s, *result = NULL;
178 char allws, c;
178 char allws, c;
179 const char *r;
179 const char *r;
180 Py_ssize_t i, rlen, wlen = 0;
180 Py_ssize_t i, rlen, wlen = 0;
181 char *w;
181 char *w;
182
182
183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
184 return NULL;
184 return NULL;
185 r = PyBytes_AsString(s);
185 r = PyBytes_AsString(s);
186 rlen = PyBytes_Size(s);
186 rlen = PyBytes_Size(s);
187
187
188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
189 if (!w)
189 if (!w)
190 goto nomem;
190 goto nomem;
191
191
192 for (i = 0; i != rlen; i++) {
192 for (i = 0; i != rlen; i++) {
193 c = r[i];
193 c = r[i];
194 if (c == ' ' || c == '\t' || c == '\r') {
194 if (c == ' ' || c == '\t' || c == '\r') {
195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
196 w[wlen++] = ' ';
196 w[wlen++] = ' ';
197 } else if (c == '\n' && !allws && wlen > 0 &&
197 } else if (c == '\n' && !allws && wlen > 0 &&
198 w[wlen - 1] == ' ') {
198 w[wlen - 1] == ' ') {
199 w[wlen - 1] = '\n';
199 w[wlen - 1] = '\n';
200 } else {
200 } else {
201 w[wlen++] = c;
201 w[wlen++] = c;
202 }
202 }
203 }
203 }
204
204
205 result = PyBytes_FromStringAndSize(w, wlen);
205 result = PyBytes_FromStringAndSize(w, wlen);
206
206
207 nomem:
207 nomem:
208 PyMem_Free(w);
208 PyMem_Free(w);
209 return result ? result : PyErr_NoMemory();
209 return result ? result : PyErr_NoMemory();
210 }
210 }
211
211
212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
213 const char *source, Py_ssize_t len)
213 const char *source, Py_ssize_t len)
214 {
214 {
215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
216 if (sliced == NULL)
216 if (sliced == NULL)
217 return false;
217 return false;
218 PyList_SET_ITEM(list, destidx, sliced);
218 PyList_SET_ITEM(list, destidx, sliced);
219 return true;
219 return true;
220 }
220 }
221
221
222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
223 {
223 {
224 const char *text;
224 const char *text;
225 Py_ssize_t nelts = 0, size, i, start = 0;
225 Py_ssize_t nelts = 0, size, i, start = 0;
226 PyObject *result = NULL;
226 PyObject *result = NULL;
227
227
228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
229 goto abort;
229 goto abort;
230 }
230 }
231 if (!size) {
231 if (!size) {
232 return PyList_New(0);
232 return PyList_New(0);
233 }
233 }
234 /* This loops to size-1 because if the last byte is a newline,
234 /* This loops to size-1 because if the last byte is a newline,
235 * we don't want to perform a split there. */
235 * we don't want to perform a split there. */
236 for (i = 0; i < size - 1; ++i) {
236 for (i = 0; i < size - 1; ++i) {
237 if (text[i] == '\n') {
237 if (text[i] == '\n') {
238 ++nelts;
238 ++nelts;
239 }
239 }
240 }
240 }
241 if ((result = PyList_New(nelts + 1)) == NULL)
241 if ((result = PyList_New(nelts + 1)) == NULL)
242 goto abort;
242 goto abort;
243 nelts = 0;
243 nelts = 0;
244 for (i = 0; i < size - 1; ++i) {
244 for (i = 0; i < size - 1; ++i) {
245 if (text[i] == '\n') {
245 if (text[i] == '\n') {
246 if (!sliceintolist(result, nelts++, text + start,
246 if (!sliceintolist(result, nelts++, text + start,
247 i - start + 1))
247 i - start + 1))
248 goto abort;
248 goto abort;
249 start = i + 1;
249 start = i + 1;
250 }
250 }
251 }
251 }
252 if (!sliceintolist(result, nelts++, text + start, size - start))
252 if (!sliceintolist(result, nelts++, text + start, size - start))
253 goto abort;
253 goto abort;
254 return result;
254 return result;
255 abort:
255 abort:
256 Py_XDECREF(result);
256 Py_XDECREF(result);
257 return NULL;
257 return NULL;
258 }
258 }
259
259
260 static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
260 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
261 void *priv)
261 {
262 {
262 PyObject *rl = (PyObject *)priv;
263 PyObject *rl = (PyObject *)priv;
263 PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
264 PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
264 if (!m)
265 if (!m)
265 return -1;
266 return -1;
266 if (PyList_Append(rl, m) != 0) {
267 if (PyList_Append(rl, m) != 0) {
267 Py_DECREF(m);
268 Py_DECREF(m);
268 return -1;
269 return -1;
269 }
270 }
270 return 0;
271 return 0;
271 }
272 }
272
273
273 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
274 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
274 {
275 {
275 Py_ssize_t la, lb;
276 Py_ssize_t la, lb;
276 mmfile_t a, b;
277 mmfile_t a, b;
277 PyObject *rl;
278 PyObject *rl;
278
279
279 xpparam_t xpp = {
280 xpparam_t xpp = {
280 XDF_INDENT_HEURISTIC, /* flags */
281 XDF_INDENT_HEURISTIC, /* flags */
281 };
282 };
282 xdemitconf_t xecfg = {
283 xdemitconf_t xecfg = {
283 XDL_EMIT_BDIFFHUNK, /* flags */
284 XDL_EMIT_BDIFFHUNK, /* flags */
284 hunk_consumer, /* hunk_consume_func */
285 hunk_consumer, /* hunk_consume_func */
285 };
286 };
286 xdemitcb_t ecb = {
287 xdemitcb_t ecb = {
287 NULL, /* priv */
288 NULL, /* priv */
288 };
289 };
289
290
290 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
291 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
291 &lb))
292 &lb))
292 return NULL;
293 return NULL;
293
294
294 a.size = la;
295 a.size = la;
295 b.size = lb;
296 b.size = lb;
296
297
297 rl = PyList_New(0);
298 rl = PyList_New(0);
298 if (!rl)
299 if (!rl)
299 return PyErr_NoMemory();
300 return PyErr_NoMemory();
300
301
301 ecb.priv = rl;
302 ecb.priv = rl;
302
303
303 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
304 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
304 Py_DECREF(rl);
305 Py_DECREF(rl);
305 return PyErr_NoMemory();
306 return PyErr_NoMemory();
306 }
307 }
307
308
308 return rl;
309 return rl;
309 }
310 }
310
311
311 static char mdiff_doc[] = "Efficient binary diff.";
312 static char mdiff_doc[] = "Efficient binary diff.";
312
313
313 static PyMethodDef methods[] = {
314 static PyMethodDef methods[] = {
314 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
315 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
315 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
316 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
316 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
317 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
317 {"splitnewlines", splitnewlines, METH_VARARGS,
318 {"splitnewlines", splitnewlines, METH_VARARGS,
318 "like str.splitlines, but only split on newlines\n"},
319 "like str.splitlines, but only split on newlines\n"},
319 {"xdiffblocks", xdiffblocks, METH_VARARGS,
320 {"xdiffblocks", xdiffblocks, METH_VARARGS,
320 "find a list of matching lines using xdiff algorithm\n"},
321 "find a list of matching lines using xdiff algorithm\n"},
321 {NULL, NULL},
322 {NULL, NULL},
322 };
323 };
323
324
324 static const int version = 3;
325 static const int version = 3;
325
326
326 #ifdef IS_PY3K
327 #ifdef IS_PY3K
327 static struct PyModuleDef bdiff_module = {
328 static struct PyModuleDef bdiff_module = {
328 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
329 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
329 };
330 };
330
331
331 PyMODINIT_FUNC PyInit_bdiff(void)
332 PyMODINIT_FUNC PyInit_bdiff(void)
332 {
333 {
333 PyObject *m;
334 PyObject *m;
334 m = PyModule_Create(&bdiff_module);
335 m = PyModule_Create(&bdiff_module);
335 PyModule_AddIntConstant(m, "version", version);
336 PyModule_AddIntConstant(m, "version", version);
336 return m;
337 return m;
337 }
338 }
338 #else
339 #else
339 PyMODINIT_FUNC initbdiff(void)
340 PyMODINIT_FUNC initbdiff(void)
340 {
341 {
341 PyObject *m;
342 PyObject *m;
342 m = Py_InitModule3("bdiff", methods, mdiff_doc);
343 m = Py_InitModule3("bdiff", methods, mdiff_doc);
343 PyModule_AddIntConstant(m, "version", version);
344 PyModule_AddIntConstant(m, "version", version);
344 }
345 }
345 #endif
346 #endif
General Comments 0
You need to be logged in to leave comments. Login now