##// END OF EJS Templates
bdiff: convert more longs to int64_t...
Matt Harbison -
r36934:1b9f6440 default
parent child Browse files
Show More
@@ -1,345 +1,346 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14 #include <limits.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bdiff.h"
19 19 #include "bitmanipulation.h"
20 20 #include "thirdparty/xdiff/xdiff.h"
21 21 #include "util.h"
22 22
23 23 static PyObject *blocks(PyObject *self, PyObject *args)
24 24 {
25 25 PyObject *sa, *sb, *rl = NULL, *m;
26 26 struct bdiff_line *a, *b;
27 27 struct bdiff_hunk l, *h;
28 28 int an, bn, count, pos = 0;
29 29
30 30 l.next = NULL;
31 31
32 32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 33 return NULL;
34 34
35 35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37 37
38 38 if (!a || !b)
39 39 goto nomem;
40 40
41 41 count = bdiff_diff(a, an, b, bn, &l);
42 42 if (count < 0)
43 43 goto nomem;
44 44
45 45 rl = PyList_New(count);
46 46 if (!rl)
47 47 goto nomem;
48 48
49 49 for (h = l.next; h; h = h->next) {
50 50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 51 PyList_SetItem(rl, pos, m);
52 52 pos++;
53 53 }
54 54
55 55 nomem:
56 56 free(a);
57 57 free(b);
58 58 bdiff_freehunks(l.next);
59 59 return rl ? rl : PyErr_NoMemory();
60 60 }
61 61
62 62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 63 {
64 64 Py_buffer ba, bb;
65 65 char *rb, *ia, *ib;
66 66 PyObject *result = NULL;
67 67 struct bdiff_line *al = NULL, *bl = NULL;
68 68 struct bdiff_hunk l, *h;
69 69 int an, bn, count;
70 70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 71 PyThreadState *_save = NULL;
72 72
73 73 l.next = NULL;
74 74
75 75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 76 return NULL;
77 77
78 78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 80 goto cleanup;
81 81 }
82 82
83 83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 85 goto cleanup;
86 86 }
87 87
88 88 la = ba.len;
89 89 lb = bb.len;
90 90
91 91 if (la > UINT_MAX || lb > UINT_MAX) {
92 92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 93 goto cleanup;
94 94 }
95 95
96 96 _save = PyEval_SaveThread();
97 97
98 98 lmax = la > lb ? lb : la;
99 99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 100 ++li, ++ia, ++ib) {
101 101 if (*ia == '\n')
102 102 lcommon = li + 1;
103 103 }
104 104 /* we can almost add: if (li == lmax) lcommon = li; */
105 105
106 106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 108 if (!al || !bl) {
109 109 PyErr_NoMemory();
110 110 goto cleanup;
111 111 }
112 112
113 113 count = bdiff_diff(al, an, bl, bn, &l);
114 114 if (count < 0) {
115 115 PyErr_NoMemory();
116 116 goto cleanup;
117 117 }
118 118
119 119 /* calculate length of output */
120 120 la = lb = 0;
121 121 for (h = l.next; h; h = h->next) {
122 122 if (h->a1 != la || h->b1 != lb)
123 123 len += 12 + bl[h->b1].l - bl[lb].l;
124 124 la = h->a2;
125 125 lb = h->b2;
126 126 }
127 127 PyEval_RestoreThread(_save);
128 128 _save = NULL;
129 129
130 130 result = PyBytes_FromStringAndSize(NULL, len);
131 131
132 132 if (!result)
133 133 goto cleanup;
134 134
135 135 /* build binary patch */
136 136 rb = PyBytes_AsString(result);
137 137 la = lb = 0;
138 138
139 139 for (h = l.next; h; h = h->next) {
140 140 if (h->a1 != la || h->b1 != lb) {
141 141 len = bl[h->b1].l - bl[lb].l;
142 142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 144 rb + 4);
145 145 putbe32((uint32_t)len, rb + 8);
146 146 memcpy(rb + 12, bl[lb].l, len);
147 147 rb += 12 + len;
148 148 }
149 149 la = h->a2;
150 150 lb = h->b2;
151 151 }
152 152
153 153 cleanup:
154 154 if (_save)
155 155 PyEval_RestoreThread(_save);
156 156 PyBuffer_Release(&ba);
157 157 PyBuffer_Release(&bb);
158 158 if (al) {
159 159 free(al);
160 160 }
161 161 if (bl) {
162 162 free(bl);
163 163 }
164 164 if (l.next) {
165 165 bdiff_freehunks(l.next);
166 166 }
167 167 return result;
168 168 }
169 169
170 170 /*
171 171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
172 172 * reduce whitespace sequences to a single space and trim remaining whitespace
173 173 * from end of lines.
174 174 */
175 175 static PyObject *fixws(PyObject *self, PyObject *args)
176 176 {
177 177 PyObject *s, *result = NULL;
178 178 char allws, c;
179 179 const char *r;
180 180 Py_ssize_t i, rlen, wlen = 0;
181 181 char *w;
182 182
183 183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
184 184 return NULL;
185 185 r = PyBytes_AsString(s);
186 186 rlen = PyBytes_Size(s);
187 187
188 188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
189 189 if (!w)
190 190 goto nomem;
191 191
192 192 for (i = 0; i != rlen; i++) {
193 193 c = r[i];
194 194 if (c == ' ' || c == '\t' || c == '\r') {
195 195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
196 196 w[wlen++] = ' ';
197 197 } else if (c == '\n' && !allws && wlen > 0 &&
198 198 w[wlen - 1] == ' ') {
199 199 w[wlen - 1] = '\n';
200 200 } else {
201 201 w[wlen++] = c;
202 202 }
203 203 }
204 204
205 205 result = PyBytes_FromStringAndSize(w, wlen);
206 206
207 207 nomem:
208 208 PyMem_Free(w);
209 209 return result ? result : PyErr_NoMemory();
210 210 }
211 211
212 212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
213 213 const char *source, Py_ssize_t len)
214 214 {
215 215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
216 216 if (sliced == NULL)
217 217 return false;
218 218 PyList_SET_ITEM(list, destidx, sliced);
219 219 return true;
220 220 }
221 221
222 222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
223 223 {
224 224 const char *text;
225 225 Py_ssize_t nelts = 0, size, i, start = 0;
226 226 PyObject *result = NULL;
227 227
228 228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
229 229 goto abort;
230 230 }
231 231 if (!size) {
232 232 return PyList_New(0);
233 233 }
234 234 /* This loops to size-1 because if the last byte is a newline,
235 235 * we don't want to perform a split there. */
236 236 for (i = 0; i < size - 1; ++i) {
237 237 if (text[i] == '\n') {
238 238 ++nelts;
239 239 }
240 240 }
241 241 if ((result = PyList_New(nelts + 1)) == NULL)
242 242 goto abort;
243 243 nelts = 0;
244 244 for (i = 0; i < size - 1; ++i) {
245 245 if (text[i] == '\n') {
246 246 if (!sliceintolist(result, nelts++, text + start,
247 247 i - start + 1))
248 248 goto abort;
249 249 start = i + 1;
250 250 }
251 251 }
252 252 if (!sliceintolist(result, nelts++, text + start, size - start))
253 253 goto abort;
254 254 return result;
255 255 abort:
256 256 Py_XDECREF(result);
257 257 return NULL;
258 258 }
259 259
260 static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
260 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
261 void *priv)
261 262 {
262 263 PyObject *rl = (PyObject *)priv;
263 264 PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
264 265 if (!m)
265 266 return -1;
266 267 if (PyList_Append(rl, m) != 0) {
267 268 Py_DECREF(m);
268 269 return -1;
269 270 }
270 271 return 0;
271 272 }
272 273
273 274 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
274 275 {
275 276 Py_ssize_t la, lb;
276 277 mmfile_t a, b;
277 278 PyObject *rl;
278 279
279 280 xpparam_t xpp = {
280 281 XDF_INDENT_HEURISTIC, /* flags */
281 282 };
282 283 xdemitconf_t xecfg = {
283 284 XDL_EMIT_BDIFFHUNK, /* flags */
284 285 hunk_consumer, /* hunk_consume_func */
285 286 };
286 287 xdemitcb_t ecb = {
287 288 NULL, /* priv */
288 289 };
289 290
290 291 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
291 292 &lb))
292 293 return NULL;
293 294
294 295 a.size = la;
295 296 b.size = lb;
296 297
297 298 rl = PyList_New(0);
298 299 if (!rl)
299 300 return PyErr_NoMemory();
300 301
301 302 ecb.priv = rl;
302 303
303 304 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
304 305 Py_DECREF(rl);
305 306 return PyErr_NoMemory();
306 307 }
307 308
308 309 return rl;
309 310 }
310 311
311 312 static char mdiff_doc[] = "Efficient binary diff.";
312 313
313 314 static PyMethodDef methods[] = {
314 315 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
315 316 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
316 317 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
317 318 {"splitnewlines", splitnewlines, METH_VARARGS,
318 319 "like str.splitlines, but only split on newlines\n"},
319 320 {"xdiffblocks", xdiffblocks, METH_VARARGS,
320 321 "find a list of matching lines using xdiff algorithm\n"},
321 322 {NULL, NULL},
322 323 };
323 324
324 325 static const int version = 3;
325 326
326 327 #ifdef IS_PY3K
327 328 static struct PyModuleDef bdiff_module = {
328 329 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
329 330 };
330 331
331 332 PyMODINIT_FUNC PyInit_bdiff(void)
332 333 {
333 334 PyObject *m;
334 335 m = PyModule_Create(&bdiff_module);
335 336 PyModule_AddIntConstant(m, "version", version);
336 337 return m;
337 338 }
338 339 #else
339 340 PyMODINIT_FUNC initbdiff(void)
340 341 {
341 342 PyObject *m;
342 343 m = Py_InitModule3("bdiff", methods, mdiff_doc);
343 344 PyModule_AddIntConstant(m, "version", version);
344 345 }
345 346 #endif
General Comments 0
You need to be logged in to leave comments. Login now