##// END OF EJS Templates
bdiff: use Python memory allocator in fixws...
Gregory Szorc -
r31467:08ecec29 default
parent child Browse files
Show More
@@ -1,213 +1,213
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14 #include <stdlib.h>
15 15 #include <string.h>
16 16 #include <limits.h>
17 17
18 18 #include "bdiff.h"
19 19 #include "bitmanipulation.h"
20 20 #include "util.h"
21 21
22 22
23 23 static PyObject *blocks(PyObject *self, PyObject *args)
24 24 {
25 25 PyObject *sa, *sb, *rl = NULL, *m;
26 26 struct bdiff_line *a, *b;
27 27 struct bdiff_hunk l, *h;
28 28 int an, bn, count, pos = 0;
29 29
30 30 l.next = NULL;
31 31
32 32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 33 return NULL;
34 34
35 35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37 37
38 38 if (!a || !b)
39 39 goto nomem;
40 40
41 41 count = bdiff_diff(a, an, b, bn, &l);
42 42 if (count < 0)
43 43 goto nomem;
44 44
45 45 rl = PyList_New(count);
46 46 if (!rl)
47 47 goto nomem;
48 48
49 49 for (h = l.next; h; h = h->next) {
50 50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 51 PyList_SetItem(rl, pos, m);
52 52 pos++;
53 53 }
54 54
55 55 nomem:
56 56 free(a);
57 57 free(b);
58 58 bdiff_freehunks(l.next);
59 59 return rl ? rl : PyErr_NoMemory();
60 60 }
61 61
62 62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 63 {
64 64 char *sa, *sb, *rb, *ia, *ib;
65 65 PyObject *result = NULL;
66 66 struct bdiff_line *al, *bl;
67 67 struct bdiff_hunk l, *h;
68 68 int an, bn, count;
69 69 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 70 PyThreadState *_save;
71 71
72 72 l.next = NULL;
73 73
74 74 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
75 75 return NULL;
76 76
77 77 if (la > UINT_MAX || lb > UINT_MAX) {
78 78 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
79 79 return NULL;
80 80 }
81 81
82 82 _save = PyEval_SaveThread();
83 83
84 84 lmax = la > lb ? lb : la;
85 85 for (ia = sa, ib = sb;
86 86 li < lmax && *ia == *ib;
87 87 ++li, ++ia, ++ib)
88 88 if (*ia == '\n')
89 89 lcommon = li + 1;
90 90 /* we can almost add: if (li == lmax) lcommon = li; */
91 91
92 92 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
93 93 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
94 94 if (!al || !bl)
95 95 goto nomem;
96 96
97 97 count = bdiff_diff(al, an, bl, bn, &l);
98 98 if (count < 0)
99 99 goto nomem;
100 100
101 101 /* calculate length of output */
102 102 la = lb = 0;
103 103 for (h = l.next; h; h = h->next) {
104 104 if (h->a1 != la || h->b1 != lb)
105 105 len += 12 + bl[h->b1].l - bl[lb].l;
106 106 la = h->a2;
107 107 lb = h->b2;
108 108 }
109 109 PyEval_RestoreThread(_save);
110 110 _save = NULL;
111 111
112 112 result = PyBytes_FromStringAndSize(NULL, len);
113 113
114 114 if (!result)
115 115 goto nomem;
116 116
117 117 /* build binary patch */
118 118 rb = PyBytes_AsString(result);
119 119 la = lb = 0;
120 120
121 121 for (h = l.next; h; h = h->next) {
122 122 if (h->a1 != la || h->b1 != lb) {
123 123 len = bl[h->b1].l - bl[lb].l;
124 124 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
125 125 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4);
126 126 putbe32((uint32_t)len, rb + 8);
127 127 memcpy(rb + 12, bl[lb].l, len);
128 128 rb += 12 + len;
129 129 }
130 130 la = h->a2;
131 131 lb = h->b2;
132 132 }
133 133
134 134 nomem:
135 135 if (_save)
136 136 PyEval_RestoreThread(_save);
137 137 free(al);
138 138 free(bl);
139 139 bdiff_freehunks(l.next);
140 140 return result ? result : PyErr_NoMemory();
141 141 }
142 142
143 143 /*
144 144 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
145 145 * reduce whitespace sequences to a single space and trim remaining whitespace
146 146 * from end of lines.
147 147 */
148 148 static PyObject *fixws(PyObject *self, PyObject *args)
149 149 {
150 150 PyObject *s, *result = NULL;
151 151 char allws, c;
152 152 const char *r;
153 153 Py_ssize_t i, rlen, wlen = 0;
154 154 char *w;
155 155
156 156 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
157 157 return NULL;
158 158 r = PyBytes_AsString(s);
159 159 rlen = PyBytes_Size(s);
160 160
161 w = (char *)malloc(rlen ? rlen : 1);
161 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
162 162 if (!w)
163 163 goto nomem;
164 164
165 165 for (i = 0; i != rlen; i++) {
166 166 c = r[i];
167 167 if (c == ' ' || c == '\t' || c == '\r') {
168 168 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
169 169 w[wlen++] = ' ';
170 170 } else if (c == '\n' && !allws
171 171 && wlen > 0 && w[wlen - 1] == ' ') {
172 172 w[wlen - 1] = '\n';
173 173 } else {
174 174 w[wlen++] = c;
175 175 }
176 176 }
177 177
178 178 result = PyBytes_FromStringAndSize(w, wlen);
179 179
180 180 nomem:
181 free(w);
181 PyMem_Free(w);
182 182 return result ? result : PyErr_NoMemory();
183 183 }
184 184
185 185
186 186 static char mdiff_doc[] = "Efficient binary diff.";
187 187
188 188 static PyMethodDef methods[] = {
189 189 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
190 190 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
191 191 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
192 192 {NULL, NULL}
193 193 };
194 194
195 195 #ifdef IS_PY3K
196 196 static struct PyModuleDef bdiff_module = {
197 197 PyModuleDef_HEAD_INIT,
198 198 "bdiff",
199 199 mdiff_doc,
200 200 -1,
201 201 methods
202 202 };
203 203
204 204 PyMODINIT_FUNC PyInit_bdiff(void)
205 205 {
206 206 return PyModule_Create(&bdiff_module);
207 207 }
208 208 #else
209 209 PyMODINIT_FUNC initbdiff(void)
210 210 {
211 211 Py_InitModule3("bdiff", methods, mdiff_doc);
212 212 }
213 213 #endif
General Comments 0
You need to be logged in to leave comments. Login now