##// END OF EJS Templates
bdiff: add version to help detect breaking binary changes...
Jun Wu -
r32355:4195b849 default
parent child Browse files
Show More
@@ -1,213 +1,220 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <stdlib.h>
14 #include <stdlib.h>
15 #include <string.h>
15 #include <string.h>
16 #include <limits.h>
16 #include <limits.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "util.h"
20 #include "util.h"
21
21
22
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
29
29
30 l.next = NULL;
30 l.next = NULL;
31
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
33 return NULL;
34
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
37
38 if (!a || !b)
38 if (!a || !b)
39 goto nomem;
39 goto nomem;
40
40
41 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
42 if (count < 0)
43 goto nomem;
43 goto nomem;
44
44
45 rl = PyList_New(count);
45 rl = PyList_New(count);
46 if (!rl)
46 if (!rl)
47 goto nomem;
47 goto nomem;
48
48
49 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
52 pos++;
52 pos++;
53 }
53 }
54
54
55 nomem:
55 nomem:
56 free(a);
56 free(a);
57 free(b);
57 free(b);
58 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
60 }
60 }
61
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
63 {
64 char *sa, *sb, *rb, *ia, *ib;
64 char *sa, *sb, *rb, *ia, *ib;
65 PyObject *result = NULL;
65 PyObject *result = NULL;
66 struct bdiff_line *al, *bl;
66 struct bdiff_line *al, *bl;
67 struct bdiff_hunk l, *h;
67 struct bdiff_hunk l, *h;
68 int an, bn, count;
68 int an, bn, count;
69 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
69 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 PyThreadState *_save;
70 PyThreadState *_save;
71
71
72 l.next = NULL;
72 l.next = NULL;
73
73
74 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
74 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
75 return NULL;
75 return NULL;
76
76
77 if (la > UINT_MAX || lb > UINT_MAX) {
77 if (la > UINT_MAX || lb > UINT_MAX) {
78 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
78 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
79 return NULL;
79 return NULL;
80 }
80 }
81
81
82 _save = PyEval_SaveThread();
82 _save = PyEval_SaveThread();
83
83
84 lmax = la > lb ? lb : la;
84 lmax = la > lb ? lb : la;
85 for (ia = sa, ib = sb;
85 for (ia = sa, ib = sb;
86 li < lmax && *ia == *ib;
86 li < lmax && *ia == *ib;
87 ++li, ++ia, ++ib)
87 ++li, ++ia, ++ib)
88 if (*ia == '\n')
88 if (*ia == '\n')
89 lcommon = li + 1;
89 lcommon = li + 1;
90 /* we can almost add: if (li == lmax) lcommon = li; */
90 /* we can almost add: if (li == lmax) lcommon = li; */
91
91
92 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
92 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
93 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
93 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
94 if (!al || !bl)
94 if (!al || !bl)
95 goto nomem;
95 goto nomem;
96
96
97 count = bdiff_diff(al, an, bl, bn, &l);
97 count = bdiff_diff(al, an, bl, bn, &l);
98 if (count < 0)
98 if (count < 0)
99 goto nomem;
99 goto nomem;
100
100
101 /* calculate length of output */
101 /* calculate length of output */
102 la = lb = 0;
102 la = lb = 0;
103 for (h = l.next; h; h = h->next) {
103 for (h = l.next; h; h = h->next) {
104 if (h->a1 != la || h->b1 != lb)
104 if (h->a1 != la || h->b1 != lb)
105 len += 12 + bl[h->b1].l - bl[lb].l;
105 len += 12 + bl[h->b1].l - bl[lb].l;
106 la = h->a2;
106 la = h->a2;
107 lb = h->b2;
107 lb = h->b2;
108 }
108 }
109 PyEval_RestoreThread(_save);
109 PyEval_RestoreThread(_save);
110 _save = NULL;
110 _save = NULL;
111
111
112 result = PyBytes_FromStringAndSize(NULL, len);
112 result = PyBytes_FromStringAndSize(NULL, len);
113
113
114 if (!result)
114 if (!result)
115 goto nomem;
115 goto nomem;
116
116
117 /* build binary patch */
117 /* build binary patch */
118 rb = PyBytes_AsString(result);
118 rb = PyBytes_AsString(result);
119 la = lb = 0;
119 la = lb = 0;
120
120
121 for (h = l.next; h; h = h->next) {
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb) {
122 if (h->a1 != la || h->b1 != lb) {
123 len = bl[h->b1].l - bl[lb].l;
123 len = bl[h->b1].l - bl[lb].l;
124 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
124 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
125 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4);
125 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4);
126 putbe32((uint32_t)len, rb + 8);
126 putbe32((uint32_t)len, rb + 8);
127 memcpy(rb + 12, bl[lb].l, len);
127 memcpy(rb + 12, bl[lb].l, len);
128 rb += 12 + len;
128 rb += 12 + len;
129 }
129 }
130 la = h->a2;
130 la = h->a2;
131 lb = h->b2;
131 lb = h->b2;
132 }
132 }
133
133
134 nomem:
134 nomem:
135 if (_save)
135 if (_save)
136 PyEval_RestoreThread(_save);
136 PyEval_RestoreThread(_save);
137 free(al);
137 free(al);
138 free(bl);
138 free(bl);
139 bdiff_freehunks(l.next);
139 bdiff_freehunks(l.next);
140 return result ? result : PyErr_NoMemory();
140 return result ? result : PyErr_NoMemory();
141 }
141 }
142
142
143 /*
143 /*
144 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
144 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
145 * reduce whitespace sequences to a single space and trim remaining whitespace
145 * reduce whitespace sequences to a single space and trim remaining whitespace
146 * from end of lines.
146 * from end of lines.
147 */
147 */
148 static PyObject *fixws(PyObject *self, PyObject *args)
148 static PyObject *fixws(PyObject *self, PyObject *args)
149 {
149 {
150 PyObject *s, *result = NULL;
150 PyObject *s, *result = NULL;
151 char allws, c;
151 char allws, c;
152 const char *r;
152 const char *r;
153 Py_ssize_t i, rlen, wlen = 0;
153 Py_ssize_t i, rlen, wlen = 0;
154 char *w;
154 char *w;
155
155
156 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
156 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
157 return NULL;
157 return NULL;
158 r = PyBytes_AsString(s);
158 r = PyBytes_AsString(s);
159 rlen = PyBytes_Size(s);
159 rlen = PyBytes_Size(s);
160
160
161 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
161 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
162 if (!w)
162 if (!w)
163 goto nomem;
163 goto nomem;
164
164
165 for (i = 0; i != rlen; i++) {
165 for (i = 0; i != rlen; i++) {
166 c = r[i];
166 c = r[i];
167 if (c == ' ' || c == '\t' || c == '\r') {
167 if (c == ' ' || c == '\t' || c == '\r') {
168 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
168 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
169 w[wlen++] = ' ';
169 w[wlen++] = ' ';
170 } else if (c == '\n' && !allws
170 } else if (c == '\n' && !allws
171 && wlen > 0 && w[wlen - 1] == ' ') {
171 && wlen > 0 && w[wlen - 1] == ' ') {
172 w[wlen - 1] = '\n';
172 w[wlen - 1] = '\n';
173 } else {
173 } else {
174 w[wlen++] = c;
174 w[wlen++] = c;
175 }
175 }
176 }
176 }
177
177
178 result = PyBytes_FromStringAndSize(w, wlen);
178 result = PyBytes_FromStringAndSize(w, wlen);
179
179
180 nomem:
180 nomem:
181 PyMem_Free(w);
181 PyMem_Free(w);
182 return result ? result : PyErr_NoMemory();
182 return result ? result : PyErr_NoMemory();
183 }
183 }
184
184
185
185
186 static char mdiff_doc[] = "Efficient binary diff.";
186 static char mdiff_doc[] = "Efficient binary diff.";
187
187
188 static PyMethodDef methods[] = {
188 static PyMethodDef methods[] = {
189 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
189 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
190 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
190 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
191 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
191 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
192 {NULL, NULL}
192 {NULL, NULL}
193 };
193 };
194
194
195 static const int version = 1;
196
195 #ifdef IS_PY3K
197 #ifdef IS_PY3K
196 static struct PyModuleDef bdiff_module = {
198 static struct PyModuleDef bdiff_module = {
197 PyModuleDef_HEAD_INIT,
199 PyModuleDef_HEAD_INIT,
198 "bdiff",
200 "bdiff",
199 mdiff_doc,
201 mdiff_doc,
200 -1,
202 -1,
201 methods
203 methods
202 };
204 };
203
205
204 PyMODINIT_FUNC PyInit_bdiff(void)
206 PyMODINIT_FUNC PyInit_bdiff(void)
205 {
207 {
206 return PyModule_Create(&bdiff_module);
208 PyObject *m;
209 m = PyModule_Create(&bdiff_module);
210 PyModule_AddIntConstant(m, "version", version);
211 return m;
207 }
212 }
208 #else
213 #else
209 PyMODINIT_FUNC initbdiff(void)
214 PyMODINIT_FUNC initbdiff(void)
210 {
215 {
211 Py_InitModule3("bdiff", methods, mdiff_doc);
216 PyObject *m;
217 m = Py_InitModule3("bdiff", methods, mdiff_doc);
218 PyModule_AddIntConstant(m, "version", version);
212 }
219 }
213 #endif
220 #endif
General Comments 0
You need to be logged in to leave comments. Login now