##// END OF EJS Templates
bdiff: include util.h...
Gregory Szorc -
r30170:15635d8b default
parent child Browse files
Show More
@@ -1,203 +1,204 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <stdlib.h>
14 #include <stdlib.h>
15 #include <string.h>
15 #include <string.h>
16 #include <limits.h>
16 #include <limits.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "util.h"
20
21
21
22
22 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 {
24 {
24 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
25 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
26 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
27 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
28
29
29 l.next = NULL;
30 l.next = NULL;
30
31
31 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 return NULL;
33 return NULL;
33
34
34 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36
37
37 if (!a || !b)
38 if (!a || !b)
38 goto nomem;
39 goto nomem;
39
40
40 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
41 if (count < 0)
42 if (count < 0)
42 goto nomem;
43 goto nomem;
43
44
44 rl = PyList_New(count);
45 rl = PyList_New(count);
45 if (!rl)
46 if (!rl)
46 goto nomem;
47 goto nomem;
47
48
48 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
49 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
51 pos++;
52 pos++;
52 }
53 }
53
54
54 nomem:
55 nomem:
55 free(a);
56 free(a);
56 free(b);
57 free(b);
57 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
58 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
59 }
60 }
60
61
61 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 {
63 {
63 char *sa, *sb, *rb;
64 char *sa, *sb, *rb;
64 PyObject *result = NULL;
65 PyObject *result = NULL;
65 struct bdiff_line *al, *bl;
66 struct bdiff_line *al, *bl;
66 struct bdiff_hunk l, *h;
67 struct bdiff_hunk l, *h;
67 int an, bn, count;
68 int an, bn, count;
68 Py_ssize_t len = 0, la, lb;
69 Py_ssize_t len = 0, la, lb;
69 PyThreadState *_save;
70 PyThreadState *_save;
70
71
71 l.next = NULL;
72 l.next = NULL;
72
73
73 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
74 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
74 return NULL;
75 return NULL;
75
76
76 if (la > UINT_MAX || lb > UINT_MAX) {
77 if (la > UINT_MAX || lb > UINT_MAX) {
77 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
78 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
78 return NULL;
79 return NULL;
79 }
80 }
80
81
81 _save = PyEval_SaveThread();
82 _save = PyEval_SaveThread();
82 an = bdiff_splitlines(sa, la, &al);
83 an = bdiff_splitlines(sa, la, &al);
83 bn = bdiff_splitlines(sb, lb, &bl);
84 bn = bdiff_splitlines(sb, lb, &bl);
84 if (!al || !bl)
85 if (!al || !bl)
85 goto nomem;
86 goto nomem;
86
87
87 count = bdiff_diff(al, an, bl, bn, &l);
88 count = bdiff_diff(al, an, bl, bn, &l);
88 if (count < 0)
89 if (count < 0)
89 goto nomem;
90 goto nomem;
90
91
91 /* calculate length of output */
92 /* calculate length of output */
92 la = lb = 0;
93 la = lb = 0;
93 for (h = l.next; h; h = h->next) {
94 for (h = l.next; h; h = h->next) {
94 if (h->a1 != la || h->b1 != lb)
95 if (h->a1 != la || h->b1 != lb)
95 len += 12 + bl[h->b1].l - bl[lb].l;
96 len += 12 + bl[h->b1].l - bl[lb].l;
96 la = h->a2;
97 la = h->a2;
97 lb = h->b2;
98 lb = h->b2;
98 }
99 }
99 PyEval_RestoreThread(_save);
100 PyEval_RestoreThread(_save);
100 _save = NULL;
101 _save = NULL;
101
102
102 result = PyBytes_FromStringAndSize(NULL, len);
103 result = PyBytes_FromStringAndSize(NULL, len);
103
104
104 if (!result)
105 if (!result)
105 goto nomem;
106 goto nomem;
106
107
107 /* build binary patch */
108 /* build binary patch */
108 rb = PyBytes_AsString(result);
109 rb = PyBytes_AsString(result);
109 la = lb = 0;
110 la = lb = 0;
110
111
111 for (h = l.next; h; h = h->next) {
112 for (h = l.next; h; h = h->next) {
112 if (h->a1 != la || h->b1 != lb) {
113 if (h->a1 != la || h->b1 != lb) {
113 len = bl[h->b1].l - bl[lb].l;
114 len = bl[h->b1].l - bl[lb].l;
114 putbe32((uint32_t)(al[la].l - al->l), rb);
115 putbe32((uint32_t)(al[la].l - al->l), rb);
115 putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4);
116 putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4);
116 putbe32((uint32_t)len, rb + 8);
117 putbe32((uint32_t)len, rb + 8);
117 memcpy(rb + 12, bl[lb].l, len);
118 memcpy(rb + 12, bl[lb].l, len);
118 rb += 12 + len;
119 rb += 12 + len;
119 }
120 }
120 la = h->a2;
121 la = h->a2;
121 lb = h->b2;
122 lb = h->b2;
122 }
123 }
123
124
124 nomem:
125 nomem:
125 if (_save)
126 if (_save)
126 PyEval_RestoreThread(_save);
127 PyEval_RestoreThread(_save);
127 free(al);
128 free(al);
128 free(bl);
129 free(bl);
129 bdiff_freehunks(l.next);
130 bdiff_freehunks(l.next);
130 return result ? result : PyErr_NoMemory();
131 return result ? result : PyErr_NoMemory();
131 }
132 }
132
133
133 /*
134 /*
134 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
135 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
135 * reduce whitespace sequences to a single space and trim remaining whitespace
136 * reduce whitespace sequences to a single space and trim remaining whitespace
136 * from end of lines.
137 * from end of lines.
137 */
138 */
138 static PyObject *fixws(PyObject *self, PyObject *args)
139 static PyObject *fixws(PyObject *self, PyObject *args)
139 {
140 {
140 PyObject *s, *result = NULL;
141 PyObject *s, *result = NULL;
141 char allws, c;
142 char allws, c;
142 const char *r;
143 const char *r;
143 Py_ssize_t i, rlen, wlen = 0;
144 Py_ssize_t i, rlen, wlen = 0;
144 char *w;
145 char *w;
145
146
146 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
147 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
147 return NULL;
148 return NULL;
148 r = PyBytes_AsString(s);
149 r = PyBytes_AsString(s);
149 rlen = PyBytes_Size(s);
150 rlen = PyBytes_Size(s);
150
151
151 w = (char *)malloc(rlen ? rlen : 1);
152 w = (char *)malloc(rlen ? rlen : 1);
152 if (!w)
153 if (!w)
153 goto nomem;
154 goto nomem;
154
155
155 for (i = 0; i != rlen; i++) {
156 for (i = 0; i != rlen; i++) {
156 c = r[i];
157 c = r[i];
157 if (c == ' ' || c == '\t' || c == '\r') {
158 if (c == ' ' || c == '\t' || c == '\r') {
158 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
159 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
159 w[wlen++] = ' ';
160 w[wlen++] = ' ';
160 } else if (c == '\n' && !allws
161 } else if (c == '\n' && !allws
161 && wlen > 0 && w[wlen - 1] == ' ') {
162 && wlen > 0 && w[wlen - 1] == ' ') {
162 w[wlen - 1] = '\n';
163 w[wlen - 1] = '\n';
163 } else {
164 } else {
164 w[wlen++] = c;
165 w[wlen++] = c;
165 }
166 }
166 }
167 }
167
168
168 result = PyBytes_FromStringAndSize(w, wlen);
169 result = PyBytes_FromStringAndSize(w, wlen);
169
170
170 nomem:
171 nomem:
171 free(w);
172 free(w);
172 return result ? result : PyErr_NoMemory();
173 return result ? result : PyErr_NoMemory();
173 }
174 }
174
175
175
176
176 static char mdiff_doc[] = "Efficient binary diff.";
177 static char mdiff_doc[] = "Efficient binary diff.";
177
178
178 static PyMethodDef methods[] = {
179 static PyMethodDef methods[] = {
179 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
180 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
180 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
181 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
181 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
182 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
182 {NULL, NULL}
183 {NULL, NULL}
183 };
184 };
184
185
185 #ifdef IS_PY3K
186 #ifdef IS_PY3K
186 static struct PyModuleDef bdiff_module = {
187 static struct PyModuleDef bdiff_module = {
187 PyModuleDef_HEAD_INIT,
188 PyModuleDef_HEAD_INIT,
188 "bdiff",
189 "bdiff",
189 mdiff_doc,
190 mdiff_doc,
190 -1,
191 -1,
191 methods
192 methods
192 };
193 };
193
194
194 PyMODINIT_FUNC PyInit_bdiff(void)
195 PyMODINIT_FUNC PyInit_bdiff(void)
195 {
196 {
196 return PyModule_Create(&bdiff_module);
197 return PyModule_Create(&bdiff_module);
197 }
198 }
198 #else
199 #else
199 PyMODINIT_FUNC initbdiff(void)
200 PyMODINIT_FUNC initbdiff(void)
200 {
201 {
201 Py_InitModule3("bdiff", methods, mdiff_doc);
202 Py_InitModule3("bdiff", methods, mdiff_doc);
202 }
203 }
203 #endif
204 #endif
General Comments 0
You need to be logged in to leave comments. Login now