##// END OF EJS Templates
xdiff: remove xemit related logic...
Jun Wu -
r36781:90f8fe72 default
parent child Browse files
Show More
@@ -1,350 +1,345 b''
1 /*
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
2 bdiff.c - efficient binary diff extension for Mercurial
3
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8
8
9 Based roughly on Python difflib
9 Based roughly on Python difflib
10 */
10 */
11
11
12 #define PY_SSIZE_T_CLEAN
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
13 #include <Python.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bdiff.h"
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "thirdparty/xdiff/xdiff.h"
20 #include "thirdparty/xdiff/xdiff.h"
21 #include "util.h"
21 #include "util.h"
22
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
28 int an, bn, count, pos = 0;
29
29
30 l.next = NULL;
30 l.next = NULL;
31
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
33 return NULL;
34
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
37
38 if (!a || !b)
38 if (!a || !b)
39 goto nomem;
39 goto nomem;
40
40
41 count = bdiff_diff(a, an, b, bn, &l);
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
42 if (count < 0)
43 goto nomem;
43 goto nomem;
44
44
45 rl = PyList_New(count);
45 rl = PyList_New(count);
46 if (!rl)
46 if (!rl)
47 goto nomem;
47 goto nomem;
48
48
49 for (h = l.next; h; h = h->next) {
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
51 PyList_SetItem(rl, pos, m);
52 pos++;
52 pos++;
53 }
53 }
54
54
55 nomem:
55 nomem:
56 free(a);
56 free(a);
57 free(b);
57 free(b);
58 bdiff_freehunks(l.next);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
59 return rl ? rl : PyErr_NoMemory();
60 }
60 }
61
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
63 {
64 Py_buffer ba, bb;
64 Py_buffer ba, bb;
65 char *rb, *ia, *ib;
65 char *rb, *ia, *ib;
66 PyObject *result = NULL;
66 PyObject *result = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
67 struct bdiff_line *al = NULL, *bl = NULL;
68 struct bdiff_hunk l, *h;
68 struct bdiff_hunk l, *h;
69 int an, bn, count;
69 int an, bn, count;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
71 PyThreadState *_save = NULL;
71 PyThreadState *_save = NULL;
72
72
73 l.next = NULL;
73 l.next = NULL;
74
74
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
75 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
76 return NULL;
76 return NULL;
77
77
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
78 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
79 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
80 goto cleanup;
80 goto cleanup;
81 }
81 }
82
82
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
83 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
85 goto cleanup;
85 goto cleanup;
86 }
86 }
87
87
88 la = ba.len;
88 la = ba.len;
89 lb = bb.len;
89 lb = bb.len;
90
90
91 if (la > UINT_MAX || lb > UINT_MAX) {
91 if (la > UINT_MAX || lb > UINT_MAX) {
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
92 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
93 goto cleanup;
93 goto cleanup;
94 }
94 }
95
95
96 _save = PyEval_SaveThread();
96 _save = PyEval_SaveThread();
97
97
98 lmax = la > lb ? lb : la;
98 lmax = la > lb ? lb : la;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
99 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
100 ++li, ++ia, ++ib) {
100 ++li, ++ia, ++ib) {
101 if (*ia == '\n')
101 if (*ia == '\n')
102 lcommon = li + 1;
102 lcommon = li + 1;
103 }
103 }
104 /* we can almost add: if (li == lmax) lcommon = li; */
104 /* we can almost add: if (li == lmax) lcommon = li; */
105
105
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
106 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
107 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
108 if (!al || !bl) {
108 if (!al || !bl) {
109 PyErr_NoMemory();
109 PyErr_NoMemory();
110 goto cleanup;
110 goto cleanup;
111 }
111 }
112
112
113 count = bdiff_diff(al, an, bl, bn, &l);
113 count = bdiff_diff(al, an, bl, bn, &l);
114 if (count < 0) {
114 if (count < 0) {
115 PyErr_NoMemory();
115 PyErr_NoMemory();
116 goto cleanup;
116 goto cleanup;
117 }
117 }
118
118
119 /* calculate length of output */
119 /* calculate length of output */
120 la = lb = 0;
120 la = lb = 0;
121 for (h = l.next; h; h = h->next) {
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb)
122 if (h->a1 != la || h->b1 != lb)
123 len += 12 + bl[h->b1].l - bl[lb].l;
123 len += 12 + bl[h->b1].l - bl[lb].l;
124 la = h->a2;
124 la = h->a2;
125 lb = h->b2;
125 lb = h->b2;
126 }
126 }
127 PyEval_RestoreThread(_save);
127 PyEval_RestoreThread(_save);
128 _save = NULL;
128 _save = NULL;
129
129
130 result = PyBytes_FromStringAndSize(NULL, len);
130 result = PyBytes_FromStringAndSize(NULL, len);
131
131
132 if (!result)
132 if (!result)
133 goto cleanup;
133 goto cleanup;
134
134
135 /* build binary patch */
135 /* build binary patch */
136 rb = PyBytes_AsString(result);
136 rb = PyBytes_AsString(result);
137 la = lb = 0;
137 la = lb = 0;
138
138
139 for (h = l.next; h; h = h->next) {
139 for (h = l.next; h; h = h->next) {
140 if (h->a1 != la || h->b1 != lb) {
140 if (h->a1 != la || h->b1 != lb) {
141 len = bl[h->b1].l - bl[lb].l;
141 len = bl[h->b1].l - bl[lb].l;
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
142 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
143 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
144 rb + 4);
144 rb + 4);
145 putbe32((uint32_t)len, rb + 8);
145 putbe32((uint32_t)len, rb + 8);
146 memcpy(rb + 12, bl[lb].l, len);
146 memcpy(rb + 12, bl[lb].l, len);
147 rb += 12 + len;
147 rb += 12 + len;
148 }
148 }
149 la = h->a2;
149 la = h->a2;
150 lb = h->b2;
150 lb = h->b2;
151 }
151 }
152
152
153 cleanup:
153 cleanup:
154 if (_save)
154 if (_save)
155 PyEval_RestoreThread(_save);
155 PyEval_RestoreThread(_save);
156 PyBuffer_Release(&ba);
156 PyBuffer_Release(&ba);
157 PyBuffer_Release(&bb);
157 PyBuffer_Release(&bb);
158 if (al) {
158 if (al) {
159 free(al);
159 free(al);
160 }
160 }
161 if (bl) {
161 if (bl) {
162 free(bl);
162 free(bl);
163 }
163 }
164 if (l.next) {
164 if (l.next) {
165 bdiff_freehunks(l.next);
165 bdiff_freehunks(l.next);
166 }
166 }
167 return result;
167 return result;
168 }
168 }
169
169
170 /*
170 /*
171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
171 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
172 * reduce whitespace sequences to a single space and trim remaining whitespace
172 * reduce whitespace sequences to a single space and trim remaining whitespace
173 * from end of lines.
173 * from end of lines.
174 */
174 */
175 static PyObject *fixws(PyObject *self, PyObject *args)
175 static PyObject *fixws(PyObject *self, PyObject *args)
176 {
176 {
177 PyObject *s, *result = NULL;
177 PyObject *s, *result = NULL;
178 char allws, c;
178 char allws, c;
179 const char *r;
179 const char *r;
180 Py_ssize_t i, rlen, wlen = 0;
180 Py_ssize_t i, rlen, wlen = 0;
181 char *w;
181 char *w;
182
182
183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
183 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
184 return NULL;
184 return NULL;
185 r = PyBytes_AsString(s);
185 r = PyBytes_AsString(s);
186 rlen = PyBytes_Size(s);
186 rlen = PyBytes_Size(s);
187
187
188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
188 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
189 if (!w)
189 if (!w)
190 goto nomem;
190 goto nomem;
191
191
192 for (i = 0; i != rlen; i++) {
192 for (i = 0; i != rlen; i++) {
193 c = r[i];
193 c = r[i];
194 if (c == ' ' || c == '\t' || c == '\r') {
194 if (c == ' ' || c == '\t' || c == '\r') {
195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
195 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
196 w[wlen++] = ' ';
196 w[wlen++] = ' ';
197 } else if (c == '\n' && !allws && wlen > 0 &&
197 } else if (c == '\n' && !allws && wlen > 0 &&
198 w[wlen - 1] == ' ') {
198 w[wlen - 1] == ' ') {
199 w[wlen - 1] = '\n';
199 w[wlen - 1] = '\n';
200 } else {
200 } else {
201 w[wlen++] = c;
201 w[wlen++] = c;
202 }
202 }
203 }
203 }
204
204
205 result = PyBytes_FromStringAndSize(w, wlen);
205 result = PyBytes_FromStringAndSize(w, wlen);
206
206
207 nomem:
207 nomem:
208 PyMem_Free(w);
208 PyMem_Free(w);
209 return result ? result : PyErr_NoMemory();
209 return result ? result : PyErr_NoMemory();
210 }
210 }
211
211
212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
212 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
213 const char *source, Py_ssize_t len)
213 const char *source, Py_ssize_t len)
214 {
214 {
215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
215 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
216 if (sliced == NULL)
216 if (sliced == NULL)
217 return false;
217 return false;
218 PyList_SET_ITEM(list, destidx, sliced);
218 PyList_SET_ITEM(list, destidx, sliced);
219 return true;
219 return true;
220 }
220 }
221
221
222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
222 static PyObject *splitnewlines(PyObject *self, PyObject *args)
223 {
223 {
224 const char *text;
224 const char *text;
225 Py_ssize_t nelts = 0, size, i, start = 0;
225 Py_ssize_t nelts = 0, size, i, start = 0;
226 PyObject *result = NULL;
226 PyObject *result = NULL;
227
227
228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
228 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
229 goto abort;
229 goto abort;
230 }
230 }
231 if (!size) {
231 if (!size) {
232 return PyList_New(0);
232 return PyList_New(0);
233 }
233 }
234 /* This loops to size-1 because if the last byte is a newline,
234 /* This loops to size-1 because if the last byte is a newline,
235 * we don't want to perform a split there. */
235 * we don't want to perform a split there. */
236 for (i = 0; i < size - 1; ++i) {
236 for (i = 0; i < size - 1; ++i) {
237 if (text[i] == '\n') {
237 if (text[i] == '\n') {
238 ++nelts;
238 ++nelts;
239 }
239 }
240 }
240 }
241 if ((result = PyList_New(nelts + 1)) == NULL)
241 if ((result = PyList_New(nelts + 1)) == NULL)
242 goto abort;
242 goto abort;
243 nelts = 0;
243 nelts = 0;
244 for (i = 0; i < size - 1; ++i) {
244 for (i = 0; i < size - 1; ++i) {
245 if (text[i] == '\n') {
245 if (text[i] == '\n') {
246 if (!sliceintolist(result, nelts++, text + start,
246 if (!sliceintolist(result, nelts++, text + start,
247 i - start + 1))
247 i - start + 1))
248 goto abort;
248 goto abort;
249 start = i + 1;
249 start = i + 1;
250 }
250 }
251 }
251 }
252 if (!sliceintolist(result, nelts++, text + start, size - start))
252 if (!sliceintolist(result, nelts++, text + start, size - start))
253 goto abort;
253 goto abort;
254 return result;
254 return result;
255 abort:
255 abort:
256 Py_XDECREF(result);
256 Py_XDECREF(result);
257 return NULL;
257 return NULL;
258 }
258 }
259
259
260 static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
260 static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
261 {
261 {
262 PyObject *rl = (PyObject *)priv;
262 PyObject *rl = (PyObject *)priv;
263 PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
263 PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
264 if (!m)
264 if (!m)
265 return -1;
265 return -1;
266 if (PyList_Append(rl, m) != 0) {
266 if (PyList_Append(rl, m) != 0) {
267 Py_DECREF(m);
267 Py_DECREF(m);
268 return -1;
268 return -1;
269 }
269 }
270 return 0;
270 return 0;
271 }
271 }
272
272
273 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
273 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
274 {
274 {
275 Py_ssize_t la, lb;
275 Py_ssize_t la, lb;
276 mmfile_t a, b;
276 mmfile_t a, b;
277 PyObject *rl;
277 PyObject *rl;
278
278
279 xpparam_t xpp = {
279 xpparam_t xpp = {
280 XDF_INDENT_HEURISTIC, /* flags */
280 XDF_INDENT_HEURISTIC, /* flags */
281 };
281 };
282 xdemitconf_t xecfg = {
282 xdemitconf_t xecfg = {
283 0, /* ctxlen */
284 0, /* interhunkctxlen */
285 XDL_EMIT_BDIFFHUNK, /* flags */
283 XDL_EMIT_BDIFFHUNK, /* flags */
286 NULL, /* find_func */
287 NULL, /* find_func_priv */
288 hunk_consumer, /* hunk_consume_func */
284 hunk_consumer, /* hunk_consume_func */
289 };
285 };
290 xdemitcb_t ecb = {
286 xdemitcb_t ecb = {
291 NULL, /* priv */
287 NULL, /* priv */
292 NULL, /* outf */
293 };
288 };
294
289
295 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
290 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
296 &lb))
291 &lb))
297 return NULL;
292 return NULL;
298
293
299 a.size = la;
294 a.size = la;
300 b.size = lb;
295 b.size = lb;
301
296
302 rl = PyList_New(0);
297 rl = PyList_New(0);
303 if (!rl)
298 if (!rl)
304 return PyErr_NoMemory();
299 return PyErr_NoMemory();
305
300
306 ecb.priv = rl;
301 ecb.priv = rl;
307
302
308 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
303 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
309 Py_DECREF(rl);
304 Py_DECREF(rl);
310 return PyErr_NoMemory();
305 return PyErr_NoMemory();
311 }
306 }
312
307
313 return rl;
308 return rl;
314 }
309 }
315
310
316 static char mdiff_doc[] = "Efficient binary diff.";
311 static char mdiff_doc[] = "Efficient binary diff.";
317
312
318 static PyMethodDef methods[] = {
313 static PyMethodDef methods[] = {
319 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
314 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
320 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
315 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
321 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
316 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
322 {"splitnewlines", splitnewlines, METH_VARARGS,
317 {"splitnewlines", splitnewlines, METH_VARARGS,
323 "like str.splitlines, but only split on newlines\n"},
318 "like str.splitlines, but only split on newlines\n"},
324 {"xdiffblocks", xdiffblocks, METH_VARARGS,
319 {"xdiffblocks", xdiffblocks, METH_VARARGS,
325 "find a list of matching lines using xdiff algorithm\n"},
320 "find a list of matching lines using xdiff algorithm\n"},
326 {NULL, NULL},
321 {NULL, NULL},
327 };
322 };
328
323
329 static const int version = 3;
324 static const int version = 3;
330
325
331 #ifdef IS_PY3K
326 #ifdef IS_PY3K
332 static struct PyModuleDef bdiff_module = {
327 static struct PyModuleDef bdiff_module = {
333 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
328 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
334 };
329 };
335
330
336 PyMODINIT_FUNC PyInit_bdiff(void)
331 PyMODINIT_FUNC PyInit_bdiff(void)
337 {
332 {
338 PyObject *m;
333 PyObject *m;
339 m = PyModule_Create(&bdiff_module);
334 m = PyModule_Create(&bdiff_module);
340 PyModule_AddIntConstant(m, "version", version);
335 PyModule_AddIntConstant(m, "version", version);
341 return m;
336 return m;
342 }
337 }
343 #else
338 #else
344 PyMODINIT_FUNC initbdiff(void)
339 PyMODINIT_FUNC initbdiff(void)
345 {
340 {
346 PyObject *m;
341 PyObject *m;
347 m = Py_InitModule3("bdiff", methods, mdiff_doc);
342 m = Py_InitModule3("bdiff", methods, mdiff_doc);
348 PyModule_AddIntConstant(m, "version", version);
343 PyModule_AddIntConstant(m, "version", version);
349 }
344 }
350 #endif
345 #endif
@@ -1,123 +1,113 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #if !defined(XDIFF_H)
23 #if !defined(XDIFF_H)
24 #define XDIFF_H
24 #define XDIFF_H
25
25
26 #ifdef __cplusplus
26 #ifdef __cplusplus
27 extern "C" {
27 extern "C" {
28 #endif /* #ifdef __cplusplus */
28 #endif /* #ifdef __cplusplus */
29
29
30 #include <stddef.h> /* size_t */
30 #include <stddef.h> /* size_t */
31
31
32 /* xpparm_t.flags */
32 /* xpparm_t.flags */
33 #define XDF_NEED_MINIMAL (1 << 0)
33 #define XDF_NEED_MINIMAL (1 << 0)
34
34
35 #define XDF_INDENT_HEURISTIC (1 << 23)
35 #define XDF_INDENT_HEURISTIC (1 << 23)
36
36
37 /* xdemitconf_t.flags */
38 #define XDL_EMIT_FUNCNAMES (1 << 0)
39 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
40 /* emit bdiff-style "matched" (a1, a2, b1, b2) hunks instead of "different"
37 /* emit bdiff-style "matched" (a1, a2, b1, b2) hunks instead of "different"
41 * (a1, a2 - a1, b1, b2 - b1) hunks */
38 * (a1, a2 - a1, b1, b2 - b1) hunks */
42 #define XDL_EMIT_BDIFFHUNK (1 << 4)
39 #define XDL_EMIT_BDIFFHUNK (1 << 4)
43
40
44 /* merge simplification levels */
41 /* merge simplification levels */
45 #define XDL_MERGE_MINIMAL 0
42 #define XDL_MERGE_MINIMAL 0
46 #define XDL_MERGE_EAGER 1
43 #define XDL_MERGE_EAGER 1
47 #define XDL_MERGE_ZEALOUS 2
44 #define XDL_MERGE_ZEALOUS 2
48 #define XDL_MERGE_ZEALOUS_ALNUM 3
45 #define XDL_MERGE_ZEALOUS_ALNUM 3
49
46
50 /* merge favor modes */
47 /* merge favor modes */
51 #define XDL_MERGE_FAVOR_OURS 1
48 #define XDL_MERGE_FAVOR_OURS 1
52 #define XDL_MERGE_FAVOR_THEIRS 2
49 #define XDL_MERGE_FAVOR_THEIRS 2
53 #define XDL_MERGE_FAVOR_UNION 3
50 #define XDL_MERGE_FAVOR_UNION 3
54
51
55 /* merge output styles */
52 /* merge output styles */
56 #define XDL_MERGE_DIFF3 1
53 #define XDL_MERGE_DIFF3 1
57
54
58 typedef struct s_mmfile {
55 typedef struct s_mmfile {
59 char *ptr;
56 char *ptr;
60 long size;
57 long size;
61 } mmfile_t;
58 } mmfile_t;
62
59
63 typedef struct s_mmbuffer {
60 typedef struct s_mmbuffer {
64 char *ptr;
61 char *ptr;
65 long size;
62 long size;
66 } mmbuffer_t;
63 } mmbuffer_t;
67
64
68 typedef struct s_xpparam {
65 typedef struct s_xpparam {
69 unsigned long flags;
66 unsigned long flags;
70 } xpparam_t;
67 } xpparam_t;
71
68
72 typedef struct s_xdemitcb {
69 typedef struct s_xdemitcb {
73 void *priv;
70 void *priv;
74 int (*outf)(void *, mmbuffer_t *, int);
75 } xdemitcb_t;
71 } xdemitcb_t;
76
72
77 typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv);
78
79 typedef int (*xdl_emit_hunk_consume_func_t)(long start_a, long count_a,
73 typedef int (*xdl_emit_hunk_consume_func_t)(long start_a, long count_a,
80 long start_b, long count_b,
74 long start_b, long count_b,
81 void *cb_data);
75 void *cb_data);
82
76
83 typedef struct s_xdemitconf {
77 typedef struct s_xdemitconf {
84 long ctxlen;
85 long interhunkctxlen;
86 unsigned long flags;
78 unsigned long flags;
87 find_func_t find_func;
88 void *find_func_priv;
89 xdl_emit_hunk_consume_func_t hunk_func;
79 xdl_emit_hunk_consume_func_t hunk_func;
90 } xdemitconf_t;
80 } xdemitconf_t;
91
81
92
82
93 #define xdl_malloc(x) malloc(x)
83 #define xdl_malloc(x) malloc(x)
94 #define xdl_free(ptr) free(ptr)
84 #define xdl_free(ptr) free(ptr)
95 #define xdl_realloc(ptr,x) realloc(ptr,x)
85 #define xdl_realloc(ptr,x) realloc(ptr,x)
96
86
97 void *xdl_mmfile_first(mmfile_t *mmf, long *size);
87 void *xdl_mmfile_first(mmfile_t *mmf, long *size);
98 long xdl_mmfile_size(mmfile_t *mmf);
88 long xdl_mmfile_size(mmfile_t *mmf);
99
89
100 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
90 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
101 xdemitconf_t const *xecfg, xdemitcb_t *ecb);
91 xdemitconf_t const *xecfg, xdemitcb_t *ecb);
102
92
103 typedef struct s_xmparam {
93 typedef struct s_xmparam {
104 xpparam_t xpp;
94 xpparam_t xpp;
105 int marker_size;
95 int marker_size;
106 int level;
96 int level;
107 int favor;
97 int favor;
108 int style;
98 int style;
109 const char *ancestor; /* label for orig */
99 const char *ancestor; /* label for orig */
110 const char *file1; /* label for mf1 */
100 const char *file1; /* label for mf1 */
111 const char *file2; /* label for mf2 */
101 const char *file2; /* label for mf2 */
112 } xmparam_t;
102 } xmparam_t;
113
103
114 #define DEFAULT_CONFLICT_MARKER_SIZE 7
104 #define DEFAULT_CONFLICT_MARKER_SIZE 7
115
105
116 int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
106 int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
117 xmparam_t const *xmp, mmbuffer_t *result);
107 xmparam_t const *xmp, mmbuffer_t *result);
118
108
119 #ifdef __cplusplus
109 #ifdef __cplusplus
120 }
110 }
121 #endif /* #ifdef __cplusplus */
111 #endif /* #ifdef __cplusplus */
122
112
123 #endif /* #if !defined(XDIFF_H) */
113 #endif /* #if !defined(XDIFF_H) */
@@ -1,1071 +1,1126 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #include "xinclude.h"
23 #include "xinclude.h"
24
24
25
25
26
26
27 #define XDL_MAX_COST_MIN 256
27 #define XDL_MAX_COST_MIN 256
28 #define XDL_HEUR_MIN_COST 256
28 #define XDL_HEUR_MIN_COST 256
29 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
29 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
30 #define XDL_SNAKE_CNT 20
30 #define XDL_SNAKE_CNT 20
31 #define XDL_K_HEUR 4
31 #define XDL_K_HEUR 4
32
32
33 /* VC 2008 doesn't know about the inline keyword. */
33 /* VC 2008 doesn't know about the inline keyword. */
34 #if defined(_MSC_VER)
34 #if defined(_MSC_VER)
35 #define inline __forceinline
35 #define inline __forceinline
36 #endif
36 #endif
37
37
38
38
39 typedef struct s_xdpsplit {
39 typedef struct s_xdpsplit {
40 long i1, i2;
40 long i1, i2;
41 int min_lo, min_hi;
41 int min_lo, min_hi;
42 } xdpsplit_t;
42 } xdpsplit_t;
43
43
44
44
45
45
46
46
47 static long xdl_split(unsigned long const *ha1, long off1, long lim1,
47 static long xdl_split(unsigned long const *ha1, long off1, long lim1,
48 unsigned long const *ha2, long off2, long lim2,
48 unsigned long const *ha2, long off2, long lim2,
49 long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
49 long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
50 xdalgoenv_t *xenv);
50 xdalgoenv_t *xenv);
51 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
51 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
52
52
53
53
54
54
55
55
56
56
57 /*
57 /*
58 * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
58 * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
59 * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
59 * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
60 * the forward diagonal starting from (off1, off2) and the backward diagonal
60 * the forward diagonal starting from (off1, off2) and the backward diagonal
61 * starting from (lim1, lim2). If the K values on the same diagonal crosses
61 * starting from (lim1, lim2). If the K values on the same diagonal crosses
62 * returns the furthest point of reach. We might end up having to expensive
62 * returns the furthest point of reach. We might end up having to expensive
63 * cases using this algorithm is full, so a little bit of heuristic is needed
63 * cases using this algorithm is full, so a little bit of heuristic is needed
64 * to cut the search and to return a suboptimal point.
64 * to cut the search and to return a suboptimal point.
65 */
65 */
66 static long xdl_split(unsigned long const *ha1, long off1, long lim1,
66 static long xdl_split(unsigned long const *ha1, long off1, long lim1,
67 unsigned long const *ha2, long off2, long lim2,
67 unsigned long const *ha2, long off2, long lim2,
68 long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
68 long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
69 xdalgoenv_t *xenv) {
69 xdalgoenv_t *xenv) {
70 long dmin = off1 - lim2, dmax = lim1 - off2;
70 long dmin = off1 - lim2, dmax = lim1 - off2;
71 long fmid = off1 - off2, bmid = lim1 - lim2;
71 long fmid = off1 - off2, bmid = lim1 - lim2;
72 long odd = (fmid - bmid) & 1;
72 long odd = (fmid - bmid) & 1;
73 long fmin = fmid, fmax = fmid;
73 long fmin = fmid, fmax = fmid;
74 long bmin = bmid, bmax = bmid;
74 long bmin = bmid, bmax = bmid;
75 long ec, d, i1, i2, prev1, best, dd, v, k;
75 long ec, d, i1, i2, prev1, best, dd, v, k;
76
76
77 /*
77 /*
78 * Set initial diagonal values for both forward and backward path.
78 * Set initial diagonal values for both forward and backward path.
79 */
79 */
80 kvdf[fmid] = off1;
80 kvdf[fmid] = off1;
81 kvdb[bmid] = lim1;
81 kvdb[bmid] = lim1;
82
82
83 for (ec = 1;; ec++) {
83 for (ec = 1;; ec++) {
84 int got_snake = 0;
84 int got_snake = 0;
85
85
86 /*
86 /*
87 * We need to extent the diagonal "domain" by one. If the next
87 * We need to extent the diagonal "domain" by one. If the next
88 * values exits the box boundaries we need to change it in the
88 * values exits the box boundaries we need to change it in the
89 * opposite direction because (max - min) must be a power of two.
89 * opposite direction because (max - min) must be a power of two.
90 * Also we initialize the external K value to -1 so that we can
90 * Also we initialize the external K value to -1 so that we can
91 * avoid extra conditions check inside the core loop.
91 * avoid extra conditions check inside the core loop.
92 */
92 */
93 if (fmin > dmin)
93 if (fmin > dmin)
94 kvdf[--fmin - 1] = -1;
94 kvdf[--fmin - 1] = -1;
95 else
95 else
96 ++fmin;
96 ++fmin;
97 if (fmax < dmax)
97 if (fmax < dmax)
98 kvdf[++fmax + 1] = -1;
98 kvdf[++fmax + 1] = -1;
99 else
99 else
100 --fmax;
100 --fmax;
101
101
102 for (d = fmax; d >= fmin; d -= 2) {
102 for (d = fmax; d >= fmin; d -= 2) {
103 if (kvdf[d - 1] >= kvdf[d + 1])
103 if (kvdf[d - 1] >= kvdf[d + 1])
104 i1 = kvdf[d - 1] + 1;
104 i1 = kvdf[d - 1] + 1;
105 else
105 else
106 i1 = kvdf[d + 1];
106 i1 = kvdf[d + 1];
107 prev1 = i1;
107 prev1 = i1;
108 i2 = i1 - d;
108 i2 = i1 - d;
109 for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
109 for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
110 if (i1 - prev1 > xenv->snake_cnt)
110 if (i1 - prev1 > xenv->snake_cnt)
111 got_snake = 1;
111 got_snake = 1;
112 kvdf[d] = i1;
112 kvdf[d] = i1;
113 if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
113 if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
114 spl->i1 = i1;
114 spl->i1 = i1;
115 spl->i2 = i2;
115 spl->i2 = i2;
116 spl->min_lo = spl->min_hi = 1;
116 spl->min_lo = spl->min_hi = 1;
117 return ec;
117 return ec;
118 }
118 }
119 }
119 }
120
120
121 /*
121 /*
122 * We need to extent the diagonal "domain" by one. If the next
122 * We need to extent the diagonal "domain" by one. If the next
123 * values exits the box boundaries we need to change it in the
123 * values exits the box boundaries we need to change it in the
124 * opposite direction because (max - min) must be a power of two.
124 * opposite direction because (max - min) must be a power of two.
125 * Also we initialize the external K value to -1 so that we can
125 * Also we initialize the external K value to -1 so that we can
126 * avoid extra conditions check inside the core loop.
126 * avoid extra conditions check inside the core loop.
127 */
127 */
128 if (bmin > dmin)
128 if (bmin > dmin)
129 kvdb[--bmin - 1] = XDL_LINE_MAX;
129 kvdb[--bmin - 1] = XDL_LINE_MAX;
130 else
130 else
131 ++bmin;
131 ++bmin;
132 if (bmax < dmax)
132 if (bmax < dmax)
133 kvdb[++bmax + 1] = XDL_LINE_MAX;
133 kvdb[++bmax + 1] = XDL_LINE_MAX;
134 else
134 else
135 --bmax;
135 --bmax;
136
136
137 for (d = bmax; d >= bmin; d -= 2) {
137 for (d = bmax; d >= bmin; d -= 2) {
138 if (kvdb[d - 1] < kvdb[d + 1])
138 if (kvdb[d - 1] < kvdb[d + 1])
139 i1 = kvdb[d - 1];
139 i1 = kvdb[d - 1];
140 else
140 else
141 i1 = kvdb[d + 1] - 1;
141 i1 = kvdb[d + 1] - 1;
142 prev1 = i1;
142 prev1 = i1;
143 i2 = i1 - d;
143 i2 = i1 - d;
144 for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
144 for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
145 if (prev1 - i1 > xenv->snake_cnt)
145 if (prev1 - i1 > xenv->snake_cnt)
146 got_snake = 1;
146 got_snake = 1;
147 kvdb[d] = i1;
147 kvdb[d] = i1;
148 if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
148 if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
149 spl->i1 = i1;
149 spl->i1 = i1;
150 spl->i2 = i2;
150 spl->i2 = i2;
151 spl->min_lo = spl->min_hi = 1;
151 spl->min_lo = spl->min_hi = 1;
152 return ec;
152 return ec;
153 }
153 }
154 }
154 }
155
155
156 if (need_min)
156 if (need_min)
157 continue;
157 continue;
158
158
159 /*
159 /*
160 * If the edit cost is above the heuristic trigger and if
160 * If the edit cost is above the heuristic trigger and if
161 * we got a good snake, we sample current diagonals to see
161 * we got a good snake, we sample current diagonals to see
162 * if some of them have reached an "interesting" path. Our
162 * if some of them have reached an "interesting" path. Our
163 * measure is a function of the distance from the diagonal
163 * measure is a function of the distance from the diagonal
164 * corner (i1 + i2) penalized with the distance from the
164 * corner (i1 + i2) penalized with the distance from the
165 * mid diagonal itself. If this value is above the current
165 * mid diagonal itself. If this value is above the current
166 * edit cost times a magic factor (XDL_K_HEUR) we consider
166 * edit cost times a magic factor (XDL_K_HEUR) we consider
167 * it interesting.
167 * it interesting.
168 */
168 */
169 if (got_snake && ec > xenv->heur_min) {
169 if (got_snake && ec > xenv->heur_min) {
170 for (best = 0, d = fmax; d >= fmin; d -= 2) {
170 for (best = 0, d = fmax; d >= fmin; d -= 2) {
171 dd = d > fmid ? d - fmid: fmid - d;
171 dd = d > fmid ? d - fmid: fmid - d;
172 i1 = kvdf[d];
172 i1 = kvdf[d];
173 i2 = i1 - d;
173 i2 = i1 - d;
174 v = (i1 - off1) + (i2 - off2) - dd;
174 v = (i1 - off1) + (i2 - off2) - dd;
175
175
176 if (v > XDL_K_HEUR * ec && v > best &&
176 if (v > XDL_K_HEUR * ec && v > best &&
177 off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
177 off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
178 off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
178 off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
179 for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
179 for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
180 if (k == xenv->snake_cnt) {
180 if (k == xenv->snake_cnt) {
181 best = v;
181 best = v;
182 spl->i1 = i1;
182 spl->i1 = i1;
183 spl->i2 = i2;
183 spl->i2 = i2;
184 break;
184 break;
185 }
185 }
186 }
186 }
187 }
187 }
188 if (best > 0) {
188 if (best > 0) {
189 spl->min_lo = 1;
189 spl->min_lo = 1;
190 spl->min_hi = 0;
190 spl->min_hi = 0;
191 return ec;
191 return ec;
192 }
192 }
193
193
194 for (best = 0, d = bmax; d >= bmin; d -= 2) {
194 for (best = 0, d = bmax; d >= bmin; d -= 2) {
195 dd = d > bmid ? d - bmid: bmid - d;
195 dd = d > bmid ? d - bmid: bmid - d;
196 i1 = kvdb[d];
196 i1 = kvdb[d];
197 i2 = i1 - d;
197 i2 = i1 - d;
198 v = (lim1 - i1) + (lim2 - i2) - dd;
198 v = (lim1 - i1) + (lim2 - i2) - dd;
199
199
200 if (v > XDL_K_HEUR * ec && v > best &&
200 if (v > XDL_K_HEUR * ec && v > best &&
201 off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
201 off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
202 off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
202 off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
203 for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
203 for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
204 if (k == xenv->snake_cnt - 1) {
204 if (k == xenv->snake_cnt - 1) {
205 best = v;
205 best = v;
206 spl->i1 = i1;
206 spl->i1 = i1;
207 spl->i2 = i2;
207 spl->i2 = i2;
208 break;
208 break;
209 }
209 }
210 }
210 }
211 }
211 }
212 if (best > 0) {
212 if (best > 0) {
213 spl->min_lo = 0;
213 spl->min_lo = 0;
214 spl->min_hi = 1;
214 spl->min_hi = 1;
215 return ec;
215 return ec;
216 }
216 }
217 }
217 }
218
218
219 /*
219 /*
220 * Enough is enough. We spent too much time here and now we collect
220 * Enough is enough. We spent too much time here and now we collect
221 * the furthest reaching path using the (i1 + i2) measure.
221 * the furthest reaching path using the (i1 + i2) measure.
222 */
222 */
223 if (ec >= xenv->mxcost) {
223 if (ec >= xenv->mxcost) {
224 long fbest, fbest1, bbest, bbest1;
224 long fbest, fbest1, bbest, bbest1;
225
225
226 fbest = fbest1 = -1;
226 fbest = fbest1 = -1;
227 for (d = fmax; d >= fmin; d -= 2) {
227 for (d = fmax; d >= fmin; d -= 2) {
228 i1 = XDL_MIN(kvdf[d], lim1);
228 i1 = XDL_MIN(kvdf[d], lim1);
229 i2 = i1 - d;
229 i2 = i1 - d;
230 if (lim2 < i2)
230 if (lim2 < i2)
231 i1 = lim2 + d, i2 = lim2;
231 i1 = lim2 + d, i2 = lim2;
232 if (fbest < i1 + i2) {
232 if (fbest < i1 + i2) {
233 fbest = i1 + i2;
233 fbest = i1 + i2;
234 fbest1 = i1;
234 fbest1 = i1;
235 }
235 }
236 }
236 }
237
237
238 bbest = bbest1 = XDL_LINE_MAX;
238 bbest = bbest1 = XDL_LINE_MAX;
239 for (d = bmax; d >= bmin; d -= 2) {
239 for (d = bmax; d >= bmin; d -= 2) {
240 i1 = XDL_MAX(off1, kvdb[d]);
240 i1 = XDL_MAX(off1, kvdb[d]);
241 i2 = i1 - d;
241 i2 = i1 - d;
242 if (i2 < off2)
242 if (i2 < off2)
243 i1 = off2 + d, i2 = off2;
243 i1 = off2 + d, i2 = off2;
244 if (i1 + i2 < bbest) {
244 if (i1 + i2 < bbest) {
245 bbest = i1 + i2;
245 bbest = i1 + i2;
246 bbest1 = i1;
246 bbest1 = i1;
247 }
247 }
248 }
248 }
249
249
250 if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
250 if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
251 spl->i1 = fbest1;
251 spl->i1 = fbest1;
252 spl->i2 = fbest - fbest1;
252 spl->i2 = fbest - fbest1;
253 spl->min_lo = 1;
253 spl->min_lo = 1;
254 spl->min_hi = 0;
254 spl->min_hi = 0;
255 } else {
255 } else {
256 spl->i1 = bbest1;
256 spl->i1 = bbest1;
257 spl->i2 = bbest - bbest1;
257 spl->i2 = bbest - bbest1;
258 spl->min_lo = 0;
258 spl->min_lo = 0;
259 spl->min_hi = 1;
259 spl->min_hi = 1;
260 }
260 }
261 return ec;
261 return ec;
262 }
262 }
263 }
263 }
264 }
264 }
265
265
266
266
267 /*
267 /*
268 * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
268 * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
269 * the box splitting function. Note that the real job (marking changed lines)
269 * the box splitting function. Note that the real job (marking changed lines)
270 * is done in the two boundary reaching checks.
270 * is done in the two boundary reaching checks.
271 */
271 */
272 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
272 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
273 diffdata_t *dd2, long off2, long lim2,
273 diffdata_t *dd2, long off2, long lim2,
274 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
274 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
275 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
275 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
276
276
277 /*
277 /*
278 * Shrink the box by walking through each diagonal snake (SW and NE).
278 * Shrink the box by walking through each diagonal snake (SW and NE).
279 */
279 */
280 for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
280 for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
281 for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
281 for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
282
282
283 /*
283 /*
284 * If one dimension is empty, then all records on the other one must
284 * If one dimension is empty, then all records on the other one must
285 * be obviously changed.
285 * be obviously changed.
286 */
286 */
287 if (off1 == lim1) {
287 if (off1 == lim1) {
288 char *rchg2 = dd2->rchg;
288 char *rchg2 = dd2->rchg;
289 long *rindex2 = dd2->rindex;
289 long *rindex2 = dd2->rindex;
290
290
291 for (; off2 < lim2; off2++)
291 for (; off2 < lim2; off2++)
292 rchg2[rindex2[off2]] = 1;
292 rchg2[rindex2[off2]] = 1;
293 } else if (off2 == lim2) {
293 } else if (off2 == lim2) {
294 char *rchg1 = dd1->rchg;
294 char *rchg1 = dd1->rchg;
295 long *rindex1 = dd1->rindex;
295 long *rindex1 = dd1->rindex;
296
296
297 for (; off1 < lim1; off1++)
297 for (; off1 < lim1; off1++)
298 rchg1[rindex1[off1]] = 1;
298 rchg1[rindex1[off1]] = 1;
299 } else {
299 } else {
300 xdpsplit_t spl;
300 xdpsplit_t spl;
301 spl.i1 = spl.i2 = 0;
301 spl.i1 = spl.i2 = 0;
302
302
303 /*
303 /*
304 * Divide ...
304 * Divide ...
305 */
305 */
306 if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
306 if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
307 need_min, &spl, xenv) < 0) {
307 need_min, &spl, xenv) < 0) {
308
308
309 return -1;
309 return -1;
310 }
310 }
311
311
312 /*
312 /*
313 * ... et Impera.
313 * ... et Impera.
314 */
314 */
315 if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
315 if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
316 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
316 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
317 xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
317 xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
318 kvdf, kvdb, spl.min_hi, xenv) < 0) {
318 kvdf, kvdb, spl.min_hi, xenv) < 0) {
319
319
320 return -1;
320 return -1;
321 }
321 }
322 }
322 }
323
323
324 return 0;
324 return 0;
325 }
325 }
326
326
327
327
328 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
328 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
329 xdfenv_t *xe) {
329 xdfenv_t *xe) {
330 long ndiags;
330 long ndiags;
331 long *kvd, *kvdf, *kvdb;
331 long *kvd, *kvdf, *kvdb;
332 xdalgoenv_t xenv;
332 xdalgoenv_t xenv;
333 diffdata_t dd1, dd2;
333 diffdata_t dd1, dd2;
334
334
335 if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
335 if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
336
336
337 return -1;
337 return -1;
338 }
338 }
339
339
340 /*
340 /*
341 * Allocate and setup K vectors to be used by the differential algorithm.
341 * Allocate and setup K vectors to be used by the differential algorithm.
342 * One is to store the forward path and one to store the backward path.
342 * One is to store the forward path and one to store the backward path.
343 */
343 */
344 ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
344 ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
345 if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {
345 if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {
346
346
347 xdl_free_env(xe);
347 xdl_free_env(xe);
348 return -1;
348 return -1;
349 }
349 }
350 kvdf = kvd;
350 kvdf = kvd;
351 kvdb = kvdf + ndiags;
351 kvdb = kvdf + ndiags;
352 kvdf += xe->xdf2.nreff + 1;
352 kvdf += xe->xdf2.nreff + 1;
353 kvdb += xe->xdf2.nreff + 1;
353 kvdb += xe->xdf2.nreff + 1;
354
354
355 xenv.mxcost = xdl_bogosqrt(ndiags);
355 xenv.mxcost = xdl_bogosqrt(ndiags);
356 if (xenv.mxcost < XDL_MAX_COST_MIN)
356 if (xenv.mxcost < XDL_MAX_COST_MIN)
357 xenv.mxcost = XDL_MAX_COST_MIN;
357 xenv.mxcost = XDL_MAX_COST_MIN;
358 xenv.snake_cnt = XDL_SNAKE_CNT;
358 xenv.snake_cnt = XDL_SNAKE_CNT;
359 xenv.heur_min = XDL_HEUR_MIN_COST;
359 xenv.heur_min = XDL_HEUR_MIN_COST;
360
360
361 dd1.nrec = xe->xdf1.nreff;
361 dd1.nrec = xe->xdf1.nreff;
362 dd1.ha = xe->xdf1.ha;
362 dd1.ha = xe->xdf1.ha;
363 dd1.rchg = xe->xdf1.rchg;
363 dd1.rchg = xe->xdf1.rchg;
364 dd1.rindex = xe->xdf1.rindex;
364 dd1.rindex = xe->xdf1.rindex;
365 dd2.nrec = xe->xdf2.nreff;
365 dd2.nrec = xe->xdf2.nreff;
366 dd2.ha = xe->xdf2.ha;
366 dd2.ha = xe->xdf2.ha;
367 dd2.rchg = xe->xdf2.rchg;
367 dd2.rchg = xe->xdf2.rchg;
368 dd2.rindex = xe->xdf2.rindex;
368 dd2.rindex = xe->xdf2.rindex;
369
369
370 if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
370 if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
371 kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
371 kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
372
372
373 xdl_free(kvd);
373 xdl_free(kvd);
374 xdl_free_env(xe);
374 xdl_free_env(xe);
375 return -1;
375 return -1;
376 }
376 }
377
377
378 xdl_free(kvd);
378 xdl_free(kvd);
379
379
380 return 0;
380 return 0;
381 }
381 }
382
382
383
383
384 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
384 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
385 xdchange_t *xch;
385 xdchange_t *xch;
386
386
387 if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
387 if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
388 return NULL;
388 return NULL;
389
389
390 xch->next = xscr;
390 xch->next = xscr;
391 xch->i1 = i1;
391 xch->i1 = i1;
392 xch->i2 = i2;
392 xch->i2 = i2;
393 xch->chg1 = chg1;
393 xch->chg1 = chg1;
394 xch->chg2 = chg2;
394 xch->chg2 = chg2;
395 xch->ignore = 0;
395 xch->ignore = 0;
396
396
397 return xch;
397 return xch;
398 }
398 }
399
399
400
400
401 static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
401 static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
402 {
402 {
403 return (rec1->ha == rec2->ha &&
403 return (rec1->ha == rec2->ha &&
404 xdl_recmatch(rec1->ptr, rec1->size,
404 xdl_recmatch(rec1->ptr, rec1->size,
405 rec2->ptr, rec2->size,
405 rec2->ptr, rec2->size,
406 flags));
406 flags));
407 }
407 }
408
408
/*
 * Ceiling for the value reported by get_indent(): deeper indentation is
 * reported as exactly this value. The cap bounds the work spent on input that
 * is not human-readable text and guarantees the result fits in an int.
 */
#define MAX_INDENT 200
416
416
417 /*
417 /*
418 * Return the amount of indentation of the specified line, treating TAB as 8
418 * Return the amount of indentation of the specified line, treating TAB as 8
419 * columns. Return -1 if line is empty or contains only whitespace. Clamp the
419 * columns. Return -1 if line is empty or contains only whitespace. Clamp the
420 * output value at MAX_INDENT.
420 * output value at MAX_INDENT.
421 */
421 */
422 static int get_indent(xrecord_t *rec)
422 static int get_indent(xrecord_t *rec)
423 {
423 {
424 long i;
424 long i;
425 int ret = 0;
425 int ret = 0;
426
426
427 for (i = 0; i < rec->size; i++) {
427 for (i = 0; i < rec->size; i++) {
428 char c = rec->ptr[i];
428 char c = rec->ptr[i];
429
429
430 if (!XDL_ISSPACE(c))
430 if (!XDL_ISSPACE(c))
431 return ret;
431 return ret;
432 else if (c == ' ')
432 else if (c == ' ')
433 ret += 1;
433 ret += 1;
434 else if (c == '\t')
434 else if (c == '\t')
435 ret += 8 - ret % 8;
435 ret += 8 - ret % 8;
436 /* ignore other whitespace characters */
436 /* ignore other whitespace characters */
437
437
438 if (ret >= MAX_INDENT)
438 if (ret >= MAX_INDENT)
439 return MAX_INDENT;
439 return MAX_INDENT;
440 }
440 }
441
441
442 /* The line contains only whitespace. */
442 /* The line contains only whitespace. */
443 return -1;
443 return -1;
444 }
444 }
445
445
/*
 * Ceiling for the number of consecutive blank lines that are counted around a
 * split; longer runs are reported as exactly this value. The cap avoids
 * O(N^2) work on pathological input and keeps the output of score_split
 * within the range of an int.
 */
#define MAX_BLANKS 20
452
452
/* What was measured about one candidate split position. */
struct split_measurement {
	/* Non-zero if the split sits at the end of the file (blank lines aside). */
	int end_of_file;

	/* Indentation of the line right after the split; -1 if it is blank. */
	int indent;

	/* Number of consecutive blank lines directly above the split. */
	int pre_blank;

	/*
	 * Indentation of the closest non-blank line above the split; -1 if
	 * no such line exists.
	 */
	int pre_indent;

	/* Number of blank lines that come after the line following the split. */
	int post_blank;

	/*
	 * Indentation of the closest non-blank line after the line following
	 * the split; -1 if no such line exists.
	 */
	int post_indent;
};
488
488
/* Accumulated rating of a split; for both members lower is better. */
struct split_score {
	/* Effective indentation at the split. */
	int effective_indent;

	/* Sum of the penalties charged to the split. */
	int penalty;
};
496
496
497 /*
497 /*
498 * Fill m with information about a hypothetical split of xdf above line split.
498 * Fill m with information about a hypothetical split of xdf above line split.
499 */
499 */
500 static void measure_split(const xdfile_t *xdf, long split,
500 static void measure_split(const xdfile_t *xdf, long split,
501 struct split_measurement *m)
501 struct split_measurement *m)
502 {
502 {
503 long i;
503 long i;
504
504
505 if (split >= xdf->nrec) {
505 if (split >= xdf->nrec) {
506 m->end_of_file = 1;
506 m->end_of_file = 1;
507 m->indent = -1;
507 m->indent = -1;
508 } else {
508 } else {
509 m->end_of_file = 0;
509 m->end_of_file = 0;
510 m->indent = get_indent(xdf->recs[split]);
510 m->indent = get_indent(xdf->recs[split]);
511 }
511 }
512
512
513 m->pre_blank = 0;
513 m->pre_blank = 0;
514 m->pre_indent = -1;
514 m->pre_indent = -1;
515 for (i = split - 1; i >= 0; i--) {
515 for (i = split - 1; i >= 0; i--) {
516 m->pre_indent = get_indent(xdf->recs[i]);
516 m->pre_indent = get_indent(xdf->recs[i]);
517 if (m->pre_indent != -1)
517 if (m->pre_indent != -1)
518 break;
518 break;
519 m->pre_blank += 1;
519 m->pre_blank += 1;
520 if (m->pre_blank == MAX_BLANKS) {
520 if (m->pre_blank == MAX_BLANKS) {
521 m->pre_indent = 0;
521 m->pre_indent = 0;
522 break;
522 break;
523 }
523 }
524 }
524 }
525
525
526 m->post_blank = 0;
526 m->post_blank = 0;
527 m->post_indent = -1;
527 m->post_indent = -1;
528 for (i = split + 1; i < xdf->nrec; i++) {
528 for (i = split + 1; i < xdf->nrec; i++) {
529 m->post_indent = get_indent(xdf->recs[i]);
529 m->post_indent = get_indent(xdf->recs[i]);
530 if (m->post_indent != -1)
530 if (m->post_indent != -1)
531 break;
531 break;
532 m->post_blank += 1;
532 m->post_blank += 1;
533 if (m->post_blank == MAX_BLANKS) {
533 if (m->post_blank == MAX_BLANKS) {
534 m->post_indent = 0;
534 m->post_indent = 0;
535 break;
535 break;
536 }
536 }
537 }
537 }
538 }
538 }
539
539
/*
 * Weights used by score_add_split() and score_cmp(). They were determined
 * empirically; a larger value marks a position as a worse place to split.
 *
 * Scores are only ever compared with one another, so scaling every value
 * here by a common factor would leave the heuristic unchanged. The scale in
 * use is simply large enough to let the values be tuned against each other
 * with adequate precision in integer arithmetic.
 */

/* Charged when no non-blank line precedes the split. */
#define START_OF_FILE_PENALTY 1

/* Charged when no non-blank line follows the split. */
#define END_OF_FILE_PENALTY 21

/* Factor applied to the count of blank lines surrounding the split. */
#define TOTAL_BLANK_WEIGHT (-30)

/* Factor applied to the count of blank lines following the split. */
#define POST_BLANK_WEIGHT 6

/* Charged when the line is indented deeper than its predecessor. */
#define RELATIVE_INDENT_PENALTY (-4)
#define RELATIVE_INDENT_WITH_BLANK_PENALTY 10

/*
 * Charged when the line is indented less than both its predecessor and its
 * successor.
 */
#define RELATIVE_OUTDENT_PENALTY 24
#define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17

/*
 * Charged when the line is indented less than its predecessor, but not less
 * than its successor.
 */
#define RELATIVE_DEDENT_PENALTY 23
#define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17

/*
 * Effective indents of two scores are compared only as less than (-1), equal
 * (0) or greater than (+1). That result is multiplied by this weight and
 * combined with the penalty to decide which of the two scores is better.
 */
#define INDENT_WEIGHT 60
591
591
592 /*
592 /*
593 * Compute a badness score for the hypothetical split whose measurements are
593 * Compute a badness score for the hypothetical split whose measurements are
594 * stored in m. The weight factors were determined empirically using the tools and
594 * stored in m. The weight factors were determined empirically using the tools and
595 * corpus described in
595 * corpus described in
596 *
596 *
597 * https://github.com/mhagger/diff-slider-tools
597 * https://github.com/mhagger/diff-slider-tools
598 *
598 *
599 * Also see that project if you want to improve the weights based on, for example,
599 * Also see that project if you want to improve the weights based on, for example,
600 * a larger or more diverse corpus.
600 * a larger or more diverse corpus.
601 */
601 */
602 static void score_add_split(const struct split_measurement *m, struct split_score *s)
602 static void score_add_split(const struct split_measurement *m, struct split_score *s)
603 {
603 {
604 /*
604 /*
605 * A place to accumulate penalty factors (positive makes this index more
605 * A place to accumulate penalty factors (positive makes this index more
606 * favored):
606 * favored):
607 */
607 */
608 int post_blank, total_blank, indent, any_blanks;
608 int post_blank, total_blank, indent, any_blanks;
609
609
610 if (m->pre_indent == -1 && m->pre_blank == 0)
610 if (m->pre_indent == -1 && m->pre_blank == 0)
611 s->penalty += START_OF_FILE_PENALTY;
611 s->penalty += START_OF_FILE_PENALTY;
612
612
613 if (m->end_of_file)
613 if (m->end_of_file)
614 s->penalty += END_OF_FILE_PENALTY;
614 s->penalty += END_OF_FILE_PENALTY;
615
615
616 /*
616 /*
617 * Set post_blank to the number of blank lines following the split,
617 * Set post_blank to the number of blank lines following the split,
618 * including the line immediately after the split:
618 * including the line immediately after the split:
619 */
619 */
620 post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;
620 post_blank = (m->indent == -1) ? 1 + m->post_blank : 0;
621 total_blank = m->pre_blank + post_blank;
621 total_blank = m->pre_blank + post_blank;
622
622
623 /* Penalties based on nearby blank lines: */
623 /* Penalties based on nearby blank lines: */
624 s->penalty += TOTAL_BLANK_WEIGHT * total_blank;
624 s->penalty += TOTAL_BLANK_WEIGHT * total_blank;
625 s->penalty += POST_BLANK_WEIGHT * post_blank;
625 s->penalty += POST_BLANK_WEIGHT * post_blank;
626
626
627 if (m->indent != -1)
627 if (m->indent != -1)
628 indent = m->indent;
628 indent = m->indent;
629 else
629 else
630 indent = m->post_indent;
630 indent = m->post_indent;
631
631
632 any_blanks = (total_blank != 0);
632 any_blanks = (total_blank != 0);
633
633
634 /* Note that the effective indent is -1 at the end of the file: */
634 /* Note that the effective indent is -1 at the end of the file: */
635 s->effective_indent += indent;
635 s->effective_indent += indent;
636
636
637 if (indent == -1) {
637 if (indent == -1) {
638 /* No additional adjustments needed. */
638 /* No additional adjustments needed. */
639 } else if (m->pre_indent == -1) {
639 } else if (m->pre_indent == -1) {
640 /* No additional adjustments needed. */
640 /* No additional adjustments needed. */
641 } else if (indent > m->pre_indent) {
641 } else if (indent > m->pre_indent) {
642 /*
642 /*
643 * The line is indented more than its predecessor.
643 * The line is indented more than its predecessor.
644 */
644 */
645 s->penalty += any_blanks ?
645 s->penalty += any_blanks ?
646 RELATIVE_INDENT_WITH_BLANK_PENALTY :
646 RELATIVE_INDENT_WITH_BLANK_PENALTY :
647 RELATIVE_INDENT_PENALTY;
647 RELATIVE_INDENT_PENALTY;
648 } else if (indent == m->pre_indent) {
648 } else if (indent == m->pre_indent) {
649 /*
649 /*
650 * The line has the same indentation level as its predecessor.
650 * The line has the same indentation level as its predecessor.
651 * No additional adjustments needed.
651 * No additional adjustments needed.
652 */
652 */
653 } else {
653 } else {
654 /*
654 /*
655 * The line is indented less than its predecessor. It could be
655 * The line is indented less than its predecessor. It could be
656 * the block terminator of the previous block, but it could
656 * the block terminator of the previous block, but it could
657 * also be the start of a new block (e.g., an "else" block, or
657 * also be the start of a new block (e.g., an "else" block, or
658 * maybe the previous block didn't have a block terminator).
658 * maybe the previous block didn't have a block terminator).
659 * Try to distinguish those cases based on what comes next:
659 * Try to distinguish those cases based on what comes next:
660 */
660 */
661 if (m->post_indent != -1 && m->post_indent > indent) {
661 if (m->post_indent != -1 && m->post_indent > indent) {
662 /*
662 /*
663 * The following line is indented more. So it is likely
663 * The following line is indented more. So it is likely
664 * that this line is the start of a block.
664 * that this line is the start of a block.
665 */
665 */
666 s->penalty += any_blanks ?
666 s->penalty += any_blanks ?
667 RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
667 RELATIVE_OUTDENT_WITH_BLANK_PENALTY :
668 RELATIVE_OUTDENT_PENALTY;
668 RELATIVE_OUTDENT_PENALTY;
669 } else {
669 } else {
670 /*
670 /*
671 * That was probably the end of a block.
671 * That was probably the end of a block.
672 */
672 */
673 s->penalty += any_blanks ?
673 s->penalty += any_blanks ?
674 RELATIVE_DEDENT_WITH_BLANK_PENALTY :
674 RELATIVE_DEDENT_WITH_BLANK_PENALTY :
675 RELATIVE_DEDENT_PENALTY;
675 RELATIVE_DEDENT_PENALTY;
676 }
676 }
677 }
677 }
678 }
678 }
679
679
680 static int score_cmp(struct split_score *s1, struct split_score *s2)
680 static int score_cmp(struct split_score *s1, struct split_score *s2)
681 {
681 {
682 /* -1 if s1.effective_indent < s2->effective_indent, etc. */
682 /* -1 if s1.effective_indent < s2->effective_indent, etc. */
683 int cmp_indents = ((s1->effective_indent > s2->effective_indent) -
683 int cmp_indents = ((s1->effective_indent > s2->effective_indent) -
684 (s1->effective_indent < s2->effective_indent));
684 (s1->effective_indent < s2->effective_indent));
685
685
686 return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);
686 return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty);
687 }
687 }
688
688
689 /*
689 /*
690 * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
690 * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group
691 * of lines that was inserted or deleted from the corresponding version of the
691 * of lines that was inserted or deleted from the corresponding version of the
692 * file). We consider there to be such a group at the beginning of the file, at
692 * file). We consider there to be such a group at the beginning of the file, at
693 * the end of the file, and between any two unchanged lines, though most such
693 * the end of the file, and between any two unchanged lines, though most such
694 * groups will usually be empty.
694 * groups will usually be empty.
695 *
695 *
696 * If the first line in a group is equal to the line following the group, then
696 * If the first line in a group is equal to the line following the group, then
697 * the group can be slid down. Similarly, if the last line in a group is equal
697 * the group can be slid down. Similarly, if the last line in a group is equal
698 * to the line preceding the group, then the group can be slid up. See
698 * to the line preceding the group, then the group can be slid up. See
699 * group_slide_down() and group_slide_up().
699 * group_slide_down() and group_slide_up().
700 *
700 *
701 * Note that loops that are testing for changed lines in xdf->rchg do not need
701 * Note that loops that are testing for changed lines in xdf->rchg do not need
702 * index bounding since the array is prepared with a zero at position -1 and N.
702 * index bounding since the array is prepared with a zero at position -1 and N.
703 */
703 */
704 struct xdlgroup {
704 struct xdlgroup {
705 /*
705 /*
706 * The index of the first changed line in the group, or the index of
706 * The index of the first changed line in the group, or the index of
707 * the unchanged line above which the (empty) group is located.
707 * the unchanged line above which the (empty) group is located.
708 */
708 */
709 long start;
709 long start;
710
710
711 /*
711 /*
712 * The index of the first unchanged line after the group. For an empty
712 * The index of the first unchanged line after the group. For an empty
713 * group, end is equal to start.
713 * group, end is equal to start.
714 */
714 */
715 long end;
715 long end;
716 };
716 };
717
717
718 /*
718 /*
719 * Initialize g to point at the first group in xdf.
719 * Initialize g to point at the first group in xdf.
720 */
720 */
721 static void group_init(xdfile_t *xdf, struct xdlgroup *g)
721 static void group_init(xdfile_t *xdf, struct xdlgroup *g)
722 {
722 {
723 g->start = g->end = 0;
723 g->start = g->end = 0;
724 while (xdf->rchg[g->end])
724 while (xdf->rchg[g->end])
725 g->end++;
725 g->end++;
726 }
726 }
727
727
728 /*
728 /*
729 * Move g to describe the next (possibly empty) group in xdf and return 0. If g
729 * Move g to describe the next (possibly empty) group in xdf and return 0. If g
730 * is already at the end of the file, do nothing and return -1.
730 * is already at the end of the file, do nothing and return -1.
731 */
731 */
732 static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)
732 static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)
733 {
733 {
734 if (g->end == xdf->nrec)
734 if (g->end == xdf->nrec)
735 return -1;
735 return -1;
736
736
737 g->start = g->end + 1;
737 g->start = g->end + 1;
738 for (g->end = g->start; xdf->rchg[g->end]; g->end++)
738 for (g->end = g->start; xdf->rchg[g->end]; g->end++)
739 ;
739 ;
740
740
741 return 0;
741 return 0;
742 }
742 }
743
743
744 /*
744 /*
745 * Move g to describe the previous (possibly empty) group in xdf and return 0.
745 * Move g to describe the previous (possibly empty) group in xdf and return 0.
746 * If g is already at the beginning of the file, do nothing and return -1.
746 * If g is already at the beginning of the file, do nothing and return -1.
747 */
747 */
748 static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
748 static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
749 {
749 {
750 if (g->start == 0)
750 if (g->start == 0)
751 return -1;
751 return -1;
752
752
753 g->end = g->start - 1;
753 g->end = g->start - 1;
754 for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
754 for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--)
755 ;
755 ;
756
756
757 return 0;
757 return 0;
758 }
758 }
759
759
760 /*
760 /*
761 * If g can be slid toward the end of the file, do so, and if it bumps into a
761 * If g can be slid toward the end of the file, do so, and if it bumps into a
762 * following group, expand this group to include it. Return 0 on success or -1
762 * following group, expand this group to include it. Return 0 on success or -1
763 * if g cannot be slid down.
763 * if g cannot be slid down.
764 */
764 */
765 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)
765 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags)
766 {
766 {
767 if (g->end < xdf->nrec &&
767 if (g->end < xdf->nrec &&
768 recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
768 recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
769 xdf->rchg[g->start++] = 0;
769 xdf->rchg[g->start++] = 0;
770 xdf->rchg[g->end++] = 1;
770 xdf->rchg[g->end++] = 1;
771
771
772 while (xdf->rchg[g->end])
772 while (xdf->rchg[g->end])
773 g->end++;
773 g->end++;
774
774
775 return 0;
775 return 0;
776 } else {
776 } else {
777 return -1;
777 return -1;
778 }
778 }
779 }
779 }
780
780
781 /*
781 /*
782 * If g can be slid toward the beginning of the file, do so, and if it bumps
782 * If g can be slid toward the beginning of the file, do so, and if it bumps
783 * into a previous group, expand this group to include it. Return 0 on success
783 * into a previous group, expand this group to include it. Return 0 on success
784 * or -1 if g cannot be slid up.
784 * or -1 if g cannot be slid up.
785 */
785 */
786 static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)
786 static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags)
787 {
787 {
788 if (g->start > 0 &&
788 if (g->start > 0 &&
789 recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
789 recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
790 xdf->rchg[--g->start] = 1;
790 xdf->rchg[--g->start] = 1;
791 xdf->rchg[--g->end] = 0;
791 xdf->rchg[--g->end] = 0;
792
792
793 while (xdf->rchg[g->start - 1])
793 while (xdf->rchg[g->start - 1])
794 g->start--;
794 g->start--;
795
795
796 return 0;
796 return 0;
797 } else {
797 } else {
798 return -1;
798 return -1;
799 }
799 }
800 }
800 }
801
801
/*
 * Report a violated internal invariant and terminate the process.
 *
 * Writes "BUG: <msg>" followed by a newline to stderr, then calls exit(1), so
 * this function does not return to its caller.
 */
static void xdl_bug(const char *msg)
{
	fprintf(stderr, "BUG: %s\n", msg);
	exit(1);
}
807
807
808 /*
808 /*
809 * For indentation heuristic, skip searching for better slide position after
809 * For indentation heuristic, skip searching for better slide position after
810 * checking MAX_BORING lines without finding an improvement. This defends the
810 * checking MAX_BORING lines without finding an improvement. This defends the
811 * indentation heuristic logic against pathological cases. The value is not
811 * indentation heuristic logic against pathological cases. The value is not
812 * picked scientifically but should be good enough.
812 * picked scientifically but should be good enough.
813 */
813 */
814 #define MAX_BORING 100
814 #define MAX_BORING 100
815
815
816 /*
816 /*
817 * Move back and forward change groups for a consistent and pretty diff output.
817 * Move back and forward change groups for a consistent and pretty diff output.
818 * This also helps in finding joinable change groups and reducing the diff
818 * This also helps in finding joinable change groups and reducing the diff
819 * size.
819 * size.
820 */
820 */
821 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
821 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
822 struct xdlgroup g, go;
822 struct xdlgroup g, go;
823 long earliest_end, end_matching_other;
823 long earliest_end, end_matching_other;
824 long groupsize;
824 long groupsize;
825
825
826 group_init(xdf, &g);
826 group_init(xdf, &g);
827 group_init(xdfo, &go);
827 group_init(xdfo, &go);
828
828
829 while (1) {
829 while (1) {
830 /* If the group is empty in the to-be-compacted file, skip it: */
830 /* If the group is empty in the to-be-compacted file, skip it: */
831 if (g.end == g.start)
831 if (g.end == g.start)
832 goto next;
832 goto next;
833
833
834 /*
834 /*
835 * Now shift the change up and then down as far as possible in
835 * Now shift the change up and then down as far as possible in
836 * each direction. If it bumps into any other changes, merge them.
836 * each direction. If it bumps into any other changes, merge them.
837 */
837 */
838 do {
838 do {
839 groupsize = g.end - g.start;
839 groupsize = g.end - g.start;
840
840
841 /*
841 /*
842 * Keep track of the last "end" index that causes this
842 * Keep track of the last "end" index that causes this
843 * group to align with a group of changed lines in the
843 * group to align with a group of changed lines in the
844 * other file. -1 indicates that we haven't found such
844 * other file. -1 indicates that we haven't found such
845 * a match yet:
845 * a match yet:
846 */
846 */
847 end_matching_other = -1;
847 end_matching_other = -1;
848
848
849 /* Shift the group backward as much as possible: */
849 /* Shift the group backward as much as possible: */
850 while (!group_slide_up(xdf, &g, flags))
850 while (!group_slide_up(xdf, &g, flags))
851 if (group_previous(xdfo, &go))
851 if (group_previous(xdfo, &go))
852 xdl_bug("group sync broken sliding up");
852 xdl_bug("group sync broken sliding up");
853
853
854 /*
854 /*
855 * This is this highest that this group can be shifted.
855 * This is this highest that this group can be shifted.
856 * Record its end index:
856 * Record its end index:
857 */
857 */
858 earliest_end = g.end;
858 earliest_end = g.end;
859
859
860 if (go.end > go.start)
860 if (go.end > go.start)
861 end_matching_other = g.end;
861 end_matching_other = g.end;
862
862
863 /* Now shift the group forward as far as possible: */
863 /* Now shift the group forward as far as possible: */
864 while (1) {
864 while (1) {
865 if (group_slide_down(xdf, &g, flags))
865 if (group_slide_down(xdf, &g, flags))
866 break;
866 break;
867 if (group_next(xdfo, &go))
867 if (group_next(xdfo, &go))
868 xdl_bug("group sync broken sliding down");
868 xdl_bug("group sync broken sliding down");
869
869
870 if (go.end > go.start)
870 if (go.end > go.start)
871 end_matching_other = g.end;
871 end_matching_other = g.end;
872 }
872 }
873 } while (groupsize != g.end - g.start);
873 } while (groupsize != g.end - g.start);
874
874
875 /*
875 /*
876 * If the group can be shifted, then we can possibly use this
876 * If the group can be shifted, then we can possibly use this
877 * freedom to produce a more intuitive diff.
877 * freedom to produce a more intuitive diff.
878 *
878 *
879 * The group is currently shifted as far down as possible, so the
879 * The group is currently shifted as far down as possible, so the
880 * heuristics below only have to handle upwards shifts.
880 * heuristics below only have to handle upwards shifts.
881 */
881 */
882
882
883 if (g.end == earliest_end) {
883 if (g.end == earliest_end) {
884 /* no shifting was possible */
884 /* no shifting was possible */
885 } else if (end_matching_other != -1) {
885 } else if (end_matching_other != -1) {
886 /*
886 /*
887 * Move the possibly merged group of changes back to line
887 * Move the possibly merged group of changes back to line
888 * up with the last group of changes from the other file
888 * up with the last group of changes from the other file
889 * that it can align with.
889 * that it can align with.
890 */
890 */
891 while (go.end == go.start) {
891 while (go.end == go.start) {
892 if (group_slide_up(xdf, &g, flags))
892 if (group_slide_up(xdf, &g, flags))
893 xdl_bug("match disappeared");
893 xdl_bug("match disappeared");
894 if (group_previous(xdfo, &go))
894 if (group_previous(xdfo, &go))
895 xdl_bug("group sync broken sliding to match");
895 xdl_bug("group sync broken sliding to match");
896 }
896 }
897 } else if (flags & XDF_INDENT_HEURISTIC) {
897 } else if (flags & XDF_INDENT_HEURISTIC) {
898 /*
898 /*
899 * Indent heuristic: a group of pure add/delete lines
899 * Indent heuristic: a group of pure add/delete lines
900 * implies two splits, one between the end of the "before"
900 * implies two splits, one between the end of the "before"
901 * context and the start of the group, and another between
901 * context and the start of the group, and another between
902 * the end of the group and the beginning of the "after"
902 * the end of the group and the beginning of the "after"
903 * context. Some splits are aesthetically better and some
903 * context. Some splits are aesthetically better and some
904 * are worse. We compute a badness "score" for each split,
904 * are worse. We compute a badness "score" for each split,
905 * and add the scores for the two splits to define a
905 * and add the scores for the two splits to define a
906 * "score" for each position that the group can be shifted
906 * "score" for each position that the group can be shifted
907 * to. Then we pick the shift with the lowest score.
907 * to. Then we pick the shift with the lowest score.
908 */
908 */
909 long shift, best_shift = -1;
909 long shift, best_shift = -1;
910 struct split_score best_score;
910 struct split_score best_score;
911
911
912 /*
912 /*
913 * This is O(N * MAX_BLANKS) (N = shift-able lines).
913 * This is O(N * MAX_BLANKS) (N = shift-able lines).
914 * Even with MAX_BLANKS bounded to a small value, a
914 * Even with MAX_BLANKS bounded to a small value, a
915 * large N could still make this loop take several
915 * large N could still make this loop take several
916 * times longer than the main diff algorithm. The
916 * times longer than the main diff algorithm. The
917 * "boring" value is to help cut down N to something
917 * "boring" value is to help cut down N to something
918 * like (MAX_BORING + groupsize).
918 * like (MAX_BORING + groupsize).
919 *
919 *
920 * Scan from bottom to top. So we can exit the loop
920 * Scan from bottom to top. So we can exit the loop
921 * without compromising the assumption "for a same best
921 * without compromising the assumption "for a same best
922 * score, pick the bottommost shift".
922 * score, pick the bottommost shift".
923 */
923 */
924 int boring = 0;
924 int boring = 0;
925 for (shift = g.end; shift >= earliest_end; shift--) {
925 for (shift = g.end; shift >= earliest_end; shift--) {
926 struct split_measurement m;
926 struct split_measurement m;
927 struct split_score score = {0, 0};
927 struct split_score score = {0, 0};
928 int cmp;
928 int cmp;
929
929
930 measure_split(xdf, shift, &m);
930 measure_split(xdf, shift, &m);
931 score_add_split(&m, &score);
931 score_add_split(&m, &score);
932 measure_split(xdf, shift - groupsize, &m);
932 measure_split(xdf, shift - groupsize, &m);
933 score_add_split(&m, &score);
933 score_add_split(&m, &score);
934
934
935 if (best_shift == -1) {
935 if (best_shift == -1) {
936 cmp = -1;
936 cmp = -1;
937 } else {
937 } else {
938 cmp = score_cmp(&score, &best_score);
938 cmp = score_cmp(&score, &best_score);
939 }
939 }
940 if (cmp < 0) {
940 if (cmp < 0) {
941 boring = 0;
941 boring = 0;
942 best_score.effective_indent = score.effective_indent;
942 best_score.effective_indent = score.effective_indent;
943 best_score.penalty = score.penalty;
943 best_score.penalty = score.penalty;
944 best_shift = shift;
944 best_shift = shift;
945 } else {
945 } else {
946 boring += 1;
946 boring += 1;
947 if (boring >= MAX_BORING)
947 if (boring >= MAX_BORING)
948 break;
948 break;
949 }
949 }
950 }
950 }
951
951
952 while (g.end > best_shift) {
952 while (g.end > best_shift) {
953 if (group_slide_up(xdf, &g, flags))
953 if (group_slide_up(xdf, &g, flags))
954 xdl_bug("best shift unreached");
954 xdl_bug("best shift unreached");
955 if (group_previous(xdfo, &go))
955 if (group_previous(xdfo, &go))
956 xdl_bug("group sync broken sliding to blank line");
956 xdl_bug("group sync broken sliding to blank line");
957 }
957 }
958 }
958 }
959
959
960 next:
960 next:
961 /* Move past the just-processed group: */
961 /* Move past the just-processed group: */
962 if (group_next(xdf, &g))
962 if (group_next(xdf, &g))
963 break;
963 break;
964 if (group_next(xdfo, &go))
964 if (group_next(xdfo, &go))
965 xdl_bug("group sync broken moving to next group");
965 xdl_bug("group sync broken moving to next group");
966 }
966 }
967
967
968 if (!group_next(xdfo, &go))
968 if (!group_next(xdfo, &go))
969 xdl_bug("group sync broken at end of file");
969 xdl_bug("group sync broken at end of file");
970
970
971 return 0;
971 return 0;
972 }
972 }
973
973
974
974
975 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
975 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
976 xdchange_t *cscr = NULL, *xch;
976 xdchange_t *cscr = NULL, *xch;
977 char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
977 char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
978 long i1, i2, l1, l2;
978 long i1, i2, l1, l2;
979
979
980 /*
980 /*
981 * Trivial. Collects "groups" of changes and creates an edit script.
981 * Trivial. Collects "groups" of changes and creates an edit script.
982 */
982 */
983 for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
983 for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
984 if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
984 if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
985 for (l1 = i1; rchg1[i1 - 1]; i1--);
985 for (l1 = i1; rchg1[i1 - 1]; i1--);
986 for (l2 = i2; rchg2[i2 - 1]; i2--);
986 for (l2 = i2; rchg2[i2 - 1]; i2--);
987
987
988 if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
988 if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
989 xdl_free_script(cscr);
989 xdl_free_script(cscr);
990 return -1;
990 return -1;
991 }
991 }
992 cscr = xch;
992 cscr = xch;
993 }
993 }
994
994
995 *xscr = cscr;
995 *xscr = cscr;
996
996
997 return 0;
997 return 0;
998 }
998 }
999
999
1000
1000
1001 void xdl_free_script(xdchange_t *xscr) {
1001 void xdl_free_script(xdchange_t *xscr) {
1002 xdchange_t *xch;
1002 xdchange_t *xch;
1003
1003
1004 while ((xch = xscr) != NULL) {
1004 while ((xch = xscr) != NULL) {
1005 xscr = xscr->next;
1005 xscr = xscr->next;
1006 xdl_free(xch);
1006 xdl_free(xch);
1007 }
1007 }
1008 }
1008 }
1009
1009
1010
1011 /*
1012 * Starting at the passed change atom, find the latest change atom to be included
1013 * inside the differential hunk according to the specified configuration.
1014 * Also advance xscr if the first changes must be discarded.
1015 */
1016 xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
1017 {
1018 xdchange_t *xch, *xchp, *lxch;
1019 long max_common = 0;
1020 long max_ignorable = 0;
1021 unsigned long ignored = 0; /* number of ignored blank lines */
1022
1023 /* remove ignorable changes that are too far before other changes */
1024 for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
1025 xch = xchp->next;
1026
1027 if (xch == NULL ||
1028 xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
1029 *xscr = xch;
1030 }
1031
1032 if (*xscr == NULL)
1033 return NULL;
1034
1035 lxch = *xscr;
1036
1037 for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
1038 long distance = xch->i1 - (xchp->i1 + xchp->chg1);
1039 if (distance > max_common)
1040 break;
1041
1042 if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) {
1043 lxch = xch;
1044 ignored = 0;
1045 } else if (distance < max_ignorable && xch->ignore) {
1046 ignored += xch->chg2;
1047 } else if (lxch != xchp &&
1048 xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
1049 break;
1050 } else if (!xch->ignore) {
1051 lxch = xch;
1052 ignored = 0;
1053 } else {
1054 ignored += xch->chg2;
1055 }
1056 }
1057
1058 return lxch;
1059 }
1060
1061
1010 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
1062 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
1011 xdemitconf_t const *xecfg)
1063 xdemitconf_t const *xecfg)
1012 {
1064 {
1013 xdchange_t *xch, *xche;
1065 xdchange_t *xch, *xche;
1066
1067 if (!xecfg->hunk_func)
1068 return -1;
1069
1014 if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
1070 if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
1015 long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
1071 long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
1016 for (xch = xscr; xch; xch = xche->next) {
1072 for (xch = xscr; xch; xch = xche->next) {
1017 xche = xdl_get_hunk(&xch, xecfg);
1073 xche = xdl_get_hunk(&xch, xecfg);
1018 if (!xch)
1074 if (!xch)
1019 break;
1075 break;
1020 if (xch->i1 > i1 || xch->i2 > i2) {
1076 if (xch->i1 > i1 || xch->i2 > i2) {
1021 if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
1077 if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
1022 return -1;
1078 return -1;
1023 }
1079 }
1024 i1 = xche->i1 + xche->chg1;
1080 i1 = xche->i1 + xche->chg1;
1025 i2 = xche->i2 + xche->chg2;
1081 i2 = xche->i2 + xche->chg2;
1026 }
1082 }
1027 if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
1083 if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
1028 return -1;
1084 return -1;
1029 } else {
1085 } else {
1030 for (xch = xscr; xch; xch = xche->next) {
1086 for (xch = xscr; xch; xch = xche->next) {
1031 xche = xdl_get_hunk(&xch, xecfg);
1087 xche = xdl_get_hunk(&xch, xecfg);
1032 if (!xch)
1088 if (!xch)
1033 break;
1089 break;
1034 if (xecfg->hunk_func(
1090 if (xecfg->hunk_func(
1035 xch->i1, xche->i1 + xche->chg1 - xch->i1,
1091 xch->i1, xche->i1 + xche->chg1 - xch->i1,
1036 xch->i2, xche->i2 + xche->chg2 - xch->i2,
1092 xch->i2, xche->i2 + xche->chg2 - xch->i2,
1037 ecb->priv) < 0)
1093 ecb->priv) < 0)
1038 return -1;
1094 return -1;
1039 }
1095 }
1040 }
1096 }
1041 return 0;
1097 return 0;
1042 }
1098 }
1043
1099
1044 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
1100 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
1045 xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
1101 xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
1046 xdchange_t *xscr;
1102 xdchange_t *xscr;
1047 xdfenv_t xe;
1103 xdfenv_t xe;
1048 emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
1049
1104
1050 if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
1105 if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
1051
1106
1052 return -1;
1107 return -1;
1053 }
1108 }
1054 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
1109 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
1055 xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
1110 xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
1056 xdl_build_script(&xe, &xscr) < 0) {
1111 xdl_build_script(&xe, &xscr) < 0) {
1057
1112
1058 xdl_free_env(&xe);
1113 xdl_free_env(&xe);
1059 return -1;
1114 return -1;
1060 }
1115 }
1061
1116
1062 if (ef(&xe, xscr, ecb, xecfg) < 0) {
1117 if (xdl_call_hunk_func(&xe, xscr, ecb, xecfg) < 0) {
1063 xdl_free_script(xscr);
1118 xdl_free_script(xscr);
1064 xdl_free_env(&xe);
1119 xdl_free_env(&xe);
1065 return -1;
1120 return -1;
1066 }
1121 }
1067 xdl_free_script(xscr);
1122 xdl_free_script(xscr);
1068 xdl_free_env(&xe);
1123 xdl_free_env(&xe);
1069
1124
1070 return 0;
1125 return 0;
1071 }
1126 }
@@ -1,60 +1,58 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #if !defined(XDIFFI_H)
23 #if !defined(XDIFFI_H)
24 #define XDIFFI_H
24 #define XDIFFI_H
25
25
26
26
/*
 * Per-file data handed to xdl_recs_cmp().
 * NOTE(review): the field notes are inferred from the names only - confirm
 * against the code that fills this structure in.
 */
typedef struct s_diffdata {
	long nrec;               /* presumably the number of records */
	unsigned long const *ha; /* presumably per-record hash values */
	long *rindex;            /* presumably a record index table */
	char *rchg;              /* presumably per-record "changed" flags */
} diffdata_t;
33
33
/*
 * Tuning values passed to xdl_recs_cmp().
 * NOTE(review): the meaning of each limit is not visible in this header -
 * confirm against the diff algorithm before relying on these names.
 */
typedef struct s_xdalgoenv {
	long mxcost;
	long snake_cnt;
	long heur_min;
} xdalgoenv_t;
39
39
/*
 * One entry of an edit script: a node in a singly linked list of changes.
 * NOTE(review): the field notes follow how callers appear to use this type
 * (index + count pairs) - confirm against xdl_add_change().
 */
typedef struct s_xdchange {
	struct s_xdchange *next; /* next change in the script, or NULL */
	long i1;                 /* presumably start index in the first file */
	long i2;                 /* presumably start index in the second file */
	long chg1;               /* presumably changed-line count, first file */
	long chg2;               /* presumably changed-line count, second file */
	int ignore;              /* non-zero marks the change as ignorable */
} xdchange_t;
46
46
47
47
48
48
49 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
49 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
50 diffdata_t *dd2, long off2, long lim2,
50 diffdata_t *dd2, long off2, long lim2,
51 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
51 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv);
52 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
52 int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
53 xdfenv_t *xe);
53 xdfenv_t *xe);
54 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags);
54 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags);
55 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr);
55 int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr);
56 void xdl_free_script(xdchange_t *xscr);
56 void xdl_free_script(xdchange_t *xscr);
57 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
58 xdemitconf_t const *xecfg);
59
57
60 #endif /* #if !defined(XDIFFI_H) */
58 #endif /* #if !defined(XDIFFI_H) */
@@ -1,41 +1,40 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #if !defined(XINCLUDE_H)
23 #if !defined(XINCLUDE_H)
24 #define XINCLUDE_H
24 #define XINCLUDE_H
25
25
26 #include <ctype.h>
26 #include <ctype.h>
27 #include <stdio.h>
27 #include <stdio.h>
28 #include <stdlib.h>
28 #include <stdlib.h>
29 #include <string.h>
29 #include <string.h>
30 #include <limits.h>
30 #include <limits.h>
31
31
32 #include "xmacros.h"
32 #include "xmacros.h"
33 #include "xdiff.h"
33 #include "xdiff.h"
34 #include "xtypes.h"
34 #include "xtypes.h"
35 #include "xutils.h"
35 #include "xutils.h"
36 #include "xprepare.h"
36 #include "xprepare.h"
37 #include "xdiffi.h"
37 #include "xdiffi.h"
38 #include "xemit.h"
39
38
40
39
41 #endif /* #if !defined(XINCLUDE_H) */
40 #endif /* #if !defined(XINCLUDE_H) */
@@ -1,243 +1,149 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #include <limits.h>
23 #include <limits.h>
24 #include <assert.h>
24 #include <assert.h>
25 #include "xinclude.h"
25 #include "xinclude.h"
26
26
27
27
28
28
29
29
30 long xdl_bogosqrt(long n) {
30 long xdl_bogosqrt(long n) {
31 long i;
31 long i;
32
32
33 /*
33 /*
34 * Classical integer square root approximation using shifts.
34 * Classical integer square root approximation using shifts.
35 */
35 */
36 for (i = 1; n > 0; n >>= 2)
36 for (i = 1; n > 0; n >>= 2)
37 i <<= 1;
37 i <<= 1;
38
38
39 return i;
39 return i;
40 }
40 }
41
41
42
42
43 int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
44 xdemitcb_t *ecb) {
45 int i = 2;
46 mmbuffer_t mb[3];
47
48 mb[0].ptr = (char *) pre;
49 mb[0].size = psize;
50 mb[1].ptr = (char *) rec;
51 mb[1].size = size;
52 if (size > 0 && rec[size - 1] != '\n') {
53 mb[2].ptr = (char *) "\n\\ No newline at end of file\n";
54 mb[2].size = strlen(mb[2].ptr);
55 i++;
56 }
57 if (ecb->outf(ecb->priv, mb, i) < 0) {
58
59 return -1;
60 }
61
62 return 0;
63 }
64
65 void *xdl_mmfile_first(mmfile_t *mmf, long *size)
43 void *xdl_mmfile_first(mmfile_t *mmf, long *size)
66 {
44 {
67 *size = mmf->size;
45 *size = mmf->size;
68 return mmf->ptr;
46 return mmf->ptr;
69 }
47 }
70
48
71
49
72 long xdl_mmfile_size(mmfile_t *mmf)
50 long xdl_mmfile_size(mmfile_t *mmf)
73 {
51 {
74 return mmf->size;
52 return mmf->size;
75 }
53 }
76
54
77
55
78 int xdl_cha_init(chastore_t *cha, long isize, long icount) {
56 int xdl_cha_init(chastore_t *cha, long isize, long icount) {
79
57
80 cha->head = cha->tail = NULL;
58 cha->head = cha->tail = NULL;
81 cha->isize = isize;
59 cha->isize = isize;
82 cha->nsize = icount * isize;
60 cha->nsize = icount * isize;
83 cha->ancur = cha->sncur = NULL;
61 cha->ancur = cha->sncur = NULL;
84 cha->scurr = 0;
62 cha->scurr = 0;
85
63
86 return 0;
64 return 0;
87 }
65 }
88
66
89
67
90 void xdl_cha_free(chastore_t *cha) {
68 void xdl_cha_free(chastore_t *cha) {
91 chanode_t *cur, *tmp;
69 chanode_t *cur, *tmp;
92
70
93 for (cur = cha->head; (tmp = cur) != NULL;) {
71 for (cur = cha->head; (tmp = cur) != NULL;) {
94 cur = cur->next;
72 cur = cur->next;
95 xdl_free(tmp);
73 xdl_free(tmp);
96 }
74 }
97 }
75 }
98
76
99
77
100 void *xdl_cha_alloc(chastore_t *cha) {
78 void *xdl_cha_alloc(chastore_t *cha) {
101 chanode_t *ancur;
79 chanode_t *ancur;
102 void *data;
80 void *data;
103
81
104 if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
82 if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
105 if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
83 if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
106
84
107 return NULL;
85 return NULL;
108 }
86 }
109 ancur->icurr = 0;
87 ancur->icurr = 0;
110 ancur->next = NULL;
88 ancur->next = NULL;
111 if (cha->tail)
89 if (cha->tail)
112 cha->tail->next = ancur;
90 cha->tail->next = ancur;
113 if (!cha->head)
91 if (!cha->head)
114 cha->head = ancur;
92 cha->head = ancur;
115 cha->tail = ancur;
93 cha->tail = ancur;
116 cha->ancur = ancur;
94 cha->ancur = ancur;
117 }
95 }
118
96
119 data = (char *) ancur + sizeof(chanode_t) + ancur->icurr;
97 data = (char *) ancur + sizeof(chanode_t) + ancur->icurr;
120 ancur->icurr += cha->isize;
98 ancur->icurr += cha->isize;
121
99
122 return data;
100 return data;
123 }
101 }
124
102
125 long xdl_guess_lines(mmfile_t *mf, long sample) {
103 long xdl_guess_lines(mmfile_t *mf, long sample) {
126 long nl = 0, size, tsize = 0;
104 long nl = 0, size, tsize = 0;
127 char const *data, *cur, *top;
105 char const *data, *cur, *top;
128
106
129 if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) {
107 if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) {
130 for (top = data + size; nl < sample && cur < top; ) {
108 for (top = data + size; nl < sample && cur < top; ) {
131 nl++;
109 nl++;
132 if (!(cur = memchr(cur, '\n', top - cur)))
110 if (!(cur = memchr(cur, '\n', top - cur)))
133 cur = top;
111 cur = top;
134 else
112 else
135 cur++;
113 cur++;
136 }
114 }
137 tsize += (long) (cur - data);
115 tsize += (long) (cur - data);
138 }
116 }
139
117
140 if (nl && tsize)
118 if (nl && tsize)
141 nl = xdl_mmfile_size(mf) / (tsize / nl);
119 nl = xdl_mmfile_size(mf) / (tsize / nl);
142
120
143 return nl + 1;
121 return nl + 1;
144 }
122 }
145
123
146 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
124 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
147 {
125 {
148 if (s1 == s2 && !memcmp(l1, l2, s1))
126 if (s1 == s2 && !memcmp(l1, l2, s1))
149 return 1;
127 return 1;
150 return 0;
128 return 0;
151 }
129 }
152
130
153 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
131 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
154 unsigned long ha = 5381;
132 unsigned long ha = 5381;
155 char const *ptr = *data;
133 char const *ptr = *data;
156
134
157 for (; ptr < top && *ptr != '\n'; ptr++) {
135 for (; ptr < top && *ptr != '\n'; ptr++) {
158 ha += (ha << 5);
136 ha += (ha << 5);
159 ha ^= (unsigned long) *ptr;
137 ha ^= (unsigned long) *ptr;
160 }
138 }
161 *data = ptr < top ? ptr + 1: ptr;
139 *data = ptr < top ? ptr + 1: ptr;
162
140
163 return ha;
141 return ha;
164 }
142 }
165
143
166 unsigned int xdl_hashbits(unsigned int size) {
144 unsigned int xdl_hashbits(unsigned int size) {
167 unsigned int val = 1, bits = 0;
145 unsigned int val = 1, bits = 0;
168
146
169 for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++);
147 for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++);
170 return bits ? bits: 1;
148 return bits ? bits: 1;
171 }
149 }
172
173
174 int xdl_num_out(char *out, long val) {
175 char *ptr, *str = out;
176 char buf[32];
177
178 ptr = buf + sizeof(buf) - 1;
179 *ptr = '\0';
180 if (val < 0) {
181 *--ptr = '-';
182 val = -val;
183 }
184 for (; val && ptr > buf; val /= 10)
185 *--ptr = "0123456789"[val % 10];
186 if (*ptr)
187 for (; *ptr; ptr++, str++)
188 *str = *ptr;
189 else
190 *str++ = '0';
191 *str = '\0';
192
193 return str - out;
194 }
195
196 int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
197 const char *func, long funclen, xdemitcb_t *ecb) {
198 int nb = 0;
199 mmbuffer_t mb;
200 char buf[128];
201
202 memcpy(buf, "@@ -", 4);
203 nb += 4;
204
205 nb += xdl_num_out(buf + nb, c1 ? s1: s1 - 1);
206
207 if (c1 != 1) {
208 memcpy(buf + nb, ",", 1);
209 nb += 1;
210
211 nb += xdl_num_out(buf + nb, c1);
212 }
213
214 memcpy(buf + nb, " +", 2);
215 nb += 2;
216
217 nb += xdl_num_out(buf + nb, c2 ? s2: s2 - 1);
218
219 if (c2 != 1) {
220 memcpy(buf + nb, ",", 1);
221 nb += 1;
222
223 nb += xdl_num_out(buf + nb, c2);
224 }
225
226 memcpy(buf + nb, " @@", 3);
227 nb += 3;
228 if (func && funclen) {
229 buf[nb++] = ' ';
230 if (funclen > sizeof(buf) - nb - 1)
231 funclen = sizeof(buf) - nb - 1;
232 memcpy(buf + nb, func, funclen);
233 nb += funclen;
234 }
235 buf[nb++] = '\n';
236
237 mb.ptr = buf;
238 mb.size = nb;
239 if (ecb->outf(ecb->priv, &mb, 1) < 0)
240 return -1;
241
242 return 0;
243 }
@@ -1,44 +1,39 b''
1 /*
1 /*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003 Davide Libenzi
3 * Copyright (C) 2003 Davide Libenzi
4 *
4 *
5 * This library is free software; you can redistribute it and/or
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
8 * version 2.1 of the License, or (at your option) any later version.
9 *
9 *
10 * This library is distributed in the hope that it will be useful,
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
13 * Lesser General Public License for more details.
14 *
14 *
15 * You should have received a copy of the GNU Lesser General Public
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
17 * <http://www.gnu.org/licenses/>.
18 *
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
20 *
21 */
21 */
22
22
23 #if !defined(XUTILS_H)
23 #if !defined(XUTILS_H)
24 #define XUTILS_H
24 #define XUTILS_H
25
25
26
26
27
27
28 long xdl_bogosqrt(long n);
28 long xdl_bogosqrt(long n);
29 int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
30 xdemitcb_t *ecb);
31 int xdl_cha_init(chastore_t *cha, long isize, long icount);
29 int xdl_cha_init(chastore_t *cha, long isize, long icount);
32 void xdl_cha_free(chastore_t *cha);
30 void xdl_cha_free(chastore_t *cha);
33 void *xdl_cha_alloc(chastore_t *cha);
31 void *xdl_cha_alloc(chastore_t *cha);
34 long xdl_guess_lines(mmfile_t *mf, long sample);
32 long xdl_guess_lines(mmfile_t *mf, long sample);
35 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
33 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
36 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
34 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
37 unsigned int xdl_hashbits(unsigned int size);
35 unsigned int xdl_hashbits(unsigned int size);
38 int xdl_num_out(char *out, long val);
39 int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
40 const char *func, long funclen, xdemitcb_t *ecb);
41
36
42
37
43
38
44 #endif /* #if !defined(XUTILS_H) */
39 #endif /* #if !defined(XUTILS_H) */
@@ -1,1075 +1,1073 b''
1 #
1 #
2 # This is the mercurial setup script.
2 # This is the mercurial setup script.
3 #
3 #
4 # 'python setup.py install', or
4 # 'python setup.py install', or
5 # 'python setup.py --help' for more options
5 # 'python setup.py --help' for more options
6
6
7 import os
7 import os
8
8
9 supportedpy = '~= 2.7'
9 supportedpy = '~= 2.7'
10 if os.environ.get('HGALLOWPYTHON3', ''):
10 if os.environ.get('HGALLOWPYTHON3', ''):
11 # Mercurial will never work on Python 3 before 3.5 due to a lack
11 # Mercurial will never work on Python 3 before 3.5 due to a lack
12 # of % formatting on bytestrings, and can't work on 3.6.0 or 3.6.1
12 # of % formatting on bytestrings, and can't work on 3.6.0 or 3.6.1
13 # due to a bug in % formatting in bytestrings.
13 # due to a bug in % formatting in bytestrings.
14 #
14 #
15 # TODO: when we actually work on Python 3, use this string as the
15 # TODO: when we actually work on Python 3, use this string as the
16 # actual supportedpy string.
16 # actual supportedpy string.
17 supportedpy = ','.join([
17 supportedpy = ','.join([
18 '>=2.7',
18 '>=2.7',
19 '!=3.0.*',
19 '!=3.0.*',
20 '!=3.1.*',
20 '!=3.1.*',
21 '!=3.2.*',
21 '!=3.2.*',
22 '!=3.3.*',
22 '!=3.3.*',
23 '!=3.4.*',
23 '!=3.4.*',
24 '!=3.6.0',
24 '!=3.6.0',
25 '!=3.6.1',
25 '!=3.6.1',
26 ])
26 ])
27
27
28 import sys, platform
28 import sys, platform
29 if sys.version_info[0] >= 3:
29 if sys.version_info[0] >= 3:
30 printf = eval('print')
30 printf = eval('print')
31 libdir_escape = 'unicode_escape'
31 libdir_escape = 'unicode_escape'
32 def sysstr(s):
32 def sysstr(s):
33 return s.decode('latin-1')
33 return s.decode('latin-1')
34 else:
34 else:
35 libdir_escape = 'string_escape'
35 libdir_escape = 'string_escape'
36 def printf(*args, **kwargs):
36 def printf(*args, **kwargs):
37 f = kwargs.get('file', sys.stdout)
37 f = kwargs.get('file', sys.stdout)
38 end = kwargs.get('end', '\n')
38 end = kwargs.get('end', '\n')
39 f.write(b' '.join(args) + end)
39 f.write(b' '.join(args) + end)
40 def sysstr(s):
40 def sysstr(s):
41 return s
41 return s
42
42
43 # Attempt to guide users to a modern pip - this means that 2.6 users
43 # Attempt to guide users to a modern pip - this means that 2.6 users
44 # should have a chance of getting a 4.2 release, and when we ratchet
44 # should have a chance of getting a 4.2 release, and when we ratchet
45 # the version requirement forward again hopefully everyone will get
45 # the version requirement forward again hopefully everyone will get
46 # something that works for them.
46 # something that works for them.
47 if sys.version_info < (2, 7, 0, 'final'):
47 if sys.version_info < (2, 7, 0, 'final'):
48 pip_message = ('This may be due to an out of date pip. '
48 pip_message = ('This may be due to an out of date pip. '
49 'Make sure you have pip >= 9.0.1.')
49 'Make sure you have pip >= 9.0.1.')
50 try:
50 try:
51 import pip
51 import pip
52 pip_version = tuple([int(x) for x in pip.__version__.split('.')[:3]])
52 pip_version = tuple([int(x) for x in pip.__version__.split('.')[:3]])
53 if pip_version < (9, 0, 1) :
53 if pip_version < (9, 0, 1) :
54 pip_message = (
54 pip_message = (
55 'Your pip version is out of date, please install '
55 'Your pip version is out of date, please install '
56 'pip >= 9.0.1. pip {} detected.'.format(pip.__version__))
56 'pip >= 9.0.1. pip {} detected.'.format(pip.__version__))
57 else:
57 else:
58 # pip is new enough - it must be something else
58 # pip is new enough - it must be something else
59 pip_message = ''
59 pip_message = ''
60 except Exception:
60 except Exception:
61 pass
61 pass
62 error = """
62 error = """
63 Mercurial does not support Python older than 2.7.
63 Mercurial does not support Python older than 2.7.
64 Python {py} detected.
64 Python {py} detected.
65 {pip}
65 {pip}
66 """.format(py=sys.version_info, pip=pip_message)
66 """.format(py=sys.version_info, pip=pip_message)
67 printf(error, file=sys.stderr)
67 printf(error, file=sys.stderr)
68 sys.exit(1)
68 sys.exit(1)
69
69
70 # We don't yet officially support Python 3. But we want to allow developers to
70 # We don't yet officially support Python 3. But we want to allow developers to
71 # hack on. Detect and disallow running on Python 3 by default. But provide a
71 # hack on. Detect and disallow running on Python 3 by default. But provide a
72 # backdoor to enable working on Python 3.
72 # backdoor to enable working on Python 3.
73 if sys.version_info[0] != 2:
73 if sys.version_info[0] != 2:
74 badpython = True
74 badpython = True
75
75
76 # Allow Python 3 from source checkouts.
76 # Allow Python 3 from source checkouts.
77 if os.path.isdir('.hg'):
77 if os.path.isdir('.hg'):
78 badpython = False
78 badpython = False
79
79
80 if badpython:
80 if badpython:
81 error = """
81 error = """
82 Mercurial only supports Python 2.7.
82 Mercurial only supports Python 2.7.
83 Python {py} detected.
83 Python {py} detected.
84 Please re-run with Python 2.7.
84 Please re-run with Python 2.7.
85 """.format(py=sys.version_info)
85 """.format(py=sys.version_info)
86
86
87 printf(error, file=sys.stderr)
87 printf(error, file=sys.stderr)
88 sys.exit(1)
88 sys.exit(1)
89
89
90 # Solaris Python packaging brain damage
90 # Solaris Python packaging brain damage
91 try:
91 try:
92 import hashlib
92 import hashlib
93 sha = hashlib.sha1()
93 sha = hashlib.sha1()
94 except ImportError:
94 except ImportError:
95 try:
95 try:
96 import sha
96 import sha
97 sha.sha # silence unused import warning
97 sha.sha # silence unused import warning
98 except ImportError:
98 except ImportError:
99 raise SystemExit(
99 raise SystemExit(
100 "Couldn't import standard hashlib (incomplete Python install).")
100 "Couldn't import standard hashlib (incomplete Python install).")
101
101
102 try:
102 try:
103 import zlib
103 import zlib
104 zlib.compressobj # silence unused import warning
104 zlib.compressobj # silence unused import warning
105 except ImportError:
105 except ImportError:
106 raise SystemExit(
106 raise SystemExit(
107 "Couldn't import standard zlib (incomplete Python install).")
107 "Couldn't import standard zlib (incomplete Python install).")
108
108
109 # The base IronPython distribution (as of 2.7.1) doesn't support bz2
109 # The base IronPython distribution (as of 2.7.1) doesn't support bz2
110 isironpython = False
110 isironpython = False
111 try:
111 try:
112 isironpython = (platform.python_implementation()
112 isironpython = (platform.python_implementation()
113 .lower().find("ironpython") != -1)
113 .lower().find("ironpython") != -1)
114 except AttributeError:
114 except AttributeError:
115 pass
115 pass
116
116
117 if isironpython:
117 if isironpython:
118 sys.stderr.write("warning: IronPython detected (no bz2 support)\n")
118 sys.stderr.write("warning: IronPython detected (no bz2 support)\n")
119 else:
119 else:
120 try:
120 try:
121 import bz2
121 import bz2
122 bz2.BZ2Compressor # silence unused import warning
122 bz2.BZ2Compressor # silence unused import warning
123 except ImportError:
123 except ImportError:
124 raise SystemExit(
124 raise SystemExit(
125 "Couldn't import standard bz2 (incomplete Python install).")
125 "Couldn't import standard bz2 (incomplete Python install).")
126
126
127 ispypy = "PyPy" in sys.version
127 ispypy = "PyPy" in sys.version
128
128
129 import ctypes
129 import ctypes
130 import stat, subprocess, time
130 import stat, subprocess, time
131 import re
131 import re
132 import shutil
132 import shutil
133 import tempfile
133 import tempfile
134 from distutils import log
134 from distutils import log
135 # We have issues with setuptools on some platforms and builders. Until
135 # We have issues with setuptools on some platforms and builders. Until
136 # those are resolved, setuptools is opt-in except for platforms where
136 # those are resolved, setuptools is opt-in except for platforms where
137 # we don't have issues.
137 # we don't have issues.
138 issetuptools = (os.name == 'nt' or 'FORCE_SETUPTOOLS' in os.environ)
138 issetuptools = (os.name == 'nt' or 'FORCE_SETUPTOOLS' in os.environ)
139 if issetuptools:
139 if issetuptools:
140 from setuptools import setup
140 from setuptools import setup
141 else:
141 else:
142 from distutils.core import setup
142 from distutils.core import setup
143 from distutils.ccompiler import new_compiler
143 from distutils.ccompiler import new_compiler
144 from distutils.core import Command, Extension
144 from distutils.core import Command, Extension
145 from distutils.dist import Distribution
145 from distutils.dist import Distribution
146 from distutils.command.build import build
146 from distutils.command.build import build
147 from distutils.command.build_ext import build_ext
147 from distutils.command.build_ext import build_ext
148 from distutils.command.build_py import build_py
148 from distutils.command.build_py import build_py
149 from distutils.command.build_scripts import build_scripts
149 from distutils.command.build_scripts import build_scripts
150 from distutils.command.install import install
150 from distutils.command.install import install
151 from distutils.command.install_lib import install_lib
151 from distutils.command.install_lib import install_lib
152 from distutils.command.install_scripts import install_scripts
152 from distutils.command.install_scripts import install_scripts
153 from distutils.spawn import spawn, find_executable
153 from distutils.spawn import spawn, find_executable
154 from distutils import file_util
154 from distutils import file_util
155 from distutils.errors import (
155 from distutils.errors import (
156 CCompilerError,
156 CCompilerError,
157 DistutilsError,
157 DistutilsError,
158 DistutilsExecError,
158 DistutilsExecError,
159 )
159 )
160 from distutils.sysconfig import get_python_inc, get_config_var
160 from distutils.sysconfig import get_python_inc, get_config_var
161 from distutils.version import StrictVersion
161 from distutils.version import StrictVersion
162
162
163 def write_if_changed(path, content):
163 def write_if_changed(path, content):
164 """Write content to a file iff the content hasn't changed."""
164 """Write content to a file iff the content hasn't changed."""
165 if os.path.exists(path):
165 if os.path.exists(path):
166 with open(path, 'rb') as fh:
166 with open(path, 'rb') as fh:
167 current = fh.read()
167 current = fh.read()
168 else:
168 else:
169 current = b''
169 current = b''
170
170
171 if current != content:
171 if current != content:
172 with open(path, 'wb') as fh:
172 with open(path, 'wb') as fh:
173 fh.write(content)
173 fh.write(content)
174
174
175 scripts = ['hg']
175 scripts = ['hg']
176 if os.name == 'nt':
176 if os.name == 'nt':
177 # We remove hg.bat if we are able to build hg.exe.
177 # We remove hg.bat if we are able to build hg.exe.
178 scripts.append('contrib/win32/hg.bat')
178 scripts.append('contrib/win32/hg.bat')
179
179
180 def cancompile(cc, code):
180 def cancompile(cc, code):
181 tmpdir = tempfile.mkdtemp(prefix='hg-install-')
181 tmpdir = tempfile.mkdtemp(prefix='hg-install-')
182 devnull = oldstderr = None
182 devnull = oldstderr = None
183 try:
183 try:
184 fname = os.path.join(tmpdir, 'testcomp.c')
184 fname = os.path.join(tmpdir, 'testcomp.c')
185 f = open(fname, 'w')
185 f = open(fname, 'w')
186 f.write(code)
186 f.write(code)
187 f.close()
187 f.close()
188 # Redirect stderr to /dev/null to hide any error messages
188 # Redirect stderr to /dev/null to hide any error messages
189 # from the compiler.
189 # from the compiler.
190 # This will have to be changed if we ever have to check
190 # This will have to be changed if we ever have to check
191 # for a function on Windows.
191 # for a function on Windows.
192 devnull = open('/dev/null', 'w')
192 devnull = open('/dev/null', 'w')
193 oldstderr = os.dup(sys.stderr.fileno())
193 oldstderr = os.dup(sys.stderr.fileno())
194 os.dup2(devnull.fileno(), sys.stderr.fileno())
194 os.dup2(devnull.fileno(), sys.stderr.fileno())
195 objects = cc.compile([fname], output_dir=tmpdir)
195 objects = cc.compile([fname], output_dir=tmpdir)
196 cc.link_executable(objects, os.path.join(tmpdir, "a.out"))
196 cc.link_executable(objects, os.path.join(tmpdir, "a.out"))
197 return True
197 return True
198 except Exception:
198 except Exception:
199 return False
199 return False
200 finally:
200 finally:
201 if oldstderr is not None:
201 if oldstderr is not None:
202 os.dup2(oldstderr, sys.stderr.fileno())
202 os.dup2(oldstderr, sys.stderr.fileno())
203 if devnull is not None:
203 if devnull is not None:
204 devnull.close()
204 devnull.close()
205 shutil.rmtree(tmpdir)
205 shutil.rmtree(tmpdir)
206
206
207 # simplified version of distutils.ccompiler.CCompiler.has_function
207 # simplified version of distutils.ccompiler.CCompiler.has_function
208 # that actually removes its temporary files.
208 # that actually removes its temporary files.
209 def hasfunction(cc, funcname):
209 def hasfunction(cc, funcname):
210 code = 'int main(void) { %s(); }\n' % funcname
210 code = 'int main(void) { %s(); }\n' % funcname
211 return cancompile(cc, code)
211 return cancompile(cc, code)
212
212
213 def hasheader(cc, headername):
213 def hasheader(cc, headername):
214 code = '#include <%s>\nint main(void) { return 0; }\n' % headername
214 code = '#include <%s>\nint main(void) { return 0; }\n' % headername
215 return cancompile(cc, code)
215 return cancompile(cc, code)
216
216
217 # py2exe needs to be installed to work
217 # py2exe needs to be installed to work
218 try:
218 try:
219 import py2exe
219 import py2exe
220 py2exe.Distribution # silence unused import warning
220 py2exe.Distribution # silence unused import warning
221 py2exeloaded = True
221 py2exeloaded = True
222 # import py2exe's patched Distribution class
222 # import py2exe's patched Distribution class
223 from distutils.core import Distribution
223 from distutils.core import Distribution
224 except ImportError:
224 except ImportError:
225 py2exeloaded = False
225 py2exeloaded = False
226
226
227 def runcmd(cmd, env):
227 def runcmd(cmd, env):
228 p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
228 p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
229 stderr=subprocess.PIPE, env=env)
229 stderr=subprocess.PIPE, env=env)
230 out, err = p.communicate()
230 out, err = p.communicate()
231 return p.returncode, out, err
231 return p.returncode, out, err
232
232
233 class hgcommand(object):
233 class hgcommand(object):
234 def __init__(self, cmd, env):
234 def __init__(self, cmd, env):
235 self.cmd = cmd
235 self.cmd = cmd
236 self.env = env
236 self.env = env
237
237
238 def run(self, args):
238 def run(self, args):
239 cmd = self.cmd + args
239 cmd = self.cmd + args
240 returncode, out, err = runcmd(cmd, self.env)
240 returncode, out, err = runcmd(cmd, self.env)
241 err = filterhgerr(err)
241 err = filterhgerr(err)
242 if err or returncode != 0:
242 if err or returncode != 0:
243 printf("stderr from '%s':" % (' '.join(cmd)), file=sys.stderr)
243 printf("stderr from '%s':" % (' '.join(cmd)), file=sys.stderr)
244 printf(err, file=sys.stderr)
244 printf(err, file=sys.stderr)
245 return ''
245 return ''
246 return out
246 return out
247
247
248 def filterhgerr(err):
248 def filterhgerr(err):
249 # If root is executing setup.py, but the repository is owned by
249 # If root is executing setup.py, but the repository is owned by
250 # another user (as in "sudo python setup.py install") we will get
250 # another user (as in "sudo python setup.py install") we will get
251 # trust warnings since the .hg/hgrc file is untrusted. That is
251 # trust warnings since the .hg/hgrc file is untrusted. That is
252 # fine, we don't want to load it anyway. Python may warn about
252 # fine, we don't want to load it anyway. Python may warn about
253 # a missing __init__.py in mercurial/locale, we also ignore that.
253 # a missing __init__.py in mercurial/locale, we also ignore that.
254 err = [e for e in err.splitlines()
254 err = [e for e in err.splitlines()
255 if (not e.startswith(b'not trusting file')
255 if (not e.startswith(b'not trusting file')
256 and not e.startswith(b'warning: Not importing')
256 and not e.startswith(b'warning: Not importing')
257 and not e.startswith(b'obsolete feature not enabled')
257 and not e.startswith(b'obsolete feature not enabled')
258 and not e.startswith(b'*** failed to import extension')
258 and not e.startswith(b'*** failed to import extension')
259 and not e.startswith(b'devel-warn:'))]
259 and not e.startswith(b'devel-warn:'))]
260 return b'\n'.join(b' ' + e for e in err)
260 return b'\n'.join(b' ' + e for e in err)
261
261
262 def findhg():
262 def findhg():
263 """Try to figure out how we should invoke hg for examining the local
263 """Try to figure out how we should invoke hg for examining the local
264 repository contents.
264 repository contents.
265
265
266 Returns an hgcommand object."""
266 Returns an hgcommand object."""
267 # By default, prefer the "hg" command in the user's path. This was
267 # By default, prefer the "hg" command in the user's path. This was
268 # presumably the hg command that the user used to create this repository.
268 # presumably the hg command that the user used to create this repository.
269 #
269 #
270 # This repository may require extensions or other settings that would not
270 # This repository may require extensions or other settings that would not
271 # be enabled by running the hg script directly from this local repository.
271 # be enabled by running the hg script directly from this local repository.
272 hgenv = os.environ.copy()
272 hgenv = os.environ.copy()
273 # Use HGPLAIN to disable hgrc settings that would change output formatting,
273 # Use HGPLAIN to disable hgrc settings that would change output formatting,
274 # and disable localization for the same reasons.
274 # and disable localization for the same reasons.
275 hgenv['HGPLAIN'] = '1'
275 hgenv['HGPLAIN'] = '1'
276 hgenv['LANGUAGE'] = 'C'
276 hgenv['LANGUAGE'] = 'C'
277 hgcmd = ['hg']
277 hgcmd = ['hg']
278 # Run a simple "hg log" command just to see if using hg from the user's
278 # Run a simple "hg log" command just to see if using hg from the user's
279 # path works and can successfully interact with this repository.
279 # path works and can successfully interact with this repository.
280 check_cmd = ['log', '-r.', '-Ttest']
280 check_cmd = ['log', '-r.', '-Ttest']
281 try:
281 try:
282 retcode, out, err = runcmd(hgcmd + check_cmd, hgenv)
282 retcode, out, err = runcmd(hgcmd + check_cmd, hgenv)
283 except EnvironmentError:
283 except EnvironmentError:
284 retcode = -1
284 retcode = -1
285 if retcode == 0 and not filterhgerr(err):
285 if retcode == 0 and not filterhgerr(err):
286 return hgcommand(hgcmd, hgenv)
286 return hgcommand(hgcmd, hgenv)
287
287
288 # Fall back to trying the local hg installation.
288 # Fall back to trying the local hg installation.
289 hgenv = localhgenv()
289 hgenv = localhgenv()
290 hgcmd = [sys.executable, 'hg']
290 hgcmd = [sys.executable, 'hg']
291 try:
291 try:
292 retcode, out, err = runcmd(hgcmd + check_cmd, hgenv)
292 retcode, out, err = runcmd(hgcmd + check_cmd, hgenv)
293 except EnvironmentError:
293 except EnvironmentError:
294 retcode = -1
294 retcode = -1
295 if retcode == 0 and not filterhgerr(err):
295 if retcode == 0 and not filterhgerr(err):
296 return hgcommand(hgcmd, hgenv)
296 return hgcommand(hgcmd, hgenv)
297
297
298 raise SystemExit('Unable to find a working hg binary to extract the '
298 raise SystemExit('Unable to find a working hg binary to extract the '
299 'version from the repository tags')
299 'version from the repository tags')
300
300
301 def localhgenv():
301 def localhgenv():
302 """Get an environment dictionary to use for invoking or importing
302 """Get an environment dictionary to use for invoking or importing
303 mercurial from the local repository."""
303 mercurial from the local repository."""
304 # Execute hg out of this directory with a custom environment which takes
304 # Execute hg out of this directory with a custom environment which takes
305 # care to not use any hgrc files and do no localization.
305 # care to not use any hgrc files and do no localization.
306 env = {'HGMODULEPOLICY': 'py',
306 env = {'HGMODULEPOLICY': 'py',
307 'HGRCPATH': '',
307 'HGRCPATH': '',
308 'LANGUAGE': 'C',
308 'LANGUAGE': 'C',
309 'PATH': ''} # make pypi modules that use os.environ['PATH'] happy
309 'PATH': ''} # make pypi modules that use os.environ['PATH'] happy
310 if 'LD_LIBRARY_PATH' in os.environ:
310 if 'LD_LIBRARY_PATH' in os.environ:
311 env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']
311 env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH']
312 if 'SystemRoot' in os.environ:
312 if 'SystemRoot' in os.environ:
313 # SystemRoot is required by Windows to load various DLLs. See:
313 # SystemRoot is required by Windows to load various DLLs. See:
314 # https://bugs.python.org/issue13524#msg148850
314 # https://bugs.python.org/issue13524#msg148850
315 env['SystemRoot'] = os.environ['SystemRoot']
315 env['SystemRoot'] = os.environ['SystemRoot']
316 return env
316 return env
317
317
318 version = ''
318 version = ''
319
319
320 if os.path.isdir('.hg'):
320 if os.path.isdir('.hg'):
321 hg = findhg()
321 hg = findhg()
322 cmd = ['log', '-r', '.', '--template', '{tags}\n']
322 cmd = ['log', '-r', '.', '--template', '{tags}\n']
323 numerictags = [t for t in sysstr(hg.run(cmd)).split() if t[0:1].isdigit()]
323 numerictags = [t for t in sysstr(hg.run(cmd)).split() if t[0:1].isdigit()]
324 hgid = sysstr(hg.run(['id', '-i'])).strip()
324 hgid = sysstr(hg.run(['id', '-i'])).strip()
325 if not hgid:
325 if not hgid:
326 # Bail out if hg is having problems interacting with this repository,
326 # Bail out if hg is having problems interacting with this repository,
327 # rather than falling through and producing a bogus version number.
327 # rather than falling through and producing a bogus version number.
328 # Continuing with an invalid version number will break extensions
328 # Continuing with an invalid version number will break extensions
329 # that define minimumhgversion.
329 # that define minimumhgversion.
330 raise SystemExit('Unable to determine hg version from local repository')
330 raise SystemExit('Unable to determine hg version from local repository')
331 if numerictags: # tag(s) found
331 if numerictags: # tag(s) found
332 version = numerictags[-1]
332 version = numerictags[-1]
333 if hgid.endswith('+'): # propagate the dirty status to the tag
333 if hgid.endswith('+'): # propagate the dirty status to the tag
334 version += '+'
334 version += '+'
335 else: # no tag found
335 else: # no tag found
336 ltagcmd = ['parents', '--template', '{latesttag}']
336 ltagcmd = ['parents', '--template', '{latesttag}']
337 ltag = sysstr(hg.run(ltagcmd))
337 ltag = sysstr(hg.run(ltagcmd))
338 changessincecmd = ['log', '-T', 'x\n', '-r', "only(.,'%s')" % ltag]
338 changessincecmd = ['log', '-T', 'x\n', '-r', "only(.,'%s')" % ltag]
339 changessince = len(hg.run(changessincecmd).splitlines())
339 changessince = len(hg.run(changessincecmd).splitlines())
340 version = '%s+%s-%s' % (ltag, changessince, hgid)
340 version = '%s+%s-%s' % (ltag, changessince, hgid)
341 if version.endswith('+'):
341 if version.endswith('+'):
342 version += time.strftime('%Y%m%d')
342 version += time.strftime('%Y%m%d')
343 elif os.path.exists('.hg_archival.txt'):
343 elif os.path.exists('.hg_archival.txt'):
344 kw = dict([[t.strip() for t in l.split(':', 1)]
344 kw = dict([[t.strip() for t in l.split(':', 1)]
345 for l in open('.hg_archival.txt')])
345 for l in open('.hg_archival.txt')])
346 if 'tag' in kw:
346 if 'tag' in kw:
347 version = kw['tag']
347 version = kw['tag']
348 elif 'latesttag' in kw:
348 elif 'latesttag' in kw:
349 if 'changessincelatesttag' in kw:
349 if 'changessincelatesttag' in kw:
350 version = '%(latesttag)s+%(changessincelatesttag)s-%(node).12s' % kw
350 version = '%(latesttag)s+%(changessincelatesttag)s-%(node).12s' % kw
351 else:
351 else:
352 version = '%(latesttag)s+%(latesttagdistance)s-%(node).12s' % kw
352 version = '%(latesttag)s+%(latesttagdistance)s-%(node).12s' % kw
353 else:
353 else:
354 version = kw.get('node', '')[:12]
354 version = kw.get('node', '')[:12]
355
355
356 if version:
356 if version:
357 versionb = version
357 versionb = version
358 if not isinstance(versionb, bytes):
358 if not isinstance(versionb, bytes):
359 versionb = versionb.encode('ascii')
359 versionb = versionb.encode('ascii')
360
360
361 write_if_changed('mercurial/__version__.py', b''.join([
361 write_if_changed('mercurial/__version__.py', b''.join([
362 b'# this file is autogenerated by setup.py\n'
362 b'# this file is autogenerated by setup.py\n'
363 b'version = "%s"\n' % versionb,
363 b'version = "%s"\n' % versionb,
364 ]))
364 ]))
365
365
366 try:
366 try:
367 oldpolicy = os.environ.get('HGMODULEPOLICY', None)
367 oldpolicy = os.environ.get('HGMODULEPOLICY', None)
368 os.environ['HGMODULEPOLICY'] = 'py'
368 os.environ['HGMODULEPOLICY'] = 'py'
369 from mercurial import __version__
369 from mercurial import __version__
370 version = __version__.version
370 version = __version__.version
371 except ImportError:
371 except ImportError:
372 version = 'unknown'
372 version = 'unknown'
373 finally:
373 finally:
374 if oldpolicy is None:
374 if oldpolicy is None:
375 del os.environ['HGMODULEPOLICY']
375 del os.environ['HGMODULEPOLICY']
376 else:
376 else:
377 os.environ['HGMODULEPOLICY'] = oldpolicy
377 os.environ['HGMODULEPOLICY'] = oldpolicy
378
378
379 class hgbuild(build):
379 class hgbuild(build):
380 # Insert hgbuildmo first so that files in mercurial/locale/ are found
380 # Insert hgbuildmo first so that files in mercurial/locale/ are found
381 # when build_py is run next.
381 # when build_py is run next.
382 sub_commands = [('build_mo', None)] + build.sub_commands
382 sub_commands = [('build_mo', None)] + build.sub_commands
383
383
384 class hgbuildmo(build):
384 class hgbuildmo(build):
385
385
386 description = "build translations (.mo files)"
386 description = "build translations (.mo files)"
387
387
388 def run(self):
388 def run(self):
389 if not find_executable('msgfmt'):
389 if not find_executable('msgfmt'):
390 self.warn("could not find msgfmt executable, no translations "
390 self.warn("could not find msgfmt executable, no translations "
391 "will be built")
391 "will be built")
392 return
392 return
393
393
394 podir = 'i18n'
394 podir = 'i18n'
395 if not os.path.isdir(podir):
395 if not os.path.isdir(podir):
396 self.warn("could not find %s/ directory" % podir)
396 self.warn("could not find %s/ directory" % podir)
397 return
397 return
398
398
399 join = os.path.join
399 join = os.path.join
400 for po in os.listdir(podir):
400 for po in os.listdir(podir):
401 if not po.endswith('.po'):
401 if not po.endswith('.po'):
402 continue
402 continue
403 pofile = join(podir, po)
403 pofile = join(podir, po)
404 modir = join('locale', po[:-3], 'LC_MESSAGES')
404 modir = join('locale', po[:-3], 'LC_MESSAGES')
405 mofile = join(modir, 'hg.mo')
405 mofile = join(modir, 'hg.mo')
406 mobuildfile = join('mercurial', mofile)
406 mobuildfile = join('mercurial', mofile)
407 cmd = ['msgfmt', '-v', '-o', mobuildfile, pofile]
407 cmd = ['msgfmt', '-v', '-o', mobuildfile, pofile]
408 if sys.platform != 'sunos5':
408 if sys.platform != 'sunos5':
409 # msgfmt on Solaris does not know about -c
409 # msgfmt on Solaris does not know about -c
410 cmd.append('-c')
410 cmd.append('-c')
411 self.mkpath(join('mercurial', modir))
411 self.mkpath(join('mercurial', modir))
412 self.make_file([pofile], mobuildfile, spawn, (cmd,))
412 self.make_file([pofile], mobuildfile, spawn, (cmd,))
413
413
414
414
415 class hgdist(Distribution):
415 class hgdist(Distribution):
416 pure = False
416 pure = False
417 cffi = ispypy
417 cffi = ispypy
418
418
419 global_options = Distribution.global_options + \
419 global_options = Distribution.global_options + \
420 [('pure', None, "use pure (slow) Python "
420 [('pure', None, "use pure (slow) Python "
421 "code instead of C extensions"),
421 "code instead of C extensions"),
422 ]
422 ]
423
423
424 def has_ext_modules(self):
424 def has_ext_modules(self):
425 # self.ext_modules is emptied in hgbuildpy.finalize_options which is
425 # self.ext_modules is emptied in hgbuildpy.finalize_options which is
426 # too late for some cases
426 # too late for some cases
427 return not self.pure and Distribution.has_ext_modules(self)
427 return not self.pure and Distribution.has_ext_modules(self)
428
428
429 # This is ugly as a one-liner. So use a variable.
429 # This is ugly as a one-liner. So use a variable.
430 buildextnegops = dict(getattr(build_ext, 'negative_options', {}))
430 buildextnegops = dict(getattr(build_ext, 'negative_options', {}))
431 buildextnegops['no-zstd'] = 'zstd'
431 buildextnegops['no-zstd'] = 'zstd'
432
432
433 class hgbuildext(build_ext):
433 class hgbuildext(build_ext):
434 user_options = build_ext.user_options + [
434 user_options = build_ext.user_options + [
435 ('zstd', None, 'compile zstd bindings [default]'),
435 ('zstd', None, 'compile zstd bindings [default]'),
436 ('no-zstd', None, 'do not compile zstd bindings'),
436 ('no-zstd', None, 'do not compile zstd bindings'),
437 ]
437 ]
438
438
439 boolean_options = build_ext.boolean_options + ['zstd']
439 boolean_options = build_ext.boolean_options + ['zstd']
440 negative_opt = buildextnegops
440 negative_opt = buildextnegops
441
441
442 def initialize_options(self):
442 def initialize_options(self):
443 self.zstd = True
443 self.zstd = True
444 return build_ext.initialize_options(self)
444 return build_ext.initialize_options(self)
445
445
446 def build_extensions(self):
446 def build_extensions(self):
447 # Filter out zstd if disabled via argument.
447 # Filter out zstd if disabled via argument.
448 if not self.zstd:
448 if not self.zstd:
449 self.extensions = [e for e in self.extensions
449 self.extensions = [e for e in self.extensions
450 if e.name != 'mercurial.zstd']
450 if e.name != 'mercurial.zstd']
451
451
452 return build_ext.build_extensions(self)
452 return build_ext.build_extensions(self)
453
453
454 def build_extension(self, ext):
454 def build_extension(self, ext):
455 try:
455 try:
456 build_ext.build_extension(self, ext)
456 build_ext.build_extension(self, ext)
457 except CCompilerError:
457 except CCompilerError:
458 if not getattr(ext, 'optional', False):
458 if not getattr(ext, 'optional', False):
459 raise
459 raise
460 log.warn("Failed to build optional extension '%s' (skipping)",
460 log.warn("Failed to build optional extension '%s' (skipping)",
461 ext.name)
461 ext.name)
462
462
463 class hgbuildscripts(build_scripts):
463 class hgbuildscripts(build_scripts):
464 def run(self):
464 def run(self):
465 if os.name != 'nt' or self.distribution.pure:
465 if os.name != 'nt' or self.distribution.pure:
466 return build_scripts.run(self)
466 return build_scripts.run(self)
467
467
468 exebuilt = False
468 exebuilt = False
469 try:
469 try:
470 self.run_command('build_hgexe')
470 self.run_command('build_hgexe')
471 exebuilt = True
471 exebuilt = True
472 except (DistutilsError, CCompilerError):
472 except (DistutilsError, CCompilerError):
473 log.warn('failed to build optional hg.exe')
473 log.warn('failed to build optional hg.exe')
474
474
475 if exebuilt:
475 if exebuilt:
476 # Copying hg.exe to the scripts build directory ensures it is
476 # Copying hg.exe to the scripts build directory ensures it is
477 # installed by the install_scripts command.
477 # installed by the install_scripts command.
478 hgexecommand = self.get_finalized_command('build_hgexe')
478 hgexecommand = self.get_finalized_command('build_hgexe')
479 dest = os.path.join(self.build_dir, 'hg.exe')
479 dest = os.path.join(self.build_dir, 'hg.exe')
480 self.mkpath(self.build_dir)
480 self.mkpath(self.build_dir)
481 self.copy_file(hgexecommand.hgexepath, dest)
481 self.copy_file(hgexecommand.hgexepath, dest)
482
482
483 # Remove hg.bat because it is redundant with hg.exe.
483 # Remove hg.bat because it is redundant with hg.exe.
484 self.scripts.remove('contrib/win32/hg.bat')
484 self.scripts.remove('contrib/win32/hg.bat')
485
485
486 return build_scripts.run(self)
486 return build_scripts.run(self)
487
487
488 class hgbuildpy(build_py):
488 class hgbuildpy(build_py):
489 def finalize_options(self):
489 def finalize_options(self):
490 build_py.finalize_options(self)
490 build_py.finalize_options(self)
491
491
492 if self.distribution.pure:
492 if self.distribution.pure:
493 self.distribution.ext_modules = []
493 self.distribution.ext_modules = []
494 elif self.distribution.cffi:
494 elif self.distribution.cffi:
495 from mercurial.cffi import (
495 from mercurial.cffi import (
496 bdiffbuild,
496 bdiffbuild,
497 mpatchbuild,
497 mpatchbuild,
498 )
498 )
499 exts = [mpatchbuild.ffi.distutils_extension(),
499 exts = [mpatchbuild.ffi.distutils_extension(),
500 bdiffbuild.ffi.distutils_extension()]
500 bdiffbuild.ffi.distutils_extension()]
501 # cffi modules go here
501 # cffi modules go here
502 if sys.platform == 'darwin':
502 if sys.platform == 'darwin':
503 from mercurial.cffi import osutilbuild
503 from mercurial.cffi import osutilbuild
504 exts.append(osutilbuild.ffi.distutils_extension())
504 exts.append(osutilbuild.ffi.distutils_extension())
505 self.distribution.ext_modules = exts
505 self.distribution.ext_modules = exts
506 else:
506 else:
507 h = os.path.join(get_python_inc(), 'Python.h')
507 h = os.path.join(get_python_inc(), 'Python.h')
508 if not os.path.exists(h):
508 if not os.path.exists(h):
509 raise SystemExit('Python headers are required to build '
509 raise SystemExit('Python headers are required to build '
510 'Mercurial but weren\'t found in %s' % h)
510 'Mercurial but weren\'t found in %s' % h)
511
511
512 def run(self):
512 def run(self):
513 basepath = os.path.join(self.build_lib, 'mercurial')
513 basepath = os.path.join(self.build_lib, 'mercurial')
514 self.mkpath(basepath)
514 self.mkpath(basepath)
515
515
516 if self.distribution.pure:
516 if self.distribution.pure:
517 modulepolicy = 'py'
517 modulepolicy = 'py'
518 elif self.build_lib == '.':
518 elif self.build_lib == '.':
519 # in-place build should run without rebuilding C extensions
519 # in-place build should run without rebuilding C extensions
520 modulepolicy = 'allow'
520 modulepolicy = 'allow'
521 else:
521 else:
522 modulepolicy = 'c'
522 modulepolicy = 'c'
523
523
524 content = b''.join([
524 content = b''.join([
525 b'# this file is autogenerated by setup.py\n',
525 b'# this file is autogenerated by setup.py\n',
526 b'modulepolicy = b"%s"\n' % modulepolicy.encode('ascii'),
526 b'modulepolicy = b"%s"\n' % modulepolicy.encode('ascii'),
527 ])
527 ])
528 write_if_changed(os.path.join(basepath, '__modulepolicy__.py'),
528 write_if_changed(os.path.join(basepath, '__modulepolicy__.py'),
529 content)
529 content)
530
530
531 build_py.run(self)
531 build_py.run(self)
532
532
533 class buildhgextindex(Command):
533 class buildhgextindex(Command):
534 description = 'generate prebuilt index of hgext (for frozen package)'
534 description = 'generate prebuilt index of hgext (for frozen package)'
535 user_options = []
535 user_options = []
536 _indexfilename = 'hgext/__index__.py'
536 _indexfilename = 'hgext/__index__.py'
537
537
538 def initialize_options(self):
538 def initialize_options(self):
539 pass
539 pass
540
540
541 def finalize_options(self):
541 def finalize_options(self):
542 pass
542 pass
543
543
544 def run(self):
544 def run(self):
545 if os.path.exists(self._indexfilename):
545 if os.path.exists(self._indexfilename):
546 with open(self._indexfilename, 'w') as f:
546 with open(self._indexfilename, 'w') as f:
547 f.write('# empty\n')
547 f.write('# empty\n')
548
548
549 # here no extension enabled, disabled() lists up everything
549 # here no extension enabled, disabled() lists up everything
550 code = ('import pprint; from mercurial import extensions; '
550 code = ('import pprint; from mercurial import extensions; '
551 'pprint.pprint(extensions.disabled())')
551 'pprint.pprint(extensions.disabled())')
552 returncode, out, err = runcmd([sys.executable, '-c', code],
552 returncode, out, err = runcmd([sys.executable, '-c', code],
553 localhgenv())
553 localhgenv())
554 if err or returncode != 0:
554 if err or returncode != 0:
555 raise DistutilsExecError(err)
555 raise DistutilsExecError(err)
556
556
557 with open(self._indexfilename, 'w') as f:
557 with open(self._indexfilename, 'w') as f:
558 f.write('# this file is autogenerated by setup.py\n')
558 f.write('# this file is autogenerated by setup.py\n')
559 f.write('docs = ')
559 f.write('docs = ')
560 f.write(out)
560 f.write(out)
561
561
562 class buildhgexe(build_ext):
562 class buildhgexe(build_ext):
563 description = 'compile hg.exe from mercurial/exewrapper.c'
563 description = 'compile hg.exe from mercurial/exewrapper.c'
564 user_options = build_ext.user_options + [
564 user_options = build_ext.user_options + [
565 ('long-paths-support', None, 'enable support for long paths on '
565 ('long-paths-support', None, 'enable support for long paths on '
566 'Windows (off by default and '
566 'Windows (off by default and '
567 'experimental)'),
567 'experimental)'),
568 ]
568 ]
569
569
570 LONG_PATHS_MANIFEST = """
570 LONG_PATHS_MANIFEST = """
571 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
571 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
572 <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
572 <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
573 <application>
573 <application>
574 <windowsSettings
574 <windowsSettings
575 xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
575 xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
576 <ws2:longPathAware>true</ws2:longPathAware>
576 <ws2:longPathAware>true</ws2:longPathAware>
577 </windowsSettings>
577 </windowsSettings>
578 </application>
578 </application>
579 </assembly>"""
579 </assembly>"""
580
580
581 def initialize_options(self):
581 def initialize_options(self):
582 build_ext.initialize_options(self)
582 build_ext.initialize_options(self)
583 self.long_paths_support = False
583 self.long_paths_support = False
584
584
585 def build_extensions(self):
585 def build_extensions(self):
586 if os.name != 'nt':
586 if os.name != 'nt':
587 return
587 return
588 if isinstance(self.compiler, HackedMingw32CCompiler):
588 if isinstance(self.compiler, HackedMingw32CCompiler):
589 self.compiler.compiler_so = self.compiler.compiler # no -mdll
589 self.compiler.compiler_so = self.compiler.compiler # no -mdll
590 self.compiler.dll_libraries = [] # no -lmsrvc90
590 self.compiler.dll_libraries = [] # no -lmsrvc90
591
591
592 # Different Python installs can have different Python library
592 # Different Python installs can have different Python library
593 # names. e.g. the official CPython distribution uses pythonXY.dll
593 # names. e.g. the official CPython distribution uses pythonXY.dll
594 # and MinGW uses libpythonX.Y.dll.
594 # and MinGW uses libpythonX.Y.dll.
595 _kernel32 = ctypes.windll.kernel32
595 _kernel32 = ctypes.windll.kernel32
596 _kernel32.GetModuleFileNameA.argtypes = [ctypes.c_void_p,
596 _kernel32.GetModuleFileNameA.argtypes = [ctypes.c_void_p,
597 ctypes.c_void_p,
597 ctypes.c_void_p,
598 ctypes.c_ulong]
598 ctypes.c_ulong]
599 _kernel32.GetModuleFileNameA.restype = ctypes.c_ulong
599 _kernel32.GetModuleFileNameA.restype = ctypes.c_ulong
600 size = 1000
600 size = 1000
601 buf = ctypes.create_string_buffer(size + 1)
601 buf = ctypes.create_string_buffer(size + 1)
602 filelen = _kernel32.GetModuleFileNameA(sys.dllhandle, ctypes.byref(buf),
602 filelen = _kernel32.GetModuleFileNameA(sys.dllhandle, ctypes.byref(buf),
603 size)
603 size)
604
604
605 if filelen > 0 and filelen != size:
605 if filelen > 0 and filelen != size:
606 dllbasename = os.path.basename(buf.value)
606 dllbasename = os.path.basename(buf.value)
607 if not dllbasename.lower().endswith('.dll'):
607 if not dllbasename.lower().endswith('.dll'):
608 raise SystemExit('Python DLL does not end with .dll: %s' %
608 raise SystemExit('Python DLL does not end with .dll: %s' %
609 dllbasename)
609 dllbasename)
610 pythonlib = dllbasename[:-4]
610 pythonlib = dllbasename[:-4]
611 else:
611 else:
612 log.warn('could not determine Python DLL filename; '
612 log.warn('could not determine Python DLL filename; '
613 'assuming pythonXY')
613 'assuming pythonXY')
614
614
615 hv = sys.hexversion
615 hv = sys.hexversion
616 pythonlib = 'python%d%d' % (hv >> 24, (hv >> 16) & 0xff)
616 pythonlib = 'python%d%d' % (hv >> 24, (hv >> 16) & 0xff)
617
617
618 log.info('using %s as Python library name' % pythonlib)
618 log.info('using %s as Python library name' % pythonlib)
619 with open('mercurial/hgpythonlib.h', 'wb') as f:
619 with open('mercurial/hgpythonlib.h', 'wb') as f:
620 f.write('/* this file is autogenerated by setup.py */\n')
620 f.write('/* this file is autogenerated by setup.py */\n')
621 f.write('#define HGPYTHONLIB "%s"\n' % pythonlib)
621 f.write('#define HGPYTHONLIB "%s"\n' % pythonlib)
622 objects = self.compiler.compile(['mercurial/exewrapper.c'],
622 objects = self.compiler.compile(['mercurial/exewrapper.c'],
623 output_dir=self.build_temp)
623 output_dir=self.build_temp)
624 dir = os.path.dirname(self.get_ext_fullpath('dummy'))
624 dir = os.path.dirname(self.get_ext_fullpath('dummy'))
625 self.hgtarget = os.path.join(dir, 'hg')
625 self.hgtarget = os.path.join(dir, 'hg')
626 self.compiler.link_executable(objects, self.hgtarget,
626 self.compiler.link_executable(objects, self.hgtarget,
627 libraries=[],
627 libraries=[],
628 output_dir=self.build_temp)
628 output_dir=self.build_temp)
629 if self.long_paths_support:
629 if self.long_paths_support:
630 self.addlongpathsmanifest()
630 self.addlongpathsmanifest()
631
631
632 def addlongpathsmanifest(self):
632 def addlongpathsmanifest(self):
633 """Add manifest pieces so that hg.exe understands long paths
633 """Add manifest pieces so that hg.exe understands long paths
634
634
635 This is an EXPERIMENTAL feature, use with care.
635 This is an EXPERIMENTAL feature, use with care.
636 To enable long paths support, one needs to do two things:
636 To enable long paths support, one needs to do two things:
637 - build Mercurial with --long-paths-support option
637 - build Mercurial with --long-paths-support option
638 - change HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\
638 - change HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\
639 LongPathsEnabled to have value 1.
639 LongPathsEnabled to have value 1.
640
640
641 Please ignore 'warning 81010002: Unrecognized Element "longPathAware"';
641 Please ignore 'warning 81010002: Unrecognized Element "longPathAware"';
642 it happens because Mercurial uses mt.exe circa 2008, which is not
642 it happens because Mercurial uses mt.exe circa 2008, which is not
643 yet aware of long paths support in the manifest (I think so at least).
643 yet aware of long paths support in the manifest (I think so at least).
644 This does not stop mt.exe from embedding/merging the XML properly.
644 This does not stop mt.exe from embedding/merging the XML properly.
645
645
646 Why resource #1 should be used for .exe manifests? I don't know and
646 Why resource #1 should be used for .exe manifests? I don't know and
647 wasn't able to find an explanation for mortals. But it seems to work.
647 wasn't able to find an explanation for mortals. But it seems to work.
648 """
648 """
649 exefname = self.compiler.executable_filename(self.hgtarget)
649 exefname = self.compiler.executable_filename(self.hgtarget)
650 fdauto, manfname = tempfile.mkstemp(suffix='.hg.exe.manifest')
650 fdauto, manfname = tempfile.mkstemp(suffix='.hg.exe.manifest')
651 os.close(fdauto)
651 os.close(fdauto)
652 with open(manfname, 'w') as f:
652 with open(manfname, 'w') as f:
653 f.write(self.LONG_PATHS_MANIFEST)
653 f.write(self.LONG_PATHS_MANIFEST)
654 log.info("long paths manifest is written to '%s'" % manfname)
654 log.info("long paths manifest is written to '%s'" % manfname)
655 inputresource = '-inputresource:%s;#1' % exefname
655 inputresource = '-inputresource:%s;#1' % exefname
656 outputresource = '-outputresource:%s;#1' % exefname
656 outputresource = '-outputresource:%s;#1' % exefname
657 log.info("running mt.exe to update hg.exe's manifest in-place")
657 log.info("running mt.exe to update hg.exe's manifest in-place")
658 # supplying both -manifest and -inputresource to mt.exe makes
658 # supplying both -manifest and -inputresource to mt.exe makes
659 # it merge the embedded and supplied manifests in the -outputresource
659 # it merge the embedded and supplied manifests in the -outputresource
660 self.spawn(['mt.exe', '-nologo', '-manifest', manfname,
660 self.spawn(['mt.exe', '-nologo', '-manifest', manfname,
661 inputresource, outputresource])
661 inputresource, outputresource])
662 log.info("done updating hg.exe's manifest")
662 log.info("done updating hg.exe's manifest")
663 os.remove(manfname)
663 os.remove(manfname)
664
664
665 @property
665 @property
666 def hgexepath(self):
666 def hgexepath(self):
667 dir = os.path.dirname(self.get_ext_fullpath('dummy'))
667 dir = os.path.dirname(self.get_ext_fullpath('dummy'))
668 return os.path.join(self.build_temp, dir, 'hg.exe')
668 return os.path.join(self.build_temp, dir, 'hg.exe')
669
669
670 class hginstall(install):
670 class hginstall(install):
671
671
672 user_options = install.user_options + [
672 user_options = install.user_options + [
673 ('old-and-unmanageable', None,
673 ('old-and-unmanageable', None,
674 'noop, present for eggless setuptools compat'),
674 'noop, present for eggless setuptools compat'),
675 ('single-version-externally-managed', None,
675 ('single-version-externally-managed', None,
676 'noop, present for eggless setuptools compat'),
676 'noop, present for eggless setuptools compat'),
677 ]
677 ]
678
678
679 # Also helps setuptools not be sad while we refuse to create eggs.
679 # Also helps setuptools not be sad while we refuse to create eggs.
680 single_version_externally_managed = True
680 single_version_externally_managed = True
681
681
682 def get_sub_commands(self):
682 def get_sub_commands(self):
683 # Screen out egg related commands to prevent egg generation. But allow
683 # Screen out egg related commands to prevent egg generation. But allow
684 # mercurial.egg-info generation, since that is part of modern
684 # mercurial.egg-info generation, since that is part of modern
685 # packaging.
685 # packaging.
686 excl = set(['bdist_egg'])
686 excl = set(['bdist_egg'])
687 return filter(lambda x: x not in excl, install.get_sub_commands(self))
687 return filter(lambda x: x not in excl, install.get_sub_commands(self))
688
688
689 class hginstalllib(install_lib):
689 class hginstalllib(install_lib):
690 '''
690 '''
691 This is a specialization of install_lib that replaces the copy_file used
691 This is a specialization of install_lib that replaces the copy_file used
692 there so that it supports setting the mode of files after copying them,
692 there so that it supports setting the mode of files after copying them,
693 instead of just preserving the mode that the files originally had. If your
693 instead of just preserving the mode that the files originally had. If your
694 system has a umask of something like 027, preserving the permissions when
694 system has a umask of something like 027, preserving the permissions when
695 copying will lead to a broken install.
695 copying will lead to a broken install.
696
696
697 Note that just passing keep_permissions=False to copy_file would be
697 Note that just passing keep_permissions=False to copy_file would be
698 insufficient, as it might still be applying a umask.
698 insufficient, as it might still be applying a umask.
699 '''
699 '''
700
700
701 def run(self):
701 def run(self):
702 realcopyfile = file_util.copy_file
702 realcopyfile = file_util.copy_file
703 def copyfileandsetmode(*args, **kwargs):
703 def copyfileandsetmode(*args, **kwargs):
704 src, dst = args[0], args[1]
704 src, dst = args[0], args[1]
705 dst, copied = realcopyfile(*args, **kwargs)
705 dst, copied = realcopyfile(*args, **kwargs)
706 if copied:
706 if copied:
707 st = os.stat(src)
707 st = os.stat(src)
708 # Persist executable bit (apply it to group and other if user
708 # Persist executable bit (apply it to group and other if user
709 # has it)
709 # has it)
710 if st[stat.ST_MODE] & stat.S_IXUSR:
710 if st[stat.ST_MODE] & stat.S_IXUSR:
711 setmode = int('0755', 8)
711 setmode = int('0755', 8)
712 else:
712 else:
713 setmode = int('0644', 8)
713 setmode = int('0644', 8)
714 m = stat.S_IMODE(st[stat.ST_MODE])
714 m = stat.S_IMODE(st[stat.ST_MODE])
715 m = (m & ~int('0777', 8)) | setmode
715 m = (m & ~int('0777', 8)) | setmode
716 os.chmod(dst, m)
716 os.chmod(dst, m)
717 file_util.copy_file = copyfileandsetmode
717 file_util.copy_file = copyfileandsetmode
718 try:
718 try:
719 install_lib.run(self)
719 install_lib.run(self)
720 finally:
720 finally:
721 file_util.copy_file = realcopyfile
721 file_util.copy_file = realcopyfile
722
722
723 class hginstallscripts(install_scripts):
723 class hginstallscripts(install_scripts):
724 '''
724 '''
725 This is a specialization of install_scripts that replaces the @LIBDIR@ with
725 This is a specialization of install_scripts that replaces the @LIBDIR@ with
726 the configured directory for modules. If possible, the path is made relative
726 the configured directory for modules. If possible, the path is made relative
727 to the directory for scripts.
727 to the directory for scripts.
728 '''
728 '''
729
729
730 def initialize_options(self):
730 def initialize_options(self):
731 install_scripts.initialize_options(self)
731 install_scripts.initialize_options(self)
732
732
733 self.install_lib = None
733 self.install_lib = None
734
734
735 def finalize_options(self):
735 def finalize_options(self):
736 install_scripts.finalize_options(self)
736 install_scripts.finalize_options(self)
737 self.set_undefined_options('install',
737 self.set_undefined_options('install',
738 ('install_lib', 'install_lib'))
738 ('install_lib', 'install_lib'))
739
739
740 def run(self):
740 def run(self):
741 install_scripts.run(self)
741 install_scripts.run(self)
742
742
743 # It only makes sense to replace @LIBDIR@ with the install path if
743 # It only makes sense to replace @LIBDIR@ with the install path if
744 # the install path is known. For wheels, the logic below calculates
744 # the install path is known. For wheels, the logic below calculates
745 # the libdir to be "../..". This is because the internal layout of a
745 # the libdir to be "../..". This is because the internal layout of a
746 # wheel archive looks like:
746 # wheel archive looks like:
747 #
747 #
748 # mercurial-3.6.1.data/scripts/hg
748 # mercurial-3.6.1.data/scripts/hg
749 # mercurial/__init__.py
749 # mercurial/__init__.py
750 #
750 #
751 # When installing wheels, the subdirectories of the "<pkg>.data"
751 # When installing wheels, the subdirectories of the "<pkg>.data"
752 # directory are translated to system local paths and files therein
752 # directory are translated to system local paths and files therein
753 # are copied in place. The mercurial/* files are installed into the
753 # are copied in place. The mercurial/* files are installed into the
754 # site-packages directory. However, the site-packages directory
754 # site-packages directory. However, the site-packages directory
755 # isn't known until wheel install time. This means we have no clue
755 # isn't known until wheel install time. This means we have no clue
756 # at wheel generation time what the installed site-packages directory
756 # at wheel generation time what the installed site-packages directory
757 # will be. And, wheels don't appear to provide the ability to register
757 # will be. And, wheels don't appear to provide the ability to register
758 # custom code to run during wheel installation. This all means that
758 # custom code to run during wheel installation. This all means that
759 # we can't reliably set the libdir in wheels: the default behavior
759 # we can't reliably set the libdir in wheels: the default behavior
760 # of looking in sys.path must do.
760 # of looking in sys.path must do.
761
761
762 if (os.path.splitdrive(self.install_dir)[0] !=
762 if (os.path.splitdrive(self.install_dir)[0] !=
763 os.path.splitdrive(self.install_lib)[0]):
763 os.path.splitdrive(self.install_lib)[0]):
764 # can't make relative paths from one drive to another, so use an
764 # can't make relative paths from one drive to another, so use an
765 # absolute path instead
765 # absolute path instead
766 libdir = self.install_lib
766 libdir = self.install_lib
767 else:
767 else:
768 common = os.path.commonprefix((self.install_dir, self.install_lib))
768 common = os.path.commonprefix((self.install_dir, self.install_lib))
769 rest = self.install_dir[len(common):]
769 rest = self.install_dir[len(common):]
770 uplevel = len([n for n in os.path.split(rest) if n])
770 uplevel = len([n for n in os.path.split(rest) if n])
771
771
772 libdir = uplevel * ('..' + os.sep) + self.install_lib[len(common):]
772 libdir = uplevel * ('..' + os.sep) + self.install_lib[len(common):]
773
773
774 for outfile in self.outfiles:
774 for outfile in self.outfiles:
775 with open(outfile, 'rb') as fp:
775 with open(outfile, 'rb') as fp:
776 data = fp.read()
776 data = fp.read()
777
777
778 # skip binary files
778 # skip binary files
779 if b'\0' in data:
779 if b'\0' in data:
780 continue
780 continue
781
781
782 # During local installs, the shebang will be rewritten to the final
782 # During local installs, the shebang will be rewritten to the final
783 # install path. During wheel packaging, the shebang has a special
783 # install path. During wheel packaging, the shebang has a special
784 # value.
784 # value.
785 if data.startswith(b'#!python'):
785 if data.startswith(b'#!python'):
786 log.info('not rewriting @LIBDIR@ in %s because install path '
786 log.info('not rewriting @LIBDIR@ in %s because install path '
787 'not known' % outfile)
787 'not known' % outfile)
788 continue
788 continue
789
789
790 data = data.replace(b'@LIBDIR@', libdir.encode(libdir_escape))
790 data = data.replace(b'@LIBDIR@', libdir.encode(libdir_escape))
791 with open(outfile, 'wb') as fp:
791 with open(outfile, 'wb') as fp:
792 fp.write(data)
792 fp.write(data)
793
793
794 cmdclass = {'build': hgbuild,
794 cmdclass = {'build': hgbuild,
795 'build_mo': hgbuildmo,
795 'build_mo': hgbuildmo,
796 'build_ext': hgbuildext,
796 'build_ext': hgbuildext,
797 'build_py': hgbuildpy,
797 'build_py': hgbuildpy,
798 'build_scripts': hgbuildscripts,
798 'build_scripts': hgbuildscripts,
799 'build_hgextindex': buildhgextindex,
799 'build_hgextindex': buildhgextindex,
800 'install': hginstall,
800 'install': hginstall,
801 'install_lib': hginstalllib,
801 'install_lib': hginstalllib,
802 'install_scripts': hginstallscripts,
802 'install_scripts': hginstallscripts,
803 'build_hgexe': buildhgexe,
803 'build_hgexe': buildhgexe,
804 }
804 }
805
805
806 packages = ['mercurial',
806 packages = ['mercurial',
807 'mercurial.cext',
807 'mercurial.cext',
808 'mercurial.cffi',
808 'mercurial.cffi',
809 'mercurial.hgweb',
809 'mercurial.hgweb',
810 'mercurial.pure',
810 'mercurial.pure',
811 'mercurial.thirdparty',
811 'mercurial.thirdparty',
812 'mercurial.thirdparty.attr',
812 'mercurial.thirdparty.attr',
813 'mercurial.utils',
813 'mercurial.utils',
814 'hgext', 'hgext.convert', 'hgext.fsmonitor',
814 'hgext', 'hgext.convert', 'hgext.fsmonitor',
815 'hgext.fsmonitor.pywatchman', 'hgext.highlight',
815 'hgext.fsmonitor.pywatchman', 'hgext.highlight',
816 'hgext.largefiles', 'hgext.lfs', 'hgext.narrow',
816 'hgext.largefiles', 'hgext.lfs', 'hgext.narrow',
817 'hgext.zeroconf', 'hgext3rd',
817 'hgext.zeroconf', 'hgext3rd',
818 'hgdemandimport']
818 'hgdemandimport']
819
819
820 common_depends = ['mercurial/bitmanipulation.h',
820 common_depends = ['mercurial/bitmanipulation.h',
821 'mercurial/compat.h',
821 'mercurial/compat.h',
822 'mercurial/cext/util.h']
822 'mercurial/cext/util.h']
823 common_include_dirs = ['mercurial']
823 common_include_dirs = ['mercurial']
824
824
825 osutil_cflags = []
825 osutil_cflags = []
826 osutil_ldflags = []
826 osutil_ldflags = []
827
827
828 # platform specific macros
828 # platform specific macros
829 for plat, func in [('bsd', 'setproctitle')]:
829 for plat, func in [('bsd', 'setproctitle')]:
830 if re.search(plat, sys.platform) and hasfunction(new_compiler(), func):
830 if re.search(plat, sys.platform) and hasfunction(new_compiler(), func):
831 osutil_cflags.append('-DHAVE_%s' % func.upper())
831 osutil_cflags.append('-DHAVE_%s' % func.upper())
832
832
833 for plat, macro, code in [
833 for plat, macro, code in [
834 ('bsd|darwin', 'BSD_STATFS', '''
834 ('bsd|darwin', 'BSD_STATFS', '''
835 #include <sys/param.h>
835 #include <sys/param.h>
836 #include <sys/mount.h>
836 #include <sys/mount.h>
837 int main() { struct statfs s; return sizeof(s.f_fstypename); }
837 int main() { struct statfs s; return sizeof(s.f_fstypename); }
838 '''),
838 '''),
839 ('linux', 'LINUX_STATFS', '''
839 ('linux', 'LINUX_STATFS', '''
840 #include <linux/magic.h>
840 #include <linux/magic.h>
841 #include <sys/vfs.h>
841 #include <sys/vfs.h>
842 int main() { struct statfs s; return sizeof(s.f_type); }
842 int main() { struct statfs s; return sizeof(s.f_type); }
843 '''),
843 '''),
844 ]:
844 ]:
845 if re.search(plat, sys.platform) and cancompile(new_compiler(), code):
845 if re.search(plat, sys.platform) and cancompile(new_compiler(), code):
846 osutil_cflags.append('-DHAVE_%s' % macro)
846 osutil_cflags.append('-DHAVE_%s' % macro)
847
847
848 if sys.platform == 'darwin':
848 if sys.platform == 'darwin':
849 osutil_ldflags += ['-framework', 'ApplicationServices']
849 osutil_ldflags += ['-framework', 'ApplicationServices']
850
850
851 xdiff_srcs = [
851 xdiff_srcs = [
852 'mercurial/thirdparty/xdiff/xdiffi.c',
852 'mercurial/thirdparty/xdiff/xdiffi.c',
853 'mercurial/thirdparty/xdiff/xemit.c',
854 'mercurial/thirdparty/xdiff/xmerge.c',
853 'mercurial/thirdparty/xdiff/xmerge.c',
855 'mercurial/thirdparty/xdiff/xprepare.c',
854 'mercurial/thirdparty/xdiff/xprepare.c',
856 'mercurial/thirdparty/xdiff/xutils.c',
855 'mercurial/thirdparty/xdiff/xutils.c',
857 ]
856 ]
858
857
859 xdiff_headers = [
858 xdiff_headers = [
860 'mercurial/thirdparty/xdiff/xdiff.h',
859 'mercurial/thirdparty/xdiff/xdiff.h',
861 'mercurial/thirdparty/xdiff/xdiffi.h',
860 'mercurial/thirdparty/xdiff/xdiffi.h',
862 'mercurial/thirdparty/xdiff/xemit.h',
863 'mercurial/thirdparty/xdiff/xinclude.h',
861 'mercurial/thirdparty/xdiff/xinclude.h',
864 'mercurial/thirdparty/xdiff/xmacros.h',
862 'mercurial/thirdparty/xdiff/xmacros.h',
865 'mercurial/thirdparty/xdiff/xprepare.h',
863 'mercurial/thirdparty/xdiff/xprepare.h',
866 'mercurial/thirdparty/xdiff/xtypes.h',
864 'mercurial/thirdparty/xdiff/xtypes.h',
867 'mercurial/thirdparty/xdiff/xutils.h',
865 'mercurial/thirdparty/xdiff/xutils.h',
868 ]
866 ]
869
867
870 extmodules = [
868 extmodules = [
871 Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'],
869 Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'],
872 include_dirs=common_include_dirs,
870 include_dirs=common_include_dirs,
873 depends=common_depends),
871 depends=common_depends),
874 Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c',
872 Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c',
875 'mercurial/cext/bdiff.c'] + xdiff_srcs,
873 'mercurial/cext/bdiff.c'] + xdiff_srcs,
876 include_dirs=common_include_dirs,
874 include_dirs=common_include_dirs,
877 depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers),
875 depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers),
878 Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'],
876 Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'],
879 include_dirs=common_include_dirs,
877 include_dirs=common_include_dirs,
880 depends=common_depends),
878 depends=common_depends),
881 Extension('mercurial.cext.mpatch', ['mercurial/mpatch.c',
879 Extension('mercurial.cext.mpatch', ['mercurial/mpatch.c',
882 'mercurial/cext/mpatch.c'],
880 'mercurial/cext/mpatch.c'],
883 include_dirs=common_include_dirs,
881 include_dirs=common_include_dirs,
884 depends=common_depends),
882 depends=common_depends),
885 Extension('mercurial.cext.parsers', ['mercurial/cext/charencode.c',
883 Extension('mercurial.cext.parsers', ['mercurial/cext/charencode.c',
886 'mercurial/cext/dirs.c',
884 'mercurial/cext/dirs.c',
887 'mercurial/cext/manifest.c',
885 'mercurial/cext/manifest.c',
888 'mercurial/cext/parsers.c',
886 'mercurial/cext/parsers.c',
889 'mercurial/cext/pathencode.c',
887 'mercurial/cext/pathencode.c',
890 'mercurial/cext/revlog.c'],
888 'mercurial/cext/revlog.c'],
891 include_dirs=common_include_dirs,
889 include_dirs=common_include_dirs,
892 depends=common_depends + ['mercurial/cext/charencode.h']),
890 depends=common_depends + ['mercurial/cext/charencode.h']),
893 Extension('mercurial.cext.osutil', ['mercurial/cext/osutil.c'],
891 Extension('mercurial.cext.osutil', ['mercurial/cext/osutil.c'],
894 include_dirs=common_include_dirs,
892 include_dirs=common_include_dirs,
895 extra_compile_args=osutil_cflags,
893 extra_compile_args=osutil_cflags,
896 extra_link_args=osutil_ldflags,
894 extra_link_args=osutil_ldflags,
897 depends=common_depends),
895 depends=common_depends),
898 Extension('hgext.fsmonitor.pywatchman.bser',
896 Extension('hgext.fsmonitor.pywatchman.bser',
899 ['hgext/fsmonitor/pywatchman/bser.c']),
897 ['hgext/fsmonitor/pywatchman/bser.c']),
900 ]
898 ]
901
899
902 sys.path.insert(0, 'contrib/python-zstandard')
900 sys.path.insert(0, 'contrib/python-zstandard')
903 import setup_zstd
901 import setup_zstd
904 extmodules.append(setup_zstd.get_c_extension(name='mercurial.zstd'))
902 extmodules.append(setup_zstd.get_c_extension(name='mercurial.zstd'))
905
903
906 try:
904 try:
907 from distutils import cygwinccompiler
905 from distutils import cygwinccompiler
908
906
909 # the -mno-cygwin option has been deprecated for years
907 # the -mno-cygwin option has been deprecated for years
910 mingw32compilerclass = cygwinccompiler.Mingw32CCompiler
908 mingw32compilerclass = cygwinccompiler.Mingw32CCompiler
911
909
912 class HackedMingw32CCompiler(cygwinccompiler.Mingw32CCompiler):
910 class HackedMingw32CCompiler(cygwinccompiler.Mingw32CCompiler):
913 def __init__(self, *args, **kwargs):
911 def __init__(self, *args, **kwargs):
914 mingw32compilerclass.__init__(self, *args, **kwargs)
912 mingw32compilerclass.__init__(self, *args, **kwargs)
915 for i in 'compiler compiler_so linker_exe linker_so'.split():
913 for i in 'compiler compiler_so linker_exe linker_so'.split():
916 try:
914 try:
917 getattr(self, i).remove('-mno-cygwin')
915 getattr(self, i).remove('-mno-cygwin')
918 except ValueError:
916 except ValueError:
919 pass
917 pass
920
918
921 cygwinccompiler.Mingw32CCompiler = HackedMingw32CCompiler
919 cygwinccompiler.Mingw32CCompiler = HackedMingw32CCompiler
922 except ImportError:
920 except ImportError:
923 # the cygwinccompiler package is not available on some Python
921 # the cygwinccompiler package is not available on some Python
924 # distributions like the ones from the optware project for Synology
922 # distributions like the ones from the optware project for Synology
925 # DiskStation boxes
923 # DiskStation boxes
926 class HackedMingw32CCompiler(object):
924 class HackedMingw32CCompiler(object):
927 pass
925 pass
928
926
929 if os.name == 'nt':
927 if os.name == 'nt':
930 # Allow compiler/linker flags to be added to Visual Studio builds. Passing
928 # Allow compiler/linker flags to be added to Visual Studio builds. Passing
931 # extra_link_args to distutils.extensions.Extension() doesn't have any
929 # extra_link_args to distutils.extensions.Extension() doesn't have any
932 # effect.
930 # effect.
933 from distutils import msvccompiler
931 from distutils import msvccompiler
934
932
935 msvccompilerclass = msvccompiler.MSVCCompiler
933 msvccompilerclass = msvccompiler.MSVCCompiler
936
934
937 class HackedMSVCCompiler(msvccompiler.MSVCCompiler):
935 class HackedMSVCCompiler(msvccompiler.MSVCCompiler):
938 def initialize(self):
936 def initialize(self):
939 msvccompilerclass.initialize(self)
937 msvccompilerclass.initialize(self)
940 # "warning LNK4197: export 'func' specified multiple times"
938 # "warning LNK4197: export 'func' specified multiple times"
941 self.ldflags_shared.append('/ignore:4197')
939 self.ldflags_shared.append('/ignore:4197')
942 self.ldflags_shared_debug.append('/ignore:4197')
940 self.ldflags_shared_debug.append('/ignore:4197')
943
941
944 msvccompiler.MSVCCompiler = HackedMSVCCompiler
942 msvccompiler.MSVCCompiler = HackedMSVCCompiler
945
943
946 packagedata = {'mercurial': ['locale/*/LC_MESSAGES/hg.mo',
944 packagedata = {'mercurial': ['locale/*/LC_MESSAGES/hg.mo',
947 'help/*.txt',
945 'help/*.txt',
948 'help/internals/*.txt',
946 'help/internals/*.txt',
949 'default.d/*.rc',
947 'default.d/*.rc',
950 'dummycert.pem']}
948 'dummycert.pem']}
951
949
952 def ordinarypath(p):
950 def ordinarypath(p):
953 return p and p[0] != '.' and p[-1] != '~'
951 return p and p[0] != '.' and p[-1] != '~'
954
952
955 for root in ('templates',):
953 for root in ('templates',):
956 for curdir, dirs, files in os.walk(os.path.join('mercurial', root)):
954 for curdir, dirs, files in os.walk(os.path.join('mercurial', root)):
957 curdir = curdir.split(os.sep, 1)[1]
955 curdir = curdir.split(os.sep, 1)[1]
958 dirs[:] = filter(ordinarypath, dirs)
956 dirs[:] = filter(ordinarypath, dirs)
959 for f in filter(ordinarypath, files):
957 for f in filter(ordinarypath, files):
960 f = os.path.join(curdir, f)
958 f = os.path.join(curdir, f)
961 packagedata['mercurial'].append(f)
959 packagedata['mercurial'].append(f)
962
960
963 datafiles = []
961 datafiles = []
964
962
965 # distutils expects version to be str/unicode. Converting it to
963 # distutils expects version to be str/unicode. Converting it to
966 # unicode on Python 2 still works because it won't contain any
964 # unicode on Python 2 still works because it won't contain any
967 # non-ascii bytes and will be implicitly converted back to bytes
965 # non-ascii bytes and will be implicitly converted back to bytes
968 # when operated on.
966 # when operated on.
969 assert isinstance(version, bytes)
967 assert isinstance(version, bytes)
970 setupversion = version.decode('ascii')
968 setupversion = version.decode('ascii')
971
969
972 extra = {}
970 extra = {}
973
971
974 if issetuptools:
972 if issetuptools:
975 extra['python_requires'] = supportedpy
973 extra['python_requires'] = supportedpy
976 if py2exeloaded:
974 if py2exeloaded:
977 extra['console'] = [
975 extra['console'] = [
978 {'script':'hg',
976 {'script':'hg',
979 'copyright':'Copyright (C) 2005-2018 Matt Mackall and others',
977 'copyright':'Copyright (C) 2005-2018 Matt Mackall and others',
980 'product_version':version}]
978 'product_version':version}]
981 # sub command of 'build' because 'py2exe' does not handle sub_commands
979 # sub command of 'build' because 'py2exe' does not handle sub_commands
982 build.sub_commands.insert(0, ('build_hgextindex', None))
980 build.sub_commands.insert(0, ('build_hgextindex', None))
983 # put dlls in sub directory so that they won't pollute PATH
981 # put dlls in sub directory so that they won't pollute PATH
984 extra['zipfile'] = 'lib/library.zip'
982 extra['zipfile'] = 'lib/library.zip'
985
983
986 if os.name == 'nt':
984 if os.name == 'nt':
987 # Windows binary file versions for exe/dll files must have the
985 # Windows binary file versions for exe/dll files must have the
988 # form W.X.Y.Z, where W,X,Y,Z are numbers in the range 0..65535
986 # form W.X.Y.Z, where W,X,Y,Z are numbers in the range 0..65535
989 setupversion = version.split('+', 1)[0]
987 setupversion = version.split('+', 1)[0]
990
988
991 if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
989 if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
992 version = runcmd(['/usr/bin/xcodebuild', '-version'], {})[1].splitlines()
990 version = runcmd(['/usr/bin/xcodebuild', '-version'], {})[1].splitlines()
993 if version:
991 if version:
994 version = version[0]
992 version = version[0]
995 if sys.version_info[0] == 3:
993 if sys.version_info[0] == 3:
996 version = version.decode('utf-8')
994 version = version.decode('utf-8')
997 xcode4 = (version.startswith('Xcode') and
995 xcode4 = (version.startswith('Xcode') and
998 StrictVersion(version.split()[1]) >= StrictVersion('4.0'))
996 StrictVersion(version.split()[1]) >= StrictVersion('4.0'))
999 xcode51 = re.match(r'^Xcode\s+5\.1', version) is not None
997 xcode51 = re.match(r'^Xcode\s+5\.1', version) is not None
1000 else:
998 else:
1001 # xcodebuild returns empty on OS X Lion with XCode 4.3 not
999 # xcodebuild returns empty on OS X Lion with XCode 4.3 not
1002 # installed, but instead with only command-line tools. Assume
1000 # installed, but instead with only command-line tools. Assume
1003 # that only happens on >= Lion, thus no PPC support.
1001 # that only happens on >= Lion, thus no PPC support.
1004 xcode4 = True
1002 xcode4 = True
1005 xcode51 = False
1003 xcode51 = False
1006
1004
1007 # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
1005 # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
1008 # distutils.sysconfig
1006 # distutils.sysconfig
1009 if xcode4:
1007 if xcode4:
1010 os.environ['ARCHFLAGS'] = ''
1008 os.environ['ARCHFLAGS'] = ''
1011
1009
1012 # XCode 5.1 changes clang such that it now fails to compile if the
1010 # XCode 5.1 changes clang such that it now fails to compile if the
1013 # -mno-fused-madd flag is passed, but the version of Python shipped with
1011 # -mno-fused-madd flag is passed, but the version of Python shipped with
1014 # OS X 10.9 Mavericks includes this flag. This causes problems in all
1012 # OS X 10.9 Mavericks includes this flag. This causes problems in all
1015 # C extension modules, and a bug has been filed upstream at
1013 # C extension modules, and a bug has been filed upstream at
1016 # http://bugs.python.org/issue21244. We also need to patch this here
1014 # http://bugs.python.org/issue21244. We also need to patch this here
1017 # so Mercurial can continue to compile in the meantime.
1015 # so Mercurial can continue to compile in the meantime.
1018 if xcode51:
1016 if xcode51:
1019 cflags = get_config_var('CFLAGS')
1017 cflags = get_config_var('CFLAGS')
1020 if cflags and re.search(r'-mno-fused-madd\b', cflags) is not None:
1018 if cflags and re.search(r'-mno-fused-madd\b', cflags) is not None:
1021 os.environ['CFLAGS'] = (
1019 os.environ['CFLAGS'] = (
1022 os.environ.get('CFLAGS', '') + ' -Qunused-arguments')
1020 os.environ.get('CFLAGS', '') + ' -Qunused-arguments')
1023
1021
1024 setup(name='mercurial',
1022 setup(name='mercurial',
1025 version=setupversion,
1023 version=setupversion,
1026 author='Matt Mackall and many others',
1024 author='Matt Mackall and many others',
1027 author_email='mercurial@mercurial-scm.org',
1025 author_email='mercurial@mercurial-scm.org',
1028 url='https://mercurial-scm.org/',
1026 url='https://mercurial-scm.org/',
1029 download_url='https://mercurial-scm.org/release/',
1027 download_url='https://mercurial-scm.org/release/',
1030 description=('Fast scalable distributed SCM (revision control, version '
1028 description=('Fast scalable distributed SCM (revision control, version '
1031 'control) system'),
1029 'control) system'),
1032 long_description=('Mercurial is a distributed SCM tool written in Python.'
1030 long_description=('Mercurial is a distributed SCM tool written in Python.'
1033 ' It is used by a number of large projects that require'
1031 ' It is used by a number of large projects that require'
1034 ' fast, reliable distributed revision control, such as '
1032 ' fast, reliable distributed revision control, such as '
1035 'Mozilla.'),
1033 'Mozilla.'),
1036 license='GNU GPLv2 or any later version',
1034 license='GNU GPLv2 or any later version',
1037 classifiers=[
1035 classifiers=[
1038 'Development Status :: 6 - Mature',
1036 'Development Status :: 6 - Mature',
1039 'Environment :: Console',
1037 'Environment :: Console',
1040 'Intended Audience :: Developers',
1038 'Intended Audience :: Developers',
1041 'Intended Audience :: System Administrators',
1039 'Intended Audience :: System Administrators',
1042 'License :: OSI Approved :: GNU General Public License (GPL)',
1040 'License :: OSI Approved :: GNU General Public License (GPL)',
1043 'Natural Language :: Danish',
1041 'Natural Language :: Danish',
1044 'Natural Language :: English',
1042 'Natural Language :: English',
1045 'Natural Language :: German',
1043 'Natural Language :: German',
1046 'Natural Language :: Italian',
1044 'Natural Language :: Italian',
1047 'Natural Language :: Japanese',
1045 'Natural Language :: Japanese',
1048 'Natural Language :: Portuguese (Brazilian)',
1046 'Natural Language :: Portuguese (Brazilian)',
1049 'Operating System :: Microsoft :: Windows',
1047 'Operating System :: Microsoft :: Windows',
1050 'Operating System :: OS Independent',
1048 'Operating System :: OS Independent',
1051 'Operating System :: POSIX',
1049 'Operating System :: POSIX',
1052 'Programming Language :: C',
1050 'Programming Language :: C',
1053 'Programming Language :: Python',
1051 'Programming Language :: Python',
1054 'Topic :: Software Development :: Version Control',
1052 'Topic :: Software Development :: Version Control',
1055 ],
1053 ],
1056 scripts=scripts,
1054 scripts=scripts,
1057 packages=packages,
1055 packages=packages,
1058 ext_modules=extmodules,
1056 ext_modules=extmodules,
1059 data_files=datafiles,
1057 data_files=datafiles,
1060 package_data=packagedata,
1058 package_data=packagedata,
1061 cmdclass=cmdclass,
1059 cmdclass=cmdclass,
1062 distclass=hgdist,
1060 distclass=hgdist,
1063 options={'py2exe': {'packages': ['hgdemandimport', 'hgext', 'email',
1061 options={'py2exe': {'packages': ['hgdemandimport', 'hgext', 'email',
1064 # implicitly imported per module policy
1062 # implicitly imported per module policy
1065 # (cffi wouldn't be used as a frozen exe)
1063 # (cffi wouldn't be used as a frozen exe)
1066 'mercurial.cext',
1064 'mercurial.cext',
1067 #'mercurial.cffi',
1065 #'mercurial.cffi',
1068 'mercurial.pure']},
1066 'mercurial.pure']},
1069 'bdist_mpkg': {'zipdist': False,
1067 'bdist_mpkg': {'zipdist': False,
1070 'license': 'COPYING',
1068 'license': 'COPYING',
1071 'readme': 'contrib/macosx/Readme.html',
1069 'readme': 'contrib/macosx/Readme.html',
1072 'welcome': 'contrib/macosx/Welcome.html',
1070 'welcome': 'contrib/macosx/Welcome.html',
1073 },
1071 },
1074 },
1072 },
1075 **extra)
1073 **extra)
1 NO CONTENT: file was removed
NO CONTENT: file was removed
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now