Show More
@@ -0,0 +1,150 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XDIFF_H) | |||
|
24 | #define XDIFF_H | |||
|
25 | ||||
|
26 | #ifdef __cplusplus | |||
|
27 | extern "C" { | |||
|
28 | #endif /* #ifdef __cplusplus */ | |||
|
29 | ||||
|
30 | /* xpparm_t.flags */ | |||
|
31 | #define XDF_NEED_MINIMAL (1 << 0) | |||
|
32 | ||||
|
33 | #define XDF_IGNORE_WHITESPACE (1 << 1) | |||
|
34 | #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 2) | |||
|
35 | #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 3) | |||
|
36 | #define XDF_IGNORE_CR_AT_EOL (1 << 4) | |||
|
37 | #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | \ | |||
|
38 | XDF_IGNORE_WHITESPACE_CHANGE | \ | |||
|
39 | XDF_IGNORE_WHITESPACE_AT_EOL | \ | |||
|
40 | XDF_IGNORE_CR_AT_EOL) | |||
|
41 | ||||
|
42 | #define XDF_IGNORE_BLANK_LINES (1 << 7) | |||
|
43 | ||||
|
44 | #define XDF_PATIENCE_DIFF (1 << 14) | |||
|
45 | #define XDF_HISTOGRAM_DIFF (1 << 15) | |||
|
46 | #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF) | |||
|
47 | #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK) | |||
|
48 | ||||
|
49 | #define XDF_INDENT_HEURISTIC (1 << 23) | |||
|
50 | ||||
|
51 | /* xdemitconf_t.flags */ | |||
|
52 | #define XDL_EMIT_FUNCNAMES (1 << 0) | |||
|
53 | #define XDL_EMIT_FUNCCONTEXT (1 << 2) | |||
|
54 | ||||
|
55 | #define XDL_MMB_READONLY (1 << 0) | |||
|
56 | ||||
|
57 | #define XDL_MMF_ATOMIC (1 << 0) | |||
|
58 | ||||
|
59 | #define XDL_BDOP_INS 1 | |||
|
60 | #define XDL_BDOP_CPY 2 | |||
|
61 | #define XDL_BDOP_INSB 3 | |||
|
62 | ||||
|
63 | /* merge simplification levels */ | |||
|
64 | #define XDL_MERGE_MINIMAL 0 | |||
|
65 | #define XDL_MERGE_EAGER 1 | |||
|
66 | #define XDL_MERGE_ZEALOUS 2 | |||
|
67 | #define XDL_MERGE_ZEALOUS_ALNUM 3 | |||
|
68 | ||||
|
69 | /* merge favor modes */ | |||
|
70 | #define XDL_MERGE_FAVOR_OURS 1 | |||
|
71 | #define XDL_MERGE_FAVOR_THEIRS 2 | |||
|
72 | #define XDL_MERGE_FAVOR_UNION 3 | |||
|
73 | ||||
|
74 | /* merge output styles */ | |||
|
75 | #define XDL_MERGE_DIFF3 1 | |||
|
76 | ||||
|
77 | typedef struct s_mmfile { | |||
|
78 | char *ptr; | |||
|
79 | long size; | |||
|
80 | } mmfile_t; | |||
|
81 | ||||
|
82 | typedef struct s_mmbuffer { | |||
|
83 | char *ptr; | |||
|
84 | long size; | |||
|
85 | } mmbuffer_t; | |||
|
86 | ||||
|
87 | typedef struct s_xpparam { | |||
|
88 | unsigned long flags; | |||
|
89 | ||||
|
90 | /* See Documentation/diff-options.txt. */ | |||
|
91 | char **anchors; | |||
|
92 | size_t anchors_nr; | |||
|
93 | } xpparam_t; | |||
|
94 | ||||
|
95 | typedef struct s_xdemitcb { | |||
|
96 | void *priv; | |||
|
97 | int (*outf)(void *, mmbuffer_t *, int); | |||
|
98 | } xdemitcb_t; | |||
|
99 | ||||
|
100 | typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv); | |||
|
101 | ||||
|
102 | typedef int (*xdl_emit_hunk_consume_func_t)(long start_a, long count_a, | |||
|
103 | long start_b, long count_b, | |||
|
104 | void *cb_data); | |||
|
105 | ||||
|
106 | typedef struct s_xdemitconf { | |||
|
107 | long ctxlen; | |||
|
108 | long interhunkctxlen; | |||
|
109 | unsigned long flags; | |||
|
110 | find_func_t find_func; | |||
|
111 | void *find_func_priv; | |||
|
112 | xdl_emit_hunk_consume_func_t hunk_func; | |||
|
113 | } xdemitconf_t; | |||
|
114 | ||||
|
115 | typedef struct s_bdiffparam { | |||
|
116 | long bsize; | |||
|
117 | } bdiffparam_t; | |||
|
118 | ||||
|
119 | ||||
|
120 | #define xdl_malloc(x) malloc(x) | |||
|
121 | #define xdl_free(ptr) free(ptr) | |||
|
122 | #define xdl_realloc(ptr,x) realloc(ptr,x) | |||
|
123 | ||||
|
124 | void *xdl_mmfile_first(mmfile_t *mmf, long *size); | |||
|
125 | long xdl_mmfile_size(mmfile_t *mmf); | |||
|
126 | ||||
|
127 | int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
128 | xdemitconf_t const *xecfg, xdemitcb_t *ecb); | |||
|
129 | ||||
|
130 | typedef struct s_xmparam { | |||
|
131 | xpparam_t xpp; | |||
|
132 | int marker_size; | |||
|
133 | int level; | |||
|
134 | int favor; | |||
|
135 | int style; | |||
|
136 | const char *ancestor; /* label for orig */ | |||
|
137 | const char *file1; /* label for mf1 */ | |||
|
138 | const char *file2; /* label for mf2 */ | |||
|
139 | } xmparam_t; | |||
|
140 | ||||
|
141 | #define DEFAULT_CONFLICT_MARKER_SIZE 7 | |||
|
142 | ||||
|
143 | int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, | |||
|
144 | xmparam_t const *xmp, mmbuffer_t *result); | |||
|
145 | ||||
|
146 | #ifdef __cplusplus | |||
|
147 | } | |||
|
148 | #endif /* #ifdef __cplusplus */ | |||
|
149 | ||||
|
150 | #endif /* #if !defined(XDIFF_H) */ |
This diff has been collapsed as it changes many lines, (1050 lines changed) Show them Hide them | |||||
@@ -0,0 +1,1050 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #include "xinclude.h" | |||
|
24 | ||||
|
25 | ||||
|
26 | ||||
|
27 | #define XDL_MAX_COST_MIN 256 | |||
|
28 | #define XDL_HEUR_MIN_COST 256 | |||
|
29 | #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) | |||
|
30 | #define XDL_SNAKE_CNT 20 | |||
|
31 | #define XDL_K_HEUR 4 | |||
|
32 | ||||
|
33 | ||||
|
34 | ||||
|
35 | typedef struct s_xdpsplit { | |||
|
36 | long i1, i2; | |||
|
37 | int min_lo, min_hi; | |||
|
38 | } xdpsplit_t; | |||
|
39 | ||||
|
40 | ||||
|
41 | ||||
|
42 | ||||
|
43 | static long xdl_split(unsigned long const *ha1, long off1, long lim1, | |||
|
44 | unsigned long const *ha2, long off2, long lim2, | |||
|
45 | long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, | |||
|
46 | xdalgoenv_t *xenv); | |||
|
47 | static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); | |||
|
48 | ||||
|
49 | ||||
|
50 | ||||
|
51 | ||||
|
52 | ||||
|
53 | /* | |||
|
54 | * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. | |||
|
55 | * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both | |||
|
56 | * the forward diagonal starting from (off1, off2) and the backward diagonal | |||
|
57 | * starting from (lim1, lim2). If the K values on the same diagonal crosses | |||
|
58 | * returns the furthest point of reach. We might end up having to expensive | |||
|
59 | * cases using this algorithm is full, so a little bit of heuristic is needed | |||
|
60 | * to cut the search and to return a suboptimal point. | |||
|
61 | */ | |||
|
62 | static long xdl_split(unsigned long const *ha1, long off1, long lim1, | |||
|
63 | unsigned long const *ha2, long off2, long lim2, | |||
|
64 | long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, | |||
|
65 | xdalgoenv_t *xenv) { | |||
|
66 | long dmin = off1 - lim2, dmax = lim1 - off2; | |||
|
67 | long fmid = off1 - off2, bmid = lim1 - lim2; | |||
|
68 | long odd = (fmid - bmid) & 1; | |||
|
69 | long fmin = fmid, fmax = fmid; | |||
|
70 | long bmin = bmid, bmax = bmid; | |||
|
71 | long ec, d, i1, i2, prev1, best, dd, v, k; | |||
|
72 | ||||
|
73 | /* | |||
|
74 | * Set initial diagonal values for both forward and backward path. | |||
|
75 | */ | |||
|
76 | kvdf[fmid] = off1; | |||
|
77 | kvdb[bmid] = lim1; | |||
|
78 | ||||
|
79 | for (ec = 1;; ec++) { | |||
|
80 | int got_snake = 0; | |||
|
81 | ||||
|
82 | /* | |||
|
83 | * We need to extent the diagonal "domain" by one. If the next | |||
|
84 | * values exits the box boundaries we need to change it in the | |||
|
85 | * opposite direction because (max - min) must be a power of two. | |||
|
86 | * Also we initialize the external K value to -1 so that we can | |||
|
87 | * avoid extra conditions check inside the core loop. | |||
|
88 | */ | |||
|
89 | if (fmin > dmin) | |||
|
90 | kvdf[--fmin - 1] = -1; | |||
|
91 | else | |||
|
92 | ++fmin; | |||
|
93 | if (fmax < dmax) | |||
|
94 | kvdf[++fmax + 1] = -1; | |||
|
95 | else | |||
|
96 | --fmax; | |||
|
97 | ||||
|
98 | for (d = fmax; d >= fmin; d -= 2) { | |||
|
99 | if (kvdf[d - 1] >= kvdf[d + 1]) | |||
|
100 | i1 = kvdf[d - 1] + 1; | |||
|
101 | else | |||
|
102 | i1 = kvdf[d + 1]; | |||
|
103 | prev1 = i1; | |||
|
104 | i2 = i1 - d; | |||
|
105 | for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++); | |||
|
106 | if (i1 - prev1 > xenv->snake_cnt) | |||
|
107 | got_snake = 1; | |||
|
108 | kvdf[d] = i1; | |||
|
109 | if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) { | |||
|
110 | spl->i1 = i1; | |||
|
111 | spl->i2 = i2; | |||
|
112 | spl->min_lo = spl->min_hi = 1; | |||
|
113 | return ec; | |||
|
114 | } | |||
|
115 | } | |||
|
116 | ||||
|
117 | /* | |||
|
118 | * We need to extent the diagonal "domain" by one. If the next | |||
|
119 | * values exits the box boundaries we need to change it in the | |||
|
120 | * opposite direction because (max - min) must be a power of two. | |||
|
121 | * Also we initialize the external K value to -1 so that we can | |||
|
122 | * avoid extra conditions check inside the core loop. | |||
|
123 | */ | |||
|
124 | if (bmin > dmin) | |||
|
125 | kvdb[--bmin - 1] = XDL_LINE_MAX; | |||
|
126 | else | |||
|
127 | ++bmin; | |||
|
128 | if (bmax < dmax) | |||
|
129 | kvdb[++bmax + 1] = XDL_LINE_MAX; | |||
|
130 | else | |||
|
131 | --bmax; | |||
|
132 | ||||
|
133 | for (d = bmax; d >= bmin; d -= 2) { | |||
|
134 | if (kvdb[d - 1] < kvdb[d + 1]) | |||
|
135 | i1 = kvdb[d - 1]; | |||
|
136 | else | |||
|
137 | i1 = kvdb[d + 1] - 1; | |||
|
138 | prev1 = i1; | |||
|
139 | i2 = i1 - d; | |||
|
140 | for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--); | |||
|
141 | if (prev1 - i1 > xenv->snake_cnt) | |||
|
142 | got_snake = 1; | |||
|
143 | kvdb[d] = i1; | |||
|
144 | if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) { | |||
|
145 | spl->i1 = i1; | |||
|
146 | spl->i2 = i2; | |||
|
147 | spl->min_lo = spl->min_hi = 1; | |||
|
148 | return ec; | |||
|
149 | } | |||
|
150 | } | |||
|
151 | ||||
|
152 | if (need_min) | |||
|
153 | continue; | |||
|
154 | ||||
|
155 | /* | |||
|
156 | * If the edit cost is above the heuristic trigger and if | |||
|
157 | * we got a good snake, we sample current diagonals to see | |||
|
158 | * if some of the, have reached an "interesting" path. Our | |||
|
159 | * measure is a function of the distance from the diagonal | |||
|
160 | * corner (i1 + i2) penalized with the distance from the | |||
|
161 | * mid diagonal itself. If this value is above the current | |||
|
162 | * edit cost times a magic factor (XDL_K_HEUR) we consider | |||
|
163 | * it interesting. | |||
|
164 | */ | |||
|
165 | if (got_snake && ec > xenv->heur_min) { | |||
|
166 | for (best = 0, d = fmax; d >= fmin; d -= 2) { | |||
|
167 | dd = d > fmid ? d - fmid: fmid - d; | |||
|
168 | i1 = kvdf[d]; | |||
|
169 | i2 = i1 - d; | |||
|
170 | v = (i1 - off1) + (i2 - off2) - dd; | |||
|
171 | ||||
|
172 | if (v > XDL_K_HEUR * ec && v > best && | |||
|
173 | off1 + xenv->snake_cnt <= i1 && i1 < lim1 && | |||
|
174 | off2 + xenv->snake_cnt <= i2 && i2 < lim2) { | |||
|
175 | for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++) | |||
|
176 | if (k == xenv->snake_cnt) { | |||
|
177 | best = v; | |||
|
178 | spl->i1 = i1; | |||
|
179 | spl->i2 = i2; | |||
|
180 | break; | |||
|
181 | } | |||
|
182 | } | |||
|
183 | } | |||
|
184 | if (best > 0) { | |||
|
185 | spl->min_lo = 1; | |||
|
186 | spl->min_hi = 0; | |||
|
187 | return ec; | |||
|
188 | } | |||
|
189 | ||||
|
190 | for (best = 0, d = bmax; d >= bmin; d -= 2) { | |||
|
191 | dd = d > bmid ? d - bmid: bmid - d; | |||
|
192 | i1 = kvdb[d]; | |||
|
193 | i2 = i1 - d; | |||
|
194 | v = (lim1 - i1) + (lim2 - i2) - dd; | |||
|
195 | ||||
|
196 | if (v > XDL_K_HEUR * ec && v > best && | |||
|
197 | off1 < i1 && i1 <= lim1 - xenv->snake_cnt && | |||
|
198 | off2 < i2 && i2 <= lim2 - xenv->snake_cnt) { | |||
|
199 | for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++) | |||
|
200 | if (k == xenv->snake_cnt - 1) { | |||
|
201 | best = v; | |||
|
202 | spl->i1 = i1; | |||
|
203 | spl->i2 = i2; | |||
|
204 | break; | |||
|
205 | } | |||
|
206 | } | |||
|
207 | } | |||
|
208 | if (best > 0) { | |||
|
209 | spl->min_lo = 0; | |||
|
210 | spl->min_hi = 1; | |||
|
211 | return ec; | |||
|
212 | } | |||
|
213 | } | |||
|
214 | ||||
|
215 | /* | |||
|
216 | * Enough is enough. We spent too much time here and now we collect | |||
|
217 | * the furthest reaching path using the (i1 + i2) measure. | |||
|
218 | */ | |||
|
219 | if (ec >= xenv->mxcost) { | |||
|
220 | long fbest, fbest1, bbest, bbest1; | |||
|
221 | ||||
|
222 | fbest = fbest1 = -1; | |||
|
223 | for (d = fmax; d >= fmin; d -= 2) { | |||
|
224 | i1 = XDL_MIN(kvdf[d], lim1); | |||
|
225 | i2 = i1 - d; | |||
|
226 | if (lim2 < i2) | |||
|
227 | i1 = lim2 + d, i2 = lim2; | |||
|
228 | if (fbest < i1 + i2) { | |||
|
229 | fbest = i1 + i2; | |||
|
230 | fbest1 = i1; | |||
|
231 | } | |||
|
232 | } | |||
|
233 | ||||
|
234 | bbest = bbest1 = XDL_LINE_MAX; | |||
|
235 | for (d = bmax; d >= bmin; d -= 2) { | |||
|
236 | i1 = XDL_MAX(off1, kvdb[d]); | |||
|
237 | i2 = i1 - d; | |||
|
238 | if (i2 < off2) | |||
|
239 | i1 = off2 + d, i2 = off2; | |||
|
240 | if (i1 + i2 < bbest) { | |||
|
241 | bbest = i1 + i2; | |||
|
242 | bbest1 = i1; | |||
|
243 | } | |||
|
244 | } | |||
|
245 | ||||
|
246 | if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) { | |||
|
247 | spl->i1 = fbest1; | |||
|
248 | spl->i2 = fbest - fbest1; | |||
|
249 | spl->min_lo = 1; | |||
|
250 | spl->min_hi = 0; | |||
|
251 | } else { | |||
|
252 | spl->i1 = bbest1; | |||
|
253 | spl->i2 = bbest - bbest1; | |||
|
254 | spl->min_lo = 0; | |||
|
255 | spl->min_hi = 1; | |||
|
256 | } | |||
|
257 | return ec; | |||
|
258 | } | |||
|
259 | } | |||
|
260 | } | |||
|
261 | ||||
|
262 | ||||
|
263 | /* | |||
|
264 | * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling | |||
|
265 | * the box splitting function. Note that the real job (marking changed lines) | |||
|
266 | * is done in the two boundary reaching checks. | |||
|
267 | */ | |||
|
268 | int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, | |||
|
269 | diffdata_t *dd2, long off2, long lim2, | |||
|
270 | long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { | |||
|
271 | unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; | |||
|
272 | ||||
|
273 | /* | |||
|
274 | * Shrink the box by walking through each diagonal snake (SW and NE). | |||
|
275 | */ | |||
|
276 | for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); | |||
|
277 | for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); | |||
|
278 | ||||
|
279 | /* | |||
|
280 | * If one dimension is empty, then all records on the other one must | |||
|
281 | * be obviously changed. | |||
|
282 | */ | |||
|
283 | if (off1 == lim1) { | |||
|
284 | char *rchg2 = dd2->rchg; | |||
|
285 | long *rindex2 = dd2->rindex; | |||
|
286 | ||||
|
287 | for (; off2 < lim2; off2++) | |||
|
288 | rchg2[rindex2[off2]] = 1; | |||
|
289 | } else if (off2 == lim2) { | |||
|
290 | char *rchg1 = dd1->rchg; | |||
|
291 | long *rindex1 = dd1->rindex; | |||
|
292 | ||||
|
293 | for (; off1 < lim1; off1++) | |||
|
294 | rchg1[rindex1[off1]] = 1; | |||
|
295 | } else { | |||
|
296 | xdpsplit_t spl; | |||
|
297 | spl.i1 = spl.i2 = 0; | |||
|
298 | ||||
|
299 | /* | |||
|
300 | * Divide ... | |||
|
301 | */ | |||
|
302 | if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, | |||
|
303 | need_min, &spl, xenv) < 0) { | |||
|
304 | ||||
|
305 | return -1; | |||
|
306 | } | |||
|
307 | ||||
|
308 | /* | |||
|
309 | * ... et Impera. | |||
|
310 | */ | |||
|
311 | if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, | |||
|
312 | kvdf, kvdb, spl.min_lo, xenv) < 0 || | |||
|
313 | xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, | |||
|
314 | kvdf, kvdb, spl.min_hi, xenv) < 0) { | |||
|
315 | ||||
|
316 | return -1; | |||
|
317 | } | |||
|
318 | } | |||
|
319 | ||||
|
320 | return 0; | |||
|
321 | } | |||
|
322 | ||||
|
323 | ||||
|
324 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
325 | xdfenv_t *xe) { | |||
|
326 | long ndiags; | |||
|
327 | long *kvd, *kvdf, *kvdb; | |||
|
328 | xdalgoenv_t xenv; | |||
|
329 | diffdata_t dd1, dd2; | |||
|
330 | ||||
|
331 | if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF) | |||
|
332 | return xdl_do_patience_diff(mf1, mf2, xpp, xe); | |||
|
333 | ||||
|
334 | if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) | |||
|
335 | return xdl_do_histogram_diff(mf1, mf2, xpp, xe); | |||
|
336 | ||||
|
337 | if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { | |||
|
338 | ||||
|
339 | return -1; | |||
|
340 | } | |||
|
341 | ||||
|
342 | /* | |||
|
343 | * Allocate and setup K vectors to be used by the differential algorithm. | |||
|
344 | * One is to store the forward path and one to store the backward path. | |||
|
345 | */ | |||
|
346 | ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; | |||
|
347 | if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { | |||
|
348 | ||||
|
349 | xdl_free_env(xe); | |||
|
350 | return -1; | |||
|
351 | } | |||
|
352 | kvdf = kvd; | |||
|
353 | kvdb = kvdf + ndiags; | |||
|
354 | kvdf += xe->xdf2.nreff + 1; | |||
|
355 | kvdb += xe->xdf2.nreff + 1; | |||
|
356 | ||||
|
357 | xenv.mxcost = xdl_bogosqrt(ndiags); | |||
|
358 | if (xenv.mxcost < XDL_MAX_COST_MIN) | |||
|
359 | xenv.mxcost = XDL_MAX_COST_MIN; | |||
|
360 | xenv.snake_cnt = XDL_SNAKE_CNT; | |||
|
361 | xenv.heur_min = XDL_HEUR_MIN_COST; | |||
|
362 | ||||
|
363 | dd1.nrec = xe->xdf1.nreff; | |||
|
364 | dd1.ha = xe->xdf1.ha; | |||
|
365 | dd1.rchg = xe->xdf1.rchg; | |||
|
366 | dd1.rindex = xe->xdf1.rindex; | |||
|
367 | dd2.nrec = xe->xdf2.nreff; | |||
|
368 | dd2.ha = xe->xdf2.ha; | |||
|
369 | dd2.rchg = xe->xdf2.rchg; | |||
|
370 | dd2.rindex = xe->xdf2.rindex; | |||
|
371 | ||||
|
372 | if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, | |||
|
373 | kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) { | |||
|
374 | ||||
|
375 | xdl_free(kvd); | |||
|
376 | xdl_free_env(xe); | |||
|
377 | return -1; | |||
|
378 | } | |||
|
379 | ||||
|
380 | xdl_free(kvd); | |||
|
381 | ||||
|
382 | return 0; | |||
|
383 | } | |||
|
384 | ||||
|
385 | ||||
|
386 | static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) { | |||
|
387 | xdchange_t *xch; | |||
|
388 | ||||
|
389 | if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) | |||
|
390 | return NULL; | |||
|
391 | ||||
|
392 | xch->next = xscr; | |||
|
393 | xch->i1 = i1; | |||
|
394 | xch->i2 = i2; | |||
|
395 | xch->chg1 = chg1; | |||
|
396 | xch->chg2 = chg2; | |||
|
397 | xch->ignore = 0; | |||
|
398 | ||||
|
399 | return xch; | |||
|
400 | } | |||
|
401 | ||||
|
402 | ||||
|
403 | static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) | |||
|
404 | { | |||
|
405 | return (rec1->ha == rec2->ha && | |||
|
406 | xdl_recmatch(rec1->ptr, rec1->size, | |||
|
407 | rec2->ptr, rec2->size, | |||
|
408 | flags)); | |||
|
409 | } | |||
|
410 | ||||
|
411 | /* | |||
|
412 | * If a line is indented more than this, get_indent() just returns this value. | |||
|
413 | * This avoids having to do absurd amounts of work for data that are not | |||
|
414 | * human-readable text, and also ensures that the output of get_indent fits within | |||
|
415 | * an int. | |||
|
416 | */ | |||
|
417 | #define MAX_INDENT 200 | |||
|
418 | ||||
|
419 | /* | |||
|
420 | * Return the amount of indentation of the specified line, treating TAB as 8 | |||
|
421 | * columns. Return -1 if line is empty or contains only whitespace. Clamp the | |||
|
422 | * output value at MAX_INDENT. | |||
|
423 | */ | |||
|
424 | static int get_indent(xrecord_t *rec) | |||
|
425 | { | |||
|
426 | long i; | |||
|
427 | int ret = 0; | |||
|
428 | ||||
|
429 | for (i = 0; i < rec->size; i++) { | |||
|
430 | char c = rec->ptr[i]; | |||
|
431 | ||||
|
432 | if (!XDL_ISSPACE(c)) | |||
|
433 | return ret; | |||
|
434 | else if (c == ' ') | |||
|
435 | ret += 1; | |||
|
436 | else if (c == '\t') | |||
|
437 | ret += 8 - ret % 8; | |||
|
438 | /* ignore other whitespace characters */ | |||
|
439 | ||||
|
440 | if (ret >= MAX_INDENT) | |||
|
441 | return MAX_INDENT; | |||
|
442 | } | |||
|
443 | ||||
|
444 | /* The line contains only whitespace. */ | |||
|
445 | return -1; | |||
|
446 | } | |||
|
447 | ||||
|
448 | /* | |||
|
449 | * If more than this number of consecutive blank rows are found, just return this | |||
|
450 | * value. This avoids requiring O(N^2) work for pathological cases, and also | |||
|
451 | * ensures that the output of score_split fits in an int. | |||
|
452 | */ | |||
|
453 | #define MAX_BLANKS 20 | |||
|
454 | ||||
|
455 | /* Characteristics measured about a hypothetical split position. */ | |||
|
456 | struct split_measurement { | |||
|
457 | /* | |||
|
458 | * Is the split at the end of the file (aside from any blank lines)? | |||
|
459 | */ | |||
|
460 | int end_of_file; | |||
|
461 | ||||
|
462 | /* | |||
|
463 | * How much is the line immediately following the split indented (or -1 if | |||
|
464 | * the line is blank): | |||
|
465 | */ | |||
|
466 | int indent; | |||
|
467 | ||||
|
468 | /* | |||
|
469 | * How many consecutive lines above the split are blank? | |||
|
470 | */ | |||
|
471 | int pre_blank; | |||
|
472 | ||||
|
473 | /* | |||
|
474 | * How much is the nearest non-blank line above the split indented (or -1 | |||
|
475 | * if there is no such line)? | |||
|
476 | */ | |||
|
477 | int pre_indent; | |||
|
478 | ||||
|
479 | /* | |||
|
480 | * How many lines after the line following the split are blank? | |||
|
481 | */ | |||
|
482 | int post_blank; | |||
|
483 | ||||
|
484 | /* | |||
|
485 | * How much is the nearest non-blank line after the line following the | |||
|
486 | * split indented (or -1 if there is no such line)? | |||
|
487 | */ | |||
|
488 | int post_indent; | |||
|
489 | }; | |||
|
490 | ||||
|
491 | struct split_score { | |||
|
492 | /* The effective indent of this split (smaller is preferred). */ | |||
|
493 | int effective_indent; | |||
|
494 | ||||
|
495 | /* Penalty for this split (smaller is preferred). */ | |||
|
496 | int penalty; | |||
|
497 | }; | |||
|
498 | ||||
|
499 | /* | |||
|
500 | * Fill m with information about a hypothetical split of xdf above line split. | |||
|
501 | */ | |||
|
502 | static void measure_split(const xdfile_t *xdf, long split, | |||
|
503 | struct split_measurement *m) | |||
|
504 | { | |||
|
505 | long i; | |||
|
506 | ||||
|
507 | if (split >= xdf->nrec) { | |||
|
508 | m->end_of_file = 1; | |||
|
509 | m->indent = -1; | |||
|
510 | } else { | |||
|
511 | m->end_of_file = 0; | |||
|
512 | m->indent = get_indent(xdf->recs[split]); | |||
|
513 | } | |||
|
514 | ||||
|
515 | m->pre_blank = 0; | |||
|
516 | m->pre_indent = -1; | |||
|
517 | for (i = split - 1; i >= 0; i--) { | |||
|
518 | m->pre_indent = get_indent(xdf->recs[i]); | |||
|
519 | if (m->pre_indent != -1) | |||
|
520 | break; | |||
|
521 | m->pre_blank += 1; | |||
|
522 | if (m->pre_blank == MAX_BLANKS) { | |||
|
523 | m->pre_indent = 0; | |||
|
524 | break; | |||
|
525 | } | |||
|
526 | } | |||
|
527 | ||||
|
528 | m->post_blank = 0; | |||
|
529 | m->post_indent = -1; | |||
|
530 | for (i = split + 1; i < xdf->nrec; i++) { | |||
|
531 | m->post_indent = get_indent(xdf->recs[i]); | |||
|
532 | if (m->post_indent != -1) | |||
|
533 | break; | |||
|
534 | m->post_blank += 1; | |||
|
535 | if (m->post_blank == MAX_BLANKS) { | |||
|
536 | m->post_indent = 0; | |||
|
537 | break; | |||
|
538 | } | |||
|
539 | } | |||
|
540 | } | |||
|
541 | ||||
|
542 | /* | |||
|
543 | * The empirically-determined weight factors used by score_split() below. | |||
|
544 | * Larger values means that the position is a less favorable place to split. | |||
|
545 | * | |||
|
546 | * Note that scores are only ever compared against each other, so multiplying | |||
|
547 | * all of these weight/penalty values by the same factor wouldn't change the | |||
|
548 | * heuristic's behavior. Still, we need to set that arbitrary scale *somehow*. | |||
|
549 | * In practice, these numbers are chosen to be large enough that they can be | |||
|
550 | * adjusted relative to each other with sufficient precision despite using | |||
|
551 | * integer math. | |||
|
552 | */ | |||
|
553 | ||||
|
554 | /* Penalty if there are no non-blank lines before the split */ | |||
|
555 | #define START_OF_FILE_PENALTY 1 | |||
|
556 | ||||
|
557 | /* Penalty if there are no non-blank lines after the split */ | |||
|
558 | #define END_OF_FILE_PENALTY 21 | |||
|
559 | ||||
|
560 | /* Multiplier for the number of blank lines around the split */ | |||
|
561 | #define TOTAL_BLANK_WEIGHT (-30) | |||
|
562 | ||||
|
563 | /* Multiplier for the number of blank lines after the split */ | |||
|
564 | #define POST_BLANK_WEIGHT 6 | |||
|
565 | ||||
|
566 | /* | |||
|
567 | * Penalties applied if the line is indented more than its predecessor | |||
|
568 | */ | |||
|
569 | #define RELATIVE_INDENT_PENALTY (-4) | |||
|
570 | #define RELATIVE_INDENT_WITH_BLANK_PENALTY 10 | |||
|
571 | ||||
|
572 | /* | |||
|
573 | * Penalties applied if the line is indented less than both its predecessor and | |||
|
574 | * its successor | |||
|
575 | */ | |||
|
576 | #define RELATIVE_OUTDENT_PENALTY 24 | |||
|
577 | #define RELATIVE_OUTDENT_WITH_BLANK_PENALTY 17 | |||
|
578 | ||||
|
579 | /* | |||
|
580 | * Penalties applied if the line is indented less than its predecessor but not | |||
|
581 | * less than its successor | |||
|
582 | */ | |||
|
583 | #define RELATIVE_DEDENT_PENALTY 23 | |||
|
584 | #define RELATIVE_DEDENT_WITH_BLANK_PENALTY 17 | |||
|
585 | ||||
|
586 | /* | |||
|
587 | * We only consider whether the sum of the effective indents for splits are | |||
|
588 | * less than (-1), equal to (0), or greater than (+1) each other. The resulting | |||
|
589 | * value is multiplied by the following weight and combined with the penalty to | |||
|
590 | * determine the better of two scores. | |||
|
591 | */ | |||
|
592 | #define INDENT_WEIGHT 60 | |||
|
593 | ||||
|
594 | /* | |||
|
595 | * Compute a badness score for the hypothetical split whose measurements are | |||
|
596 | * stored in m. The weight factors were determined empirically using the tools and | |||
|
597 | * corpus described in | |||
|
598 | * | |||
|
599 | * https://github.com/mhagger/diff-slider-tools | |||
|
600 | * | |||
|
601 | * Also see that project if you want to improve the weights based on, for example, | |||
|
602 | * a larger or more diverse corpus. | |||
|
603 | */ | |||
|
604 | static void score_add_split(const struct split_measurement *m, struct split_score *s) | |||
|
605 | { | |||
|
606 | /* | |||
|
607 | * A place to accumulate penalty factors (positive makes this index more | |||
|
608 | * favored): | |||
|
609 | */ | |||
|
610 | int post_blank, total_blank, indent, any_blanks; | |||
|
611 | ||||
|
612 | if (m->pre_indent == -1 && m->pre_blank == 0) | |||
|
613 | s->penalty += START_OF_FILE_PENALTY; | |||
|
614 | ||||
|
615 | if (m->end_of_file) | |||
|
616 | s->penalty += END_OF_FILE_PENALTY; | |||
|
617 | ||||
|
618 | /* | |||
|
619 | * Set post_blank to the number of blank lines following the split, | |||
|
620 | * including the line immediately after the split: | |||
|
621 | */ | |||
|
622 | post_blank = (m->indent == -1) ? 1 + m->post_blank : 0; | |||
|
623 | total_blank = m->pre_blank + post_blank; | |||
|
624 | ||||
|
625 | /* Penalties based on nearby blank lines: */ | |||
|
626 | s->penalty += TOTAL_BLANK_WEIGHT * total_blank; | |||
|
627 | s->penalty += POST_BLANK_WEIGHT * post_blank; | |||
|
628 | ||||
|
629 | if (m->indent != -1) | |||
|
630 | indent = m->indent; | |||
|
631 | else | |||
|
632 | indent = m->post_indent; | |||
|
633 | ||||
|
634 | any_blanks = (total_blank != 0); | |||
|
635 | ||||
|
636 | /* Note that the effective indent is -1 at the end of the file: */ | |||
|
637 | s->effective_indent += indent; | |||
|
638 | ||||
|
639 | if (indent == -1) { | |||
|
640 | /* No additional adjustments needed. */ | |||
|
641 | } else if (m->pre_indent == -1) { | |||
|
642 | /* No additional adjustments needed. */ | |||
|
643 | } else if (indent > m->pre_indent) { | |||
|
644 | /* | |||
|
645 | * The line is indented more than its predecessor. | |||
|
646 | */ | |||
|
647 | s->penalty += any_blanks ? | |||
|
648 | RELATIVE_INDENT_WITH_BLANK_PENALTY : | |||
|
649 | RELATIVE_INDENT_PENALTY; | |||
|
650 | } else if (indent == m->pre_indent) { | |||
|
651 | /* | |||
|
652 | * The line has the same indentation level as its predecessor. | |||
|
653 | * No additional adjustments needed. | |||
|
654 | */ | |||
|
655 | } else { | |||
|
656 | /* | |||
|
657 | * The line is indented less than its predecessor. It could be | |||
|
658 | * the block terminator of the previous block, but it could | |||
|
659 | * also be the start of a new block (e.g., an "else" block, or | |||
|
660 | * maybe the previous block didn't have a block terminator). | |||
|
661 | * Try to distinguish those cases based on what comes next: | |||
|
662 | */ | |||
|
663 | if (m->post_indent != -1 && m->post_indent > indent) { | |||
|
664 | /* | |||
|
665 | * The following line is indented more. So it is likely | |||
|
666 | * that this line is the start of a block. | |||
|
667 | */ | |||
|
668 | s->penalty += any_blanks ? | |||
|
669 | RELATIVE_OUTDENT_WITH_BLANK_PENALTY : | |||
|
670 | RELATIVE_OUTDENT_PENALTY; | |||
|
671 | } else { | |||
|
672 | /* | |||
|
673 | * That was probably the end of a block. | |||
|
674 | */ | |||
|
675 | s->penalty += any_blanks ? | |||
|
676 | RELATIVE_DEDENT_WITH_BLANK_PENALTY : | |||
|
677 | RELATIVE_DEDENT_PENALTY; | |||
|
678 | } | |||
|
679 | } | |||
|
680 | } | |||
|
681 | ||||
|
682 | static int score_cmp(struct split_score *s1, struct split_score *s2) | |||
|
683 | { | |||
|
684 | /* -1 if s1.effective_indent < s2->effective_indent, etc. */ | |||
|
685 | int cmp_indents = ((s1->effective_indent > s2->effective_indent) - | |||
|
686 | (s1->effective_indent < s2->effective_indent)); | |||
|
687 | ||||
|
688 | return INDENT_WEIGHT * cmp_indents + (s1->penalty - s2->penalty); | |||
|
689 | } | |||
|
690 | ||||
|
691 | /* | |||
|
692 | * Represent a group of changed lines in an xdfile_t (i.e., a contiguous group | |||
|
693 | * of lines that was inserted or deleted from the corresponding version of the | |||
|
694 | * file). We consider there to be such a group at the beginning of the file, at | |||
|
695 | * the end of the file, and between any two unchanged lines, though most such | |||
|
696 | * groups will usually be empty. | |||
|
697 | * | |||
|
698 | * If the first line in a group is equal to the line following the group, then | |||
|
699 | * the group can be slid down. Similarly, if the last line in a group is equal | |||
|
700 | * to the line preceding the group, then the group can be slid up. See | |||
|
701 | * group_slide_down() and group_slide_up(). | |||
|
702 | * | |||
|
703 | * Note that loops that are testing for changed lines in xdf->rchg do not need | |||
|
704 | * index bounding since the array is prepared with a zero at position -1 and N. | |||
|
705 | */ | |||
|
706 | struct xdlgroup { | |||
|
707 | /* | |||
|
708 | * The index of the first changed line in the group, or the index of | |||
|
709 | * the unchanged line above which the (empty) group is located. | |||
|
710 | */ | |||
|
711 | long start; | |||
|
712 | ||||
|
713 | /* | |||
|
714 | * The index of the first unchanged line after the group. For an empty | |||
|
715 | * group, end is equal to start. | |||
|
716 | */ | |||
|
717 | long end; | |||
|
718 | }; | |||
|
719 | ||||
|
720 | /* | |||
|
721 | * Initialize g to point at the first group in xdf. | |||
|
722 | */ | |||
|
723 | static void group_init(xdfile_t *xdf, struct xdlgroup *g) | |||
|
724 | { | |||
|
725 | g->start = g->end = 0; | |||
|
726 | while (xdf->rchg[g->end]) | |||
|
727 | g->end++; | |||
|
728 | } | |||
|
729 | ||||
|
730 | /* | |||
|
731 | * Move g to describe the next (possibly empty) group in xdf and return 0. If g | |||
|
732 | * is already at the end of the file, do nothing and return -1. | |||
|
733 | */ | |||
|
734 | static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) | |||
|
735 | { | |||
|
736 | if (g->end == xdf->nrec) | |||
|
737 | return -1; | |||
|
738 | ||||
|
739 | g->start = g->end + 1; | |||
|
740 | for (g->end = g->start; xdf->rchg[g->end]; g->end++) | |||
|
741 | ; | |||
|
742 | ||||
|
743 | return 0; | |||
|
744 | } | |||
|
745 | ||||
|
746 | /* | |||
|
747 | * Move g to describe the previous (possibly empty) group in xdf and return 0. | |||
|
748 | * If g is already at the beginning of the file, do nothing and return -1. | |||
|
749 | */ | |||
|
750 | static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) | |||
|
751 | { | |||
|
752 | if (g->start == 0) | |||
|
753 | return -1; | |||
|
754 | ||||
|
755 | g->end = g->start - 1; | |||
|
756 | for (g->start = g->end; xdf->rchg[g->start - 1]; g->start--) | |||
|
757 | ; | |||
|
758 | ||||
|
759 | return 0; | |||
|
760 | } | |||
|
761 | ||||
|
762 | /* | |||
|
763 | * If g can be slid toward the end of the file, do so, and if it bumps into a | |||
|
764 | * following group, expand this group to include it. Return 0 on success or -1 | |||
|
765 | * if g cannot be slid down. | |||
|
766 | */ | |||
|
767 | static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags) | |||
|
768 | { | |||
|
769 | if (g->end < xdf->nrec && | |||
|
770 | recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { | |||
|
771 | xdf->rchg[g->start++] = 0; | |||
|
772 | xdf->rchg[g->end++] = 1; | |||
|
773 | ||||
|
774 | while (xdf->rchg[g->end]) | |||
|
775 | g->end++; | |||
|
776 | ||||
|
777 | return 0; | |||
|
778 | } else { | |||
|
779 | return -1; | |||
|
780 | } | |||
|
781 | } | |||
|
782 | ||||
|
783 | /* | |||
|
784 | * If g can be slid toward the beginning of the file, do so, and if it bumps | |||
|
785 | * into a previous group, expand this group to include it. Return 0 on success | |||
|
786 | * or -1 if g cannot be slid up. | |||
|
787 | */ | |||
|
788 | static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags) | |||
|
789 | { | |||
|
790 | if (g->start > 0 && | |||
|
791 | recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { | |||
|
792 | xdf->rchg[--g->start] = 1; | |||
|
793 | xdf->rchg[--g->end] = 0; | |||
|
794 | ||||
|
795 | while (xdf->rchg[g->start - 1]) | |||
|
796 | g->start--; | |||
|
797 | ||||
|
798 | return 0; | |||
|
799 | } else { | |||
|
800 | return -1; | |||
|
801 | } | |||
|
802 | } | |||
|
803 | ||||
|
804 | static void xdl_bug(const char *msg) | |||
|
805 | { | |||
|
806 | fprintf(stderr, "BUG: %s\n", msg); | |||
|
807 | exit(1); | |||
|
808 | } | |||
|
809 | ||||
|
810 | /* | |||
|
811 | * Move back and forward change groups for a consistent and pretty diff output. | |||
|
812 | * This also helps in finding joinable change groups and reducing the diff | |||
|
813 | * size. | |||
|
814 | */ | |||
|
815 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { | |||
|
816 | struct xdlgroup g, go; | |||
|
817 | long earliest_end, end_matching_other; | |||
|
818 | long groupsize; | |||
|
819 | ||||
|
820 | group_init(xdf, &g); | |||
|
821 | group_init(xdfo, &go); | |||
|
822 | ||||
|
823 | while (1) { | |||
|
824 | /* If the group is empty in the to-be-compacted file, skip it: */ | |||
|
825 | if (g.end == g.start) | |||
|
826 | goto next; | |||
|
827 | ||||
|
828 | /* | |||
|
829 | * Now shift the change up and then down as far as possible in | |||
|
830 | * each direction. If it bumps into any other changes, merge them. | |||
|
831 | */ | |||
|
832 | do { | |||
|
833 | groupsize = g.end - g.start; | |||
|
834 | ||||
|
835 | /* | |||
|
836 | * Keep track of the last "end" index that causes this | |||
|
837 | * group to align with a group of changed lines in the | |||
|
838 | * other file. -1 indicates that we haven't found such | |||
|
839 | * a match yet: | |||
|
840 | */ | |||
|
841 | end_matching_other = -1; | |||
|
842 | ||||
|
843 | /* Shift the group backward as much as possible: */ | |||
|
844 | while (!group_slide_up(xdf, &g, flags)) | |||
|
845 | if (group_previous(xdfo, &go)) | |||
|
846 | xdl_bug("group sync broken sliding up"); | |||
|
847 | ||||
|
848 | /* | |||
|
849 | * This is this highest that this group can be shifted. | |||
|
850 | * Record its end index: | |||
|
851 | */ | |||
|
852 | earliest_end = g.end; | |||
|
853 | ||||
|
854 | if (go.end > go.start) | |||
|
855 | end_matching_other = g.end; | |||
|
856 | ||||
|
857 | /* Now shift the group forward as far as possible: */ | |||
|
858 | while (1) { | |||
|
859 | if (group_slide_down(xdf, &g, flags)) | |||
|
860 | break; | |||
|
861 | if (group_next(xdfo, &go)) | |||
|
862 | xdl_bug("group sync broken sliding down"); | |||
|
863 | ||||
|
864 | if (go.end > go.start) | |||
|
865 | end_matching_other = g.end; | |||
|
866 | } | |||
|
867 | } while (groupsize != g.end - g.start); | |||
|
868 | ||||
|
869 | /* | |||
|
870 | * If the group can be shifted, then we can possibly use this | |||
|
871 | * freedom to produce a more intuitive diff. | |||
|
872 | * | |||
|
873 | * The group is currently shifted as far down as possible, so the | |||
|
874 | * heuristics below only have to handle upwards shifts. | |||
|
875 | */ | |||
|
876 | ||||
|
877 | if (g.end == earliest_end) { | |||
|
878 | /* no shifting was possible */ | |||
|
879 | } else if (end_matching_other != -1) { | |||
|
880 | /* | |||
|
881 | * Move the possibly merged group of changes back to line | |||
|
882 | * up with the last group of changes from the other file | |||
|
883 | * that it can align with. | |||
|
884 | */ | |||
|
885 | while (go.end == go.start) { | |||
|
886 | if (group_slide_up(xdf, &g, flags)) | |||
|
887 | xdl_bug("match disappeared"); | |||
|
888 | if (group_previous(xdfo, &go)) | |||
|
889 | xdl_bug("group sync broken sliding to match"); | |||
|
890 | } | |||
|
891 | } else if (flags & XDF_INDENT_HEURISTIC) { | |||
|
892 | /* | |||
|
893 | * Indent heuristic: a group of pure add/delete lines | |||
|
894 | * implies two splits, one between the end of the "before" | |||
|
895 | * context and the start of the group, and another between | |||
|
896 | * the end of the group and the beginning of the "after" | |||
|
897 | * context. Some splits are aesthetically better and some | |||
|
898 | * are worse. We compute a badness "score" for each split, | |||
|
899 | * and add the scores for the two splits to define a | |||
|
900 | * "score" for each position that the group can be shifted | |||
|
901 | * to. Then we pick the shift with the lowest score. | |||
|
902 | */ | |||
|
903 | long shift, best_shift = -1; | |||
|
904 | struct split_score best_score; | |||
|
905 | ||||
|
906 | for (shift = earliest_end; shift <= g.end; shift++) { | |||
|
907 | struct split_measurement m; | |||
|
908 | struct split_score score = {0, 0}; | |||
|
909 | ||||
|
910 | measure_split(xdf, shift, &m); | |||
|
911 | score_add_split(&m, &score); | |||
|
912 | measure_split(xdf, shift - groupsize, &m); | |||
|
913 | score_add_split(&m, &score); | |||
|
914 | if (best_shift == -1 || | |||
|
915 | score_cmp(&score, &best_score) <= 0) { | |||
|
916 | best_score.effective_indent = score.effective_indent; | |||
|
917 | best_score.penalty = score.penalty; | |||
|
918 | best_shift = shift; | |||
|
919 | } | |||
|
920 | } | |||
|
921 | ||||
|
922 | while (g.end > best_shift) { | |||
|
923 | if (group_slide_up(xdf, &g, flags)) | |||
|
924 | xdl_bug("best shift unreached"); | |||
|
925 | if (group_previous(xdfo, &go)) | |||
|
926 | xdl_bug("group sync broken sliding to blank line"); | |||
|
927 | } | |||
|
928 | } | |||
|
929 | ||||
|
930 | next: | |||
|
931 | /* Move past the just-processed group: */ | |||
|
932 | if (group_next(xdf, &g)) | |||
|
933 | break; | |||
|
934 | if (group_next(xdfo, &go)) | |||
|
935 | xdl_bug("group sync broken moving to next group"); | |||
|
936 | } | |||
|
937 | ||||
|
938 | if (!group_next(xdfo, &go)) | |||
|
939 | xdl_bug("group sync broken at end of file"); | |||
|
940 | ||||
|
941 | return 0; | |||
|
942 | } | |||
|
943 | ||||
|
944 | ||||
|
945 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { | |||
|
946 | xdchange_t *cscr = NULL, *xch; | |||
|
947 | char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; | |||
|
948 | long i1, i2, l1, l2; | |||
|
949 | ||||
|
950 | /* | |||
|
951 | * Trivial. Collects "groups" of changes and creates an edit script. | |||
|
952 | */ | |||
|
953 | for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--) | |||
|
954 | if (rchg1[i1 - 1] || rchg2[i2 - 1]) { | |||
|
955 | for (l1 = i1; rchg1[i1 - 1]; i1--); | |||
|
956 | for (l2 = i2; rchg2[i2 - 1]; i2--); | |||
|
957 | ||||
|
958 | if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) { | |||
|
959 | xdl_free_script(cscr); | |||
|
960 | return -1; | |||
|
961 | } | |||
|
962 | cscr = xch; | |||
|
963 | } | |||
|
964 | ||||
|
965 | *xscr = cscr; | |||
|
966 | ||||
|
967 | return 0; | |||
|
968 | } | |||
|
969 | ||||
|
970 | ||||
|
971 | void xdl_free_script(xdchange_t *xscr) { | |||
|
972 | xdchange_t *xch; | |||
|
973 | ||||
|
974 | while ((xch = xscr) != NULL) { | |||
|
975 | xscr = xscr->next; | |||
|
976 | xdl_free(xch); | |||
|
977 | } | |||
|
978 | } | |||
|
979 | ||||
|
980 | static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |||
|
981 | xdemitconf_t const *xecfg) | |||
|
982 | { | |||
|
983 | xdchange_t *xch, *xche; | |||
|
984 | ||||
|
985 | for (xch = xscr; xch; xch = xche->next) { | |||
|
986 | xche = xdl_get_hunk(&xch, xecfg); | |||
|
987 | if (!xch) | |||
|
988 | break; | |||
|
989 | if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1, | |||
|
990 | xch->i2, xche->i2 + xche->chg2 - xch->i2, | |||
|
991 | ecb->priv) < 0) | |||
|
992 | return -1; | |||
|
993 | } | |||
|
994 | return 0; | |||
|
995 | } | |||
|
996 | ||||
|
997 | static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags) | |||
|
998 | { | |||
|
999 | xdchange_t *xch; | |||
|
1000 | ||||
|
1001 | for (xch = xscr; xch; xch = xch->next) { | |||
|
1002 | int ignore = 1; | |||
|
1003 | xrecord_t **rec; | |||
|
1004 | long i; | |||
|
1005 | ||||
|
1006 | rec = &xe->xdf1.recs[xch->i1]; | |||
|
1007 | for (i = 0; i < xch->chg1 && ignore; i++) | |||
|
1008 | ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); | |||
|
1009 | ||||
|
1010 | rec = &xe->xdf2.recs[xch->i2]; | |||
|
1011 | for (i = 0; i < xch->chg2 && ignore; i++) | |||
|
1012 | ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags); | |||
|
1013 | ||||
|
1014 | xch->ignore = ignore; | |||
|
1015 | } | |||
|
1016 | } | |||
|
1017 | ||||
|
1018 | int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
1019 | xdemitconf_t const *xecfg, xdemitcb_t *ecb) { | |||
|
1020 | xdchange_t *xscr; | |||
|
1021 | xdfenv_t xe; | |||
|
1022 | emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff; | |||
|
1023 | ||||
|
1024 | if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) { | |||
|
1025 | ||||
|
1026 | return -1; | |||
|
1027 | } | |||
|
1028 | if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || | |||
|
1029 | xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 || | |||
|
1030 | xdl_build_script(&xe, &xscr) < 0) { | |||
|
1031 | ||||
|
1032 | xdl_free_env(&xe); | |||
|
1033 | return -1; | |||
|
1034 | } | |||
|
1035 | if (xscr) { | |||
|
1036 | if (xpp->flags & XDF_IGNORE_BLANK_LINES) | |||
|
1037 | xdl_mark_ignorable(xscr, &xe, xpp->flags); | |||
|
1038 | ||||
|
1039 | if (ef(&xe, xscr, ecb, xecfg) < 0) { | |||
|
1040 | ||||
|
1041 | xdl_free_script(xscr); | |||
|
1042 | xdl_free_env(&xe); | |||
|
1043 | return -1; | |||
|
1044 | } | |||
|
1045 | xdl_free_script(xscr); | |||
|
1046 | } | |||
|
1047 | xdl_free_env(&xe); | |||
|
1048 | ||||
|
1049 | return 0; | |||
|
1050 | } |
@@ -0,0 +1,64 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XDIFFI_H) | |||
|
24 | #define XDIFFI_H | |||
|
25 | ||||
|
26 | ||||
|
27 | typedef struct s_diffdata { | |||
|
28 | long nrec; | |||
|
29 | unsigned long const *ha; | |||
|
30 | long *rindex; | |||
|
31 | char *rchg; | |||
|
32 | } diffdata_t; | |||
|
33 | ||||
|
34 | typedef struct s_xdalgoenv { | |||
|
35 | long mxcost; | |||
|
36 | long snake_cnt; | |||
|
37 | long heur_min; | |||
|
38 | } xdalgoenv_t; | |||
|
39 | ||||
|
40 | typedef struct s_xdchange { | |||
|
41 | struct s_xdchange *next; | |||
|
42 | long i1, i2; | |||
|
43 | long chg1, chg2; | |||
|
44 | int ignore; | |||
|
45 | } xdchange_t; | |||
|
46 | ||||
|
47 | ||||
|
48 | ||||
|
49 | int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, | |||
|
50 | diffdata_t *dd2, long off2, long lim2, | |||
|
51 | long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv); | |||
|
52 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
53 | xdfenv_t *xe); | |||
|
54 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags); | |||
|
55 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); | |||
|
56 | void xdl_free_script(xdchange_t *xscr); | |||
|
57 | int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |||
|
58 | xdemitconf_t const *xecfg); | |||
|
59 | int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
60 | xdfenv_t *env); | |||
|
61 | int xdl_do_histogram_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
62 | xdfenv_t *env); | |||
|
63 | ||||
|
64 | #endif /* #if !defined(XDIFFI_H) */ |
@@ -0,0 +1,312 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #include "xinclude.h" | |||
|
24 | ||||
|
25 | static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) { | |||
|
26 | ||||
|
27 | *rec = xdf->recs[ri]->ptr; | |||
|
28 | ||||
|
29 | return xdf->recs[ri]->size; | |||
|
30 | } | |||
|
31 | ||||
|
32 | ||||
|
33 | static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) { | |||
|
34 | long size, psize = strlen(pre); | |||
|
35 | char const *rec; | |||
|
36 | ||||
|
37 | size = xdl_get_rec(xdf, ri, &rec); | |||
|
38 | if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) { | |||
|
39 | ||||
|
40 | return -1; | |||
|
41 | } | |||
|
42 | ||||
|
43 | return 0; | |||
|
44 | } | |||
|
45 | ||||
|
46 | ||||
|
47 | /* | |||
|
48 | * Starting at the passed change atom, find the latest change atom to be included | |||
|
49 | * inside the differential hunk according to the specified configuration. | |||
|
50 | * Also advance xscr if the first changes must be discarded. | |||
|
51 | */ | |||
|
52 | xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) | |||
|
53 | { | |||
|
54 | xdchange_t *xch, *xchp, *lxch; | |||
|
55 | long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen; | |||
|
56 | long max_ignorable = xecfg->ctxlen; | |||
|
57 | unsigned long ignored = 0; /* number of ignored blank lines */ | |||
|
58 | ||||
|
59 | /* remove ignorable changes that are too far before other changes */ | |||
|
60 | for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { | |||
|
61 | xch = xchp->next; | |||
|
62 | ||||
|
63 | if (xch == NULL || | |||
|
64 | xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable) | |||
|
65 | *xscr = xch; | |||
|
66 | } | |||
|
67 | ||||
|
68 | if (*xscr == NULL) | |||
|
69 | return NULL; | |||
|
70 | ||||
|
71 | lxch = *xscr; | |||
|
72 | ||||
|
73 | for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { | |||
|
74 | long distance = xch->i1 - (xchp->i1 + xchp->chg1); | |||
|
75 | if (distance > max_common) | |||
|
76 | break; | |||
|
77 | ||||
|
78 | if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) { | |||
|
79 | lxch = xch; | |||
|
80 | ignored = 0; | |||
|
81 | } else if (distance < max_ignorable && xch->ignore) { | |||
|
82 | ignored += xch->chg2; | |||
|
83 | } else if (lxch != xchp && | |||
|
84 | xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) { | |||
|
85 | break; | |||
|
86 | } else if (!xch->ignore) { | |||
|
87 | lxch = xch; | |||
|
88 | ignored = 0; | |||
|
89 | } else { | |||
|
90 | ignored += xch->chg2; | |||
|
91 | } | |||
|
92 | } | |||
|
93 | ||||
|
94 | return lxch; | |||
|
95 | } | |||
|
96 | ||||
|
97 | ||||
|
98 | static long def_ff(const char *rec, long len, char *buf, long sz, void *priv) | |||
|
99 | { | |||
|
100 | if (len > 0 && | |||
|
101 | (isalpha((unsigned char)*rec) || /* identifier? */ | |||
|
102 | *rec == '_' || /* also identifier? */ | |||
|
103 | *rec == '$')) { /* identifiers from VMS and other esoterico */ | |||
|
104 | if (len > sz) | |||
|
105 | len = sz; | |||
|
106 | while (0 < len && isspace((unsigned char)rec[len - 1])) | |||
|
107 | len--; | |||
|
108 | memcpy(buf, rec, len); | |||
|
109 | return len; | |||
|
110 | } | |||
|
111 | return -1; | |||
|
112 | } | |||
|
113 | ||||
|
114 | static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, | |||
|
115 | char *buf, long sz) | |||
|
116 | { | |||
|
117 | const char *rec; | |||
|
118 | long len = xdl_get_rec(xdf, ri, &rec); | |||
|
119 | if (!xecfg->find_func) | |||
|
120 | return def_ff(rec, len, buf, sz, xecfg->find_func_priv); | |||
|
121 | return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv); | |||
|
122 | } | |||
|
123 | ||||
|
124 | static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) | |||
|
125 | { | |||
|
126 | char dummy[1]; | |||
|
127 | return match_func_rec(xdf, xecfg, ri, dummy, sizeof(dummy)) >= 0; | |||
|
128 | } | |||
|
129 | ||||
|
130 | struct func_line { | |||
|
131 | long len; | |||
|
132 | char buf[80]; | |||
|
133 | }; | |||
|
134 | ||||
|
135 | static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, | |||
|
136 | struct func_line *func_line, long start, long limit) | |||
|
137 | { | |||
|
138 | long l, size, step = (start > limit) ? -1 : 1; | |||
|
139 | char *buf, dummy[1]; | |||
|
140 | ||||
|
141 | buf = func_line ? func_line->buf : dummy; | |||
|
142 | size = func_line ? sizeof(func_line->buf) : sizeof(dummy); | |||
|
143 | ||||
|
144 | for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) { | |||
|
145 | long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size); | |||
|
146 | if (len >= 0) { | |||
|
147 | if (func_line) | |||
|
148 | func_line->len = len; | |||
|
149 | return l; | |||
|
150 | } | |||
|
151 | } | |||
|
152 | return -1; | |||
|
153 | } | |||
|
154 | ||||
|
155 | static int is_empty_rec(xdfile_t *xdf, long ri) | |||
|
156 | { | |||
|
157 | const char *rec; | |||
|
158 | long len = xdl_get_rec(xdf, ri, &rec); | |||
|
159 | ||||
|
160 | while (len > 0 && XDL_ISSPACE(*rec)) { | |||
|
161 | rec++; | |||
|
162 | len--; | |||
|
163 | } | |||
|
164 | return !len; | |||
|
165 | } | |||
|
166 | ||||
|
167 | int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |||
|
168 | xdemitconf_t const *xecfg) { | |||
|
169 | long s1, s2, e1, e2, lctx; | |||
|
170 | xdchange_t *xch, *xche; | |||
|
171 | long funclineprev = -1; | |||
|
172 | struct func_line func_line = { 0 }; | |||
|
173 | ||||
|
174 | for (xch = xscr; xch; xch = xche->next) { | |||
|
175 | xche = xdl_get_hunk(&xch, xecfg); | |||
|
176 | if (!xch) | |||
|
177 | break; | |||
|
178 | ||||
|
179 | s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); | |||
|
180 | s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); | |||
|
181 | ||||
|
182 | if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { | |||
|
183 | long fs1, i1 = xch->i1; | |||
|
184 | ||||
|
185 | /* Appended chunk? */ | |||
|
186 | if (i1 >= xe->xdf1.nrec) { | |||
|
187 | long i2 = xch->i2; | |||
|
188 | ||||
|
189 | /* | |||
|
190 | * We don't need additional context if | |||
|
191 | * a whole function was added. | |||
|
192 | */ | |||
|
193 | while (i2 < xe->xdf2.nrec) { | |||
|
194 | if (is_func_rec(&xe->xdf2, xecfg, i2)) | |||
|
195 | goto post_context_calculation; | |||
|
196 | i2++; | |||
|
197 | } | |||
|
198 | ||||
|
199 | /* | |||
|
200 | * Otherwise get more context from the | |||
|
201 | * pre-image. | |||
|
202 | */ | |||
|
203 | i1 = xe->xdf1.nrec - 1; | |||
|
204 | } | |||
|
205 | ||||
|
206 | fs1 = get_func_line(xe, xecfg, NULL, i1, -1); | |||
|
207 | while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) && | |||
|
208 | !is_func_rec(&xe->xdf1, xecfg, fs1 - 1)) | |||
|
209 | fs1--; | |||
|
210 | if (fs1 < 0) | |||
|
211 | fs1 = 0; | |||
|
212 | if (fs1 < s1) { | |||
|
213 | s2 -= s1 - fs1; | |||
|
214 | s1 = fs1; | |||
|
215 | } | |||
|
216 | } | |||
|
217 | ||||
|
218 | post_context_calculation: | |||
|
219 | lctx = xecfg->ctxlen; | |||
|
220 | lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); | |||
|
221 | lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); | |||
|
222 | ||||
|
223 | e1 = xche->i1 + xche->chg1 + lctx; | |||
|
224 | e2 = xche->i2 + xche->chg2 + lctx; | |||
|
225 | ||||
|
226 | if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { | |||
|
227 | long fe1 = get_func_line(xe, xecfg, NULL, | |||
|
228 | xche->i1 + xche->chg1, | |||
|
229 | xe->xdf1.nrec); | |||
|
230 | while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1)) | |||
|
231 | fe1--; | |||
|
232 | if (fe1 < 0) | |||
|
233 | fe1 = xe->xdf1.nrec; | |||
|
234 | if (fe1 > e1) { | |||
|
235 | e2 += fe1 - e1; | |||
|
236 | e1 = fe1; | |||
|
237 | } | |||
|
238 | ||||
|
239 | /* | |||
|
240 | * Overlap with next change? Then include it | |||
|
241 | * in the current hunk and start over to find | |||
|
242 | * its new end. | |||
|
243 | */ | |||
|
244 | if (xche->next) { | |||
|
245 | long l = XDL_MIN(xche->next->i1, | |||
|
246 | xe->xdf1.nrec - 1); | |||
|
247 | if (l - xecfg->ctxlen <= e1 || | |||
|
248 | get_func_line(xe, xecfg, NULL, l, e1) < 0) { | |||
|
249 | xche = xche->next; | |||
|
250 | goto post_context_calculation; | |||
|
251 | } | |||
|
252 | } | |||
|
253 | } | |||
|
254 | ||||
|
255 | /* | |||
|
256 | * Emit current hunk header. | |||
|
257 | */ | |||
|
258 | ||||
|
259 | if (xecfg->flags & XDL_EMIT_FUNCNAMES) { | |||
|
260 | get_func_line(xe, xecfg, &func_line, | |||
|
261 | s1 - 1, funclineprev); | |||
|
262 | funclineprev = s1 - 1; | |||
|
263 | } | |||
|
264 | if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, | |||
|
265 | func_line.buf, func_line.len, ecb) < 0) | |||
|
266 | return -1; | |||
|
267 | ||||
|
268 | /* | |||
|
269 | * Emit pre-context. | |||
|
270 | */ | |||
|
271 | for (; s2 < xch->i2; s2++) | |||
|
272 | if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) | |||
|
273 | return -1; | |||
|
274 | ||||
|
275 | for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) { | |||
|
276 | /* | |||
|
277 | * Merge previous with current change atom. | |||
|
278 | */ | |||
|
279 | for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++) | |||
|
280 | if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) | |||
|
281 | return -1; | |||
|
282 | ||||
|
283 | /* | |||
|
284 | * Removes lines from the first file. | |||
|
285 | */ | |||
|
286 | for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++) | |||
|
287 | if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0) | |||
|
288 | return -1; | |||
|
289 | ||||
|
290 | /* | |||
|
291 | * Adds lines from the second file. | |||
|
292 | */ | |||
|
293 | for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++) | |||
|
294 | if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0) | |||
|
295 | return -1; | |||
|
296 | ||||
|
297 | if (xch == xche) | |||
|
298 | break; | |||
|
299 | s1 = xch->i1 + xch->chg1; | |||
|
300 | s2 = xch->i2 + xch->chg2; | |||
|
301 | } | |||
|
302 | ||||
|
303 | /* | |||
|
304 | * Emit post-context. | |||
|
305 | */ | |||
|
306 | for (s2 = xche->i2 + xche->chg2; s2 < e2; s2++) | |||
|
307 | if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0) | |||
|
308 | return -1; | |||
|
309 | } | |||
|
310 | ||||
|
311 | return 0; | |||
|
312 | } |
@@ -0,0 +1,36 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XEMIT_H) | |||
|
24 | #define XEMIT_H | |||
|
25 | ||||
|
26 | ||||
|
27 | typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |||
|
28 | xdemitconf_t const *xecfg); | |||
|
29 | ||||
|
30 | xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg); | |||
|
31 | int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |||
|
32 | xdemitconf_t const *xecfg); | |||
|
33 | ||||
|
34 | ||||
|
35 | ||||
|
36 | #endif /* #if !defined(XEMIT_H) */ |
@@ -0,0 +1,363 b'' | |||||
|
1 | /* | |||
|
2 | * Copyright (C) 2010, Google Inc. | |||
|
3 | * and other copyright owners as documented in JGit's IP log. | |||
|
4 | * | |||
|
5 | * This program and the accompanying materials are made available | |||
|
6 | * under the terms of the Eclipse Distribution License v1.0 which | |||
|
7 | * accompanies this distribution, is reproduced below, and is | |||
|
8 | * available at http://www.eclipse.org/org/documents/edl-v10.php | |||
|
9 | * | |||
|
10 | * All rights reserved. | |||
|
11 | * | |||
|
12 | * Redistribution and use in source and binary forms, with or | |||
|
13 | * without modification, are permitted provided that the following | |||
|
14 | * conditions are met: | |||
|
15 | * | |||
|
16 | * - Redistributions of source code must retain the above copyright | |||
|
17 | * notice, this list of conditions and the following disclaimer. | |||
|
18 | * | |||
|
19 | * - Redistributions in binary form must reproduce the above | |||
|
20 | * copyright notice, this list of conditions and the following | |||
|
21 | * disclaimer in the documentation and/or other materials provided | |||
|
22 | * with the distribution. | |||
|
23 | * | |||
|
24 | * - Neither the name of the Eclipse Foundation, Inc. nor the | |||
|
25 | * names of its contributors may be used to endorse or promote | |||
|
26 | * products derived from this software without specific prior | |||
|
27 | * written permission. | |||
|
28 | * | |||
|
29 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | |||
|
30 | * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, | |||
|
31 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |||
|
32 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
|
33 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |||
|
34 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
|
35 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |||
|
36 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
|
37 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
|
38 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |||
|
39 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
|
40 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |||
|
41 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
|
42 | */ | |||
|
43 | ||||
|
44 | #include "xinclude.h" | |||
|
45 | #include "xtypes.h" | |||
|
46 | #include "xdiff.h" | |||
|
47 | ||||
|
48 | #define MAX_PTR UINT_MAX | |||
|
49 | #define MAX_CNT UINT_MAX | |||
|
50 | ||||
|
51 | #define LINE_END(n) (line##n + count##n - 1) | |||
|
52 | #define LINE_END_PTR(n) (*line##n + *count##n - 1) | |||
|
53 | ||||
|
54 | struct histindex { | |||
|
55 | struct record { | |||
|
56 | unsigned int ptr, cnt; | |||
|
57 | struct record *next; | |||
|
58 | } **records, /* an occurrence */ | |||
|
59 | **line_map; /* map of line to record chain */ | |||
|
60 | chastore_t rcha; | |||
|
61 | unsigned int *next_ptrs; | |||
|
62 | unsigned int table_bits, | |||
|
63 | records_size, | |||
|
64 | line_map_size; | |||
|
65 | ||||
|
66 | unsigned int max_chain_length, | |||
|
67 | key_shift, | |||
|
68 | ptr_shift; | |||
|
69 | ||||
|
70 | unsigned int cnt, | |||
|
71 | has_common; | |||
|
72 | ||||
|
73 | xdfenv_t *env; | |||
|
74 | xpparam_t const *xpp; | |||
|
75 | }; | |||
|
76 | ||||
|
77 | struct region { | |||
|
78 | unsigned int begin1, end1; | |||
|
79 | unsigned int begin2, end2; | |||
|
80 | }; | |||
|
81 | ||||
|
82 | #define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift]) | |||
|
83 | ||||
|
84 | #define NEXT_PTR(index, ptr) \ | |||
|
85 | (index->next_ptrs[(ptr) - index->ptr_shift]) | |||
|
86 | ||||
|
87 | #define CNT(index, ptr) \ | |||
|
88 | ((LINE_MAP(index, ptr))->cnt) | |||
|
89 | ||||
|
90 | #define REC(env, s, l) \ | |||
|
91 | (env->xdf##s.recs[l - 1]) | |||
|
92 | ||||
|
93 | static int cmp_recs(xpparam_t const *xpp, | |||
|
94 | xrecord_t *r1, xrecord_t *r2) | |||
|
95 | { | |||
|
96 | return r1->ha == r2->ha && | |||
|
97 | xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, | |||
|
98 | xpp->flags); | |||
|
99 | } | |||
|
100 | ||||
|
101 | #define CMP_ENV(xpp, env, s1, l1, s2, l2) \ | |||
|
102 | (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) | |||
|
103 | ||||
|
104 | #define CMP(i, s1, l1, s2, l2) \ | |||
|
105 | (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) | |||
|
106 | ||||
|
107 | #define TABLE_HASH(index, side, line) \ | |||
|
108 | XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) | |||
|
109 | ||||
|
110 | static int scanA(struct histindex *index, int line1, int count1) | |||
|
111 | { | |||
|
112 | unsigned int ptr, tbl_idx; | |||
|
113 | unsigned int chain_len; | |||
|
114 | struct record **rec_chain, *rec; | |||
|
115 | ||||
|
116 | for (ptr = LINE_END(1); line1 <= ptr; ptr--) { | |||
|
117 | tbl_idx = TABLE_HASH(index, 1, ptr); | |||
|
118 | rec_chain = index->records + tbl_idx; | |||
|
119 | rec = *rec_chain; | |||
|
120 | ||||
|
121 | chain_len = 0; | |||
|
122 | while (rec) { | |||
|
123 | if (CMP(index, 1, rec->ptr, 1, ptr)) { | |||
|
124 | /* | |||
|
125 | * ptr is identical to another element. Insert | |||
|
126 | * it onto the front of the existing element | |||
|
127 | * chain. | |||
|
128 | */ | |||
|
129 | NEXT_PTR(index, ptr) = rec->ptr; | |||
|
130 | rec->ptr = ptr; | |||
|
131 | /* cap rec->cnt at MAX_CNT */ | |||
|
132 | rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); | |||
|
133 | LINE_MAP(index, ptr) = rec; | |||
|
134 | goto continue_scan; | |||
|
135 | } | |||
|
136 | ||||
|
137 | rec = rec->next; | |||
|
138 | chain_len++; | |||
|
139 | } | |||
|
140 | ||||
|
141 | if (chain_len == index->max_chain_length) | |||
|
142 | return -1; | |||
|
143 | ||||
|
144 | /* | |||
|
145 | * This is the first time we have ever seen this particular | |||
|
146 | * element in the sequence. Construct a new chain for it. | |||
|
147 | */ | |||
|
148 | if (!(rec = xdl_cha_alloc(&index->rcha))) | |||
|
149 | return -1; | |||
|
150 | rec->ptr = ptr; | |||
|
151 | rec->cnt = 1; | |||
|
152 | rec->next = *rec_chain; | |||
|
153 | *rec_chain = rec; | |||
|
154 | LINE_MAP(index, ptr) = rec; | |||
|
155 | ||||
|
156 | continue_scan: | |||
|
157 | ; /* no op */ | |||
|
158 | } | |||
|
159 | ||||
|
160 | return 0; | |||
|
161 | } | |||
|
162 | ||||
|
163 | static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, | |||
|
164 | int line1, int count1, int line2, int count2) | |||
|
165 | { | |||
|
166 | unsigned int b_next = b_ptr + 1; | |||
|
167 | struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; | |||
|
168 | unsigned int as, ae, bs, be, np, rc; | |||
|
169 | int should_break; | |||
|
170 | ||||
|
171 | for (; rec; rec = rec->next) { | |||
|
172 | if (rec->cnt > index->cnt) { | |||
|
173 | if (!index->has_common) | |||
|
174 | index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); | |||
|
175 | continue; | |||
|
176 | } | |||
|
177 | ||||
|
178 | as = rec->ptr; | |||
|
179 | if (!CMP(index, 1, as, 2, b_ptr)) | |||
|
180 | continue; | |||
|
181 | ||||
|
182 | index->has_common = 1; | |||
|
183 | for (;;) { | |||
|
184 | should_break = 0; | |||
|
185 | np = NEXT_PTR(index, as); | |||
|
186 | bs = b_ptr; | |||
|
187 | ae = as; | |||
|
188 | be = bs; | |||
|
189 | rc = rec->cnt; | |||
|
190 | ||||
|
191 | while (line1 < as && line2 < bs | |||
|
192 | && CMP(index, 1, as - 1, 2, bs - 1)) { | |||
|
193 | as--; | |||
|
194 | bs--; | |||
|
195 | if (1 < rc) | |||
|
196 | rc = XDL_MIN(rc, CNT(index, as)); | |||
|
197 | } | |||
|
198 | while (ae < LINE_END(1) && be < LINE_END(2) | |||
|
199 | && CMP(index, 1, ae + 1, 2, be + 1)) { | |||
|
200 | ae++; | |||
|
201 | be++; | |||
|
202 | if (1 < rc) | |||
|
203 | rc = XDL_MIN(rc, CNT(index, ae)); | |||
|
204 | } | |||
|
205 | ||||
|
206 | if (b_next <= be) | |||
|
207 | b_next = be + 1; | |||
|
208 | if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { | |||
|
209 | lcs->begin1 = as; | |||
|
210 | lcs->begin2 = bs; | |||
|
211 | lcs->end1 = ae; | |||
|
212 | lcs->end2 = be; | |||
|
213 | index->cnt = rc; | |||
|
214 | } | |||
|
215 | ||||
|
216 | if (np == 0) | |||
|
217 | break; | |||
|
218 | ||||
|
219 | while (np <= ae) { | |||
|
220 | np = NEXT_PTR(index, np); | |||
|
221 | if (np == 0) { | |||
|
222 | should_break = 1; | |||
|
223 | break; | |||
|
224 | } | |||
|
225 | } | |||
|
226 | ||||
|
227 | if (should_break) | |||
|
228 | break; | |||
|
229 | ||||
|
230 | as = np; | |||
|
231 | } | |||
|
232 | } | |||
|
233 | return b_next; | |||
|
234 | } | |||
|
235 | ||||
|
236 | static int find_lcs(struct histindex *index, struct region *lcs, | |||
|
237 | int line1, int count1, int line2, int count2) { | |||
|
238 | int b_ptr; | |||
|
239 | ||||
|
240 | if (scanA(index, line1, count1)) | |||
|
241 | return -1; | |||
|
242 | ||||
|
243 | index->cnt = index->max_chain_length + 1; | |||
|
244 | ||||
|
245 | for (b_ptr = line2; b_ptr <= LINE_END(2); ) | |||
|
246 | b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); | |||
|
247 | ||||
|
248 | return index->has_common && index->max_chain_length < index->cnt; | |||
|
249 | } | |||
|
250 | ||||
|
251 | static int fall_back_to_classic_diff(struct histindex *index, | |||
|
252 | int line1, int count1, int line2, int count2) | |||
|
253 | { | |||
|
254 | xpparam_t xpp; | |||
|
255 | xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; | |||
|
256 | ||||
|
257 | return xdl_fall_back_diff(index->env, &xpp, | |||
|
258 | line1, count1, line2, count2); | |||
|
259 | } | |||
|
260 | ||||
|
261 | static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, | |||
|
262 | int line1, int count1, int line2, int count2) | |||
|
263 | { | |||
|
264 | struct histindex index; | |||
|
265 | struct region lcs; | |||
|
266 | int sz; | |||
|
267 | int result = -1; | |||
|
268 | ||||
|
269 | if (count1 <= 0 && count2 <= 0) | |||
|
270 | return 0; | |||
|
271 | ||||
|
272 | if (LINE_END(1) >= MAX_PTR) | |||
|
273 | return -1; | |||
|
274 | ||||
|
275 | if (!count1) { | |||
|
276 | while(count2--) | |||
|
277 | env->xdf2.rchg[line2++ - 1] = 1; | |||
|
278 | return 0; | |||
|
279 | } else if (!count2) { | |||
|
280 | while(count1--) | |||
|
281 | env->xdf1.rchg[line1++ - 1] = 1; | |||
|
282 | return 0; | |||
|
283 | } | |||
|
284 | ||||
|
285 | memset(&index, 0, sizeof(index)); | |||
|
286 | ||||
|
287 | index.env = env; | |||
|
288 | index.xpp = xpp; | |||
|
289 | ||||
|
290 | index.records = NULL; | |||
|
291 | index.line_map = NULL; | |||
|
292 | /* in case of early xdl_cha_free() */ | |||
|
293 | index.rcha.head = NULL; | |||
|
294 | ||||
|
295 | index.table_bits = xdl_hashbits(count1); | |||
|
296 | sz = index.records_size = 1 << index.table_bits; | |||
|
297 | sz *= sizeof(struct record *); | |||
|
298 | if (!(index.records = (struct record **) xdl_malloc(sz))) | |||
|
299 | goto cleanup; | |||
|
300 | memset(index.records, 0, sz); | |||
|
301 | ||||
|
302 | sz = index.line_map_size = count1; | |||
|
303 | sz *= sizeof(struct record *); | |||
|
304 | if (!(index.line_map = (struct record **) xdl_malloc(sz))) | |||
|
305 | goto cleanup; | |||
|
306 | memset(index.line_map, 0, sz); | |||
|
307 | ||||
|
308 | sz = index.line_map_size; | |||
|
309 | sz *= sizeof(unsigned int); | |||
|
310 | if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) | |||
|
311 | goto cleanup; | |||
|
312 | memset(index.next_ptrs, 0, sz); | |||
|
313 | ||||
|
314 | /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ | |||
|
315 | if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) | |||
|
316 | goto cleanup; | |||
|
317 | ||||
|
318 | index.ptr_shift = line1; | |||
|
319 | index.max_chain_length = 64; | |||
|
320 | ||||
|
321 | memset(&lcs, 0, sizeof(lcs)); | |||
|
322 | if (find_lcs(&index, &lcs, line1, count1, line2, count2)) | |||
|
323 | result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); | |||
|
324 | else { | |||
|
325 | if (lcs.begin1 == 0 && lcs.begin2 == 0) { | |||
|
326 | while (count1--) | |||
|
327 | env->xdf1.rchg[line1++ - 1] = 1; | |||
|
328 | while (count2--) | |||
|
329 | env->xdf2.rchg[line2++ - 1] = 1; | |||
|
330 | result = 0; | |||
|
331 | } else { | |||
|
332 | result = histogram_diff(xpp, env, | |||
|
333 | line1, lcs.begin1 - line1, | |||
|
334 | line2, lcs.begin2 - line2); | |||
|
335 | if (result) | |||
|
336 | goto cleanup; | |||
|
337 | result = histogram_diff(xpp, env, | |||
|
338 | lcs.end1 + 1, LINE_END(1) - lcs.end1, | |||
|
339 | lcs.end2 + 1, LINE_END(2) - lcs.end2); | |||
|
340 | if (result) | |||
|
341 | goto cleanup; | |||
|
342 | } | |||
|
343 | } | |||
|
344 | ||||
|
345 | cleanup: | |||
|
346 | xdl_free(index.records); | |||
|
347 | xdl_free(index.line_map); | |||
|
348 | xdl_free(index.next_ptrs); | |||
|
349 | xdl_cha_free(&index.rcha); | |||
|
350 | ||||
|
351 | return result; | |||
|
352 | } | |||
|
353 | ||||
|
354 | int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, | |||
|
355 | xpparam_t const *xpp, xdfenv_t *env) | |||
|
356 | { | |||
|
357 | if (xdl_prepare_env(file1, file2, xpp, env) < 0) | |||
|
358 | return -1; | |||
|
359 | ||||
|
360 | return histogram_diff(xpp, env, | |||
|
361 | env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, | |||
|
362 | env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); | |||
|
363 | } |
@@ -0,0 +1,42 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XINCLUDE_H) | |||
|
24 | #define XINCLUDE_H | |||
|
25 | ||||
|
26 | #include <ctype.h> | |||
|
27 | #include <stdio.h> | |||
|
28 | #include <stdlib.h> | |||
|
29 | #include <unistd.h> | |||
|
30 | #include <string.h> | |||
|
31 | #include <limits.h> | |||
|
32 | ||||
|
33 | #include "xmacros.h" | |||
|
34 | #include "xdiff.h" | |||
|
35 | #include "xtypes.h" | |||
|
36 | #include "xutils.h" | |||
|
37 | #include "xprepare.h" | |||
|
38 | #include "xdiffi.h" | |||
|
39 | #include "xemit.h" | |||
|
40 | ||||
|
41 | ||||
|
42 | #endif /* #if !defined(XINCLUDE_H) */ |
@@ -0,0 +1,54 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XMACROS_H) | |||
|
24 | #define XMACROS_H | |||
|
25 | ||||
|
26 | ||||
|
27 | ||||
|
28 | ||||
|
29 | #define XDL_MIN(a, b) ((a) < (b) ? (a): (b)) | |||
|
30 | #define XDL_MAX(a, b) ((a) > (b) ? (a): (b)) | |||
|
31 | #define XDL_ABS(v) ((v) >= 0 ? (v): -(v)) | |||
|
32 | #define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9') | |||
|
33 | #define XDL_ISSPACE(c) (isspace((unsigned char)(c))) | |||
|
34 | #define XDL_ADDBITS(v,b) ((v) + ((v) >> (b))) | |||
|
35 | #define XDL_MASKBITS(b) ((1UL << (b)) - 1) | |||
|
36 | #define XDL_HASHLONG(v,b) (XDL_ADDBITS((unsigned long)(v), b) & XDL_MASKBITS(b)) | |||
|
37 | #define XDL_PTRFREE(p) do { if (p) { xdl_free(p); (p) = NULL; } } while (0) | |||
|
38 | #define XDL_LE32_PUT(p, v) \ | |||
|
39 | do { \ | |||
|
40 | unsigned char *__p = (unsigned char *) (p); \ | |||
|
41 | *__p++ = (unsigned char) (v); \ | |||
|
42 | *__p++ = (unsigned char) ((v) >> 8); \ | |||
|
43 | *__p++ = (unsigned char) ((v) >> 16); \ | |||
|
44 | *__p = (unsigned char) ((v) >> 24); \ | |||
|
45 | } while (0) | |||
|
46 | #define XDL_LE32_GET(p, v) \ | |||
|
47 | do { \ | |||
|
48 | unsigned char const *__p = (unsigned char const *) (p); \ | |||
|
49 | (v) = (unsigned long) __p[0] | ((unsigned long) __p[1]) << 8 | \ | |||
|
50 | ((unsigned long) __p[2]) << 16 | ((unsigned long) __p[3]) << 24; \ | |||
|
51 | } while (0) | |||
|
52 | ||||
|
53 | ||||
|
54 | #endif /* #if !defined(XMACROS_H) */ |
This diff has been collapsed as it changes many lines, (686 lines changed) Show them Hide them | |||||
@@ -0,0 +1,686 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003-2006 Davide Libenzi, Johannes E. Schindelin | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #include "xinclude.h" | |||
|
24 | ||||
|
25 | typedef struct s_xdmerge { | |||
|
26 | struct s_xdmerge *next; | |||
|
27 | /* | |||
|
28 | * 0 = conflict, | |||
|
29 | * 1 = no conflict, take first, | |||
|
30 | * 2 = no conflict, take second. | |||
|
31 | * 3 = no conflict, take both. | |||
|
32 | */ | |||
|
33 | int mode; | |||
|
34 | /* | |||
|
35 | * These point at the respective postimages. E.g. <i1,chg1> is | |||
|
36 | * how side #1 wants to change the common ancestor; if there is no | |||
|
37 | * overlap, lines before i1 in the postimage of side #1 appear | |||
|
38 | * in the merge result as a region touched by neither side. | |||
|
39 | */ | |||
|
40 | long i1, i2; | |||
|
41 | long chg1, chg2; | |||
|
42 | /* | |||
|
43 | * These point at the preimage; of course there is just one | |||
|
44 | * preimage, that is from the shared common ancestor. | |||
|
45 | */ | |||
|
46 | long i0; | |||
|
47 | long chg0; | |||
|
48 | } xdmerge_t; | |||
|
49 | ||||
|
50 | static int xdl_append_merge(xdmerge_t **merge, int mode, | |||
|
51 | long i0, long chg0, | |||
|
52 | long i1, long chg1, | |||
|
53 | long i2, long chg2) | |||
|
54 | { | |||
|
55 | xdmerge_t *m = *merge; | |||
|
56 | if (m && (i1 <= m->i1 + m->chg1 || i2 <= m->i2 + m->chg2)) { | |||
|
57 | if (mode != m->mode) | |||
|
58 | m->mode = 0; | |||
|
59 | m->chg0 = i0 + chg0 - m->i0; | |||
|
60 | m->chg1 = i1 + chg1 - m->i1; | |||
|
61 | m->chg2 = i2 + chg2 - m->i2; | |||
|
62 | } else { | |||
|
63 | m = xdl_malloc(sizeof(xdmerge_t)); | |||
|
64 | if (!m) | |||
|
65 | return -1; | |||
|
66 | m->next = NULL; | |||
|
67 | m->mode = mode; | |||
|
68 | m->i0 = i0; | |||
|
69 | m->chg0 = chg0; | |||
|
70 | m->i1 = i1; | |||
|
71 | m->chg1 = chg1; | |||
|
72 | m->i2 = i2; | |||
|
73 | m->chg2 = chg2; | |||
|
74 | if (*merge) | |||
|
75 | (*merge)->next = m; | |||
|
76 | *merge = m; | |||
|
77 | } | |||
|
78 | return 0; | |||
|
79 | } | |||
|
80 | ||||
|
81 | static int xdl_cleanup_merge(xdmerge_t *c) | |||
|
82 | { | |||
|
83 | int count = 0; | |||
|
84 | xdmerge_t *next_c; | |||
|
85 | ||||
|
86 | /* were there conflicts? */ | |||
|
87 | for (; c; c = next_c) { | |||
|
88 | if (c->mode == 0) | |||
|
89 | count++; | |||
|
90 | next_c = c->next; | |||
|
91 | free(c); | |||
|
92 | } | |||
|
93 | return count; | |||
|
94 | } | |||
|
95 | ||||
|
96 | static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, | |||
|
97 | int line_count, long flags) | |||
|
98 | { | |||
|
99 | int i; | |||
|
100 | xrecord_t **rec1 = xe1->xdf2.recs + i1; | |||
|
101 | xrecord_t **rec2 = xe2->xdf2.recs + i2; | |||
|
102 | ||||
|
103 | for (i = 0; i < line_count; i++) { | |||
|
104 | int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size, | |||
|
105 | rec2[i]->ptr, rec2[i]->size, flags); | |||
|
106 | if (!result) | |||
|
107 | return -1; | |||
|
108 | } | |||
|
109 | return 0; | |||
|
110 | } | |||
|
111 | ||||
|
112 | static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) | |||
|
113 | { | |||
|
114 | xrecord_t **recs; | |||
|
115 | int size = 0; | |||
|
116 | ||||
|
117 | recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i; | |||
|
118 | ||||
|
119 | if (count < 1) | |||
|
120 | return 0; | |||
|
121 | ||||
|
122 | for (i = 0; i < count; size += recs[i++]->size) | |||
|
123 | if (dest) | |||
|
124 | memcpy(dest + size, recs[i]->ptr, recs[i]->size); | |||
|
125 | if (add_nl) { | |||
|
126 | i = recs[count - 1]->size; | |||
|
127 | if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') { | |||
|
128 | if (needs_cr) { | |||
|
129 | if (dest) | |||
|
130 | dest[size] = '\r'; | |||
|
131 | size++; | |||
|
132 | } | |||
|
133 | ||||
|
134 | if (dest) | |||
|
135 | dest[size] = '\n'; | |||
|
136 | size++; | |||
|
137 | } | |||
|
138 | } | |||
|
139 | return size; | |||
|
140 | } | |||
|
141 | ||||
|
142 | static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) | |||
|
143 | { | |||
|
144 | return xdl_recs_copy_0(0, xe, i, count, needs_cr, add_nl, dest); | |||
|
145 | } | |||
|
146 | ||||
|
147 | static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest) | |||
|
148 | { | |||
|
149 | return xdl_recs_copy_0(1, xe, i, count, needs_cr, add_nl, dest); | |||
|
150 | } | |||
|
151 | ||||
|
152 | /* | |||
|
153 | * Returns 1 if the i'th line ends in CR/LF (if it is the last line and | |||
|
154 | * has no eol, the preceding line, if any), 0 if it ends in LF-only, and | |||
|
155 | * -1 if the line ending cannot be determined. | |||
|
156 | */ | |||
|
157 | static int is_eol_crlf(xdfile_t *file, int i) | |||
|
158 | { | |||
|
159 | long size; | |||
|
160 | ||||
|
161 | if (i < file->nrec - 1) | |||
|
162 | /* All lines before the last *must* end in LF */ | |||
|
163 | return (size = file->recs[i]->size) > 1 && | |||
|
164 | file->recs[i]->ptr[size - 2] == '\r'; | |||
|
165 | if (!file->nrec) | |||
|
166 | /* Cannot determine eol style from empty file */ | |||
|
167 | return -1; | |||
|
168 | if ((size = file->recs[i]->size) && | |||
|
169 | file->recs[i]->ptr[size - 1] == '\n') | |||
|
170 | /* Last line; ends in LF; Is it CR/LF? */ | |||
|
171 | return size > 1 && | |||
|
172 | file->recs[i]->ptr[size - 2] == '\r'; | |||
|
173 | if (!i) | |||
|
174 | /* The only line has no eol */ | |||
|
175 | return -1; | |||
|
176 | /* Determine eol from second-to-last line */ | |||
|
177 | return (size = file->recs[i - 1]->size) > 1 && | |||
|
178 | file->recs[i - 1]->ptr[size - 2] == '\r'; | |||
|
179 | } | |||
|
180 | ||||
|
181 | static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m) | |||
|
182 | { | |||
|
183 | int needs_cr; | |||
|
184 | ||||
|
185 | /* Match post-images' preceding, or first, lines' end-of-line style */ | |||
|
186 | needs_cr = is_eol_crlf(&xe1->xdf2, m->i1 ? m->i1 - 1 : 0); | |||
|
187 | if (needs_cr) | |||
|
188 | needs_cr = is_eol_crlf(&xe2->xdf2, m->i2 ? m->i2 - 1 : 0); | |||
|
189 | /* Look at pre-image's first line, unless we already settled on LF */ | |||
|
190 | if (needs_cr) | |||
|
191 | needs_cr = is_eol_crlf(&xe1->xdf1, 0); | |||
|
192 | /* If still undecided, use LF-only */ | |||
|
193 | return needs_cr < 0 ? 0 : needs_cr; | |||
|
194 | } | |||
|
195 | ||||
|
196 | static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1, | |||
|
197 | xdfenv_t *xe2, const char *name2, | |||
|
198 | const char *name3, | |||
|
199 | int size, int i, int style, | |||
|
200 | xdmerge_t *m, char *dest, int marker_size) | |||
|
201 | { | |||
|
202 | int marker1_size = (name1 ? strlen(name1) + 1 : 0); | |||
|
203 | int marker2_size = (name2 ? strlen(name2) + 1 : 0); | |||
|
204 | int marker3_size = (name3 ? strlen(name3) + 1 : 0); | |||
|
205 | int needs_cr = is_cr_needed(xe1, xe2, m); | |||
|
206 | ||||
|
207 | if (marker_size <= 0) | |||
|
208 | marker_size = DEFAULT_CONFLICT_MARKER_SIZE; | |||
|
209 | ||||
|
210 | /* Before conflicting part */ | |||
|
211 | size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, | |||
|
212 | dest ? dest + size : NULL); | |||
|
213 | ||||
|
214 | if (!dest) { | |||
|
215 | size += marker_size + 1 + needs_cr + marker1_size; | |||
|
216 | } else { | |||
|
217 | memset(dest + size, '<', marker_size); | |||
|
218 | size += marker_size; | |||
|
219 | if (marker1_size) { | |||
|
220 | dest[size] = ' '; | |||
|
221 | memcpy(dest + size + 1, name1, marker1_size - 1); | |||
|
222 | size += marker1_size; | |||
|
223 | } | |||
|
224 | if (needs_cr) | |||
|
225 | dest[size++] = '\r'; | |||
|
226 | dest[size++] = '\n'; | |||
|
227 | } | |||
|
228 | ||||
|
229 | /* Postimage from side #1 */ | |||
|
230 | size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, 1, | |||
|
231 | dest ? dest + size : NULL); | |||
|
232 | ||||
|
233 | if (style == XDL_MERGE_DIFF3) { | |||
|
234 | /* Shared preimage */ | |||
|
235 | if (!dest) { | |||
|
236 | size += marker_size + 1 + needs_cr + marker3_size; | |||
|
237 | } else { | |||
|
238 | memset(dest + size, '|', marker_size); | |||
|
239 | size += marker_size; | |||
|
240 | if (marker3_size) { | |||
|
241 | dest[size] = ' '; | |||
|
242 | memcpy(dest + size + 1, name3, marker3_size - 1); | |||
|
243 | size += marker3_size; | |||
|
244 | } | |||
|
245 | if (needs_cr) | |||
|
246 | dest[size++] = '\r'; | |||
|
247 | dest[size++] = '\n'; | |||
|
248 | } | |||
|
249 | size += xdl_orig_copy(xe1, m->i0, m->chg0, needs_cr, 1, | |||
|
250 | dest ? dest + size : NULL); | |||
|
251 | } | |||
|
252 | ||||
|
253 | if (!dest) { | |||
|
254 | size += marker_size + 1 + needs_cr; | |||
|
255 | } else { | |||
|
256 | memset(dest + size, '=', marker_size); | |||
|
257 | size += marker_size; | |||
|
258 | if (needs_cr) | |||
|
259 | dest[size++] = '\r'; | |||
|
260 | dest[size++] = '\n'; | |||
|
261 | } | |||
|
262 | ||||
|
263 | /* Postimage from side #2 */ | |||
|
264 | size += xdl_recs_copy(xe2, m->i2, m->chg2, needs_cr, 1, | |||
|
265 | dest ? dest + size : NULL); | |||
|
266 | if (!dest) { | |||
|
267 | size += marker_size + 1 + needs_cr + marker2_size; | |||
|
268 | } else { | |||
|
269 | memset(dest + size, '>', marker_size); | |||
|
270 | size += marker_size; | |||
|
271 | if (marker2_size) { | |||
|
272 | dest[size] = ' '; | |||
|
273 | memcpy(dest + size + 1, name2, marker2_size - 1); | |||
|
274 | size += marker2_size; | |||
|
275 | } | |||
|
276 | if (needs_cr) | |||
|
277 | dest[size++] = '\r'; | |||
|
278 | dest[size++] = '\n'; | |||
|
279 | } | |||
|
280 | return size; | |||
|
281 | } | |||
|
282 | ||||
|
283 | static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, | |||
|
284 | xdfenv_t *xe2, const char *name2, | |||
|
285 | const char *ancestor_name, | |||
|
286 | int favor, | |||
|
287 | xdmerge_t *m, char *dest, int style, | |||
|
288 | int marker_size) | |||
|
289 | { | |||
|
290 | int size, i; | |||
|
291 | ||||
|
292 | for (size = i = 0; m; m = m->next) { | |||
|
293 | if (favor && !m->mode) | |||
|
294 | m->mode = favor; | |||
|
295 | ||||
|
296 | if (m->mode == 0) | |||
|
297 | size = fill_conflict_hunk(xe1, name1, xe2, name2, | |||
|
298 | ancestor_name, | |||
|
299 | size, i, style, m, dest, | |||
|
300 | marker_size); | |||
|
301 | else if (m->mode & 3) { | |||
|
302 | /* Before conflicting part */ | |||
|
303 | size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0, | |||
|
304 | dest ? dest + size : NULL); | |||
|
305 | /* Postimage from side #1 */ | |||
|
306 | if (m->mode & 1) { | |||
|
307 | int needs_cr = is_cr_needed(xe1, xe2, m); | |||
|
308 | ||||
|
309 | size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, (m->mode & 2), | |||
|
310 | dest ? dest + size : NULL); | |||
|
311 | } | |||
|
312 | /* Postimage from side #2 */ | |||
|
313 | if (m->mode & 2) | |||
|
314 | size += xdl_recs_copy(xe2, m->i2, m->chg2, 0, 0, | |||
|
315 | dest ? dest + size : NULL); | |||
|
316 | } else | |||
|
317 | continue; | |||
|
318 | i = m->i1 + m->chg1; | |||
|
319 | } | |||
|
320 | size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0, | |||
|
321 | dest ? dest + size : NULL); | |||
|
322 | return size; | |||
|
323 | } | |||
|
324 | ||||
|
325 | /* | |||
|
326 | * Sometimes, changes are not quite identical, but differ in only a few | |||
|
327 | * lines. Try hard to show only these few lines as conflicting. | |||
|
328 | */ | |||
|
329 | static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, | |||
|
330 | xpparam_t const *xpp) | |||
|
331 | { | |||
|
332 | for (; m; m = m->next) { | |||
|
333 | mmfile_t t1, t2; | |||
|
334 | xdfenv_t xe; | |||
|
335 | xdchange_t *xscr, *x; | |||
|
336 | int i1 = m->i1, i2 = m->i2; | |||
|
337 | ||||
|
338 | /* let's handle just the conflicts */ | |||
|
339 | if (m->mode) | |||
|
340 | continue; | |||
|
341 | ||||
|
342 | /* no sense refining a conflict when one side is empty */ | |||
|
343 | if (m->chg1 == 0 || m->chg2 == 0) | |||
|
344 | continue; | |||
|
345 | ||||
|
346 | /* | |||
|
347 | * This probably does not work outside git, since | |||
|
348 | * we have a very simple mmfile structure. | |||
|
349 | */ | |||
|
350 | t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr; | |||
|
351 | t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr | |||
|
352 | + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr; | |||
|
353 | t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr; | |||
|
354 | t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr | |||
|
355 | + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr; | |||
|
356 | if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) | |||
|
357 | return -1; | |||
|
358 | if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 || | |||
|
359 | xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 || | |||
|
360 | xdl_build_script(&xe, &xscr) < 0) { | |||
|
361 | xdl_free_env(&xe); | |||
|
362 | return -1; | |||
|
363 | } | |||
|
364 | if (!xscr) { | |||
|
365 | /* If this happens, the changes are identical. */ | |||
|
366 | xdl_free_env(&xe); | |||
|
367 | m->mode = 4; | |||
|
368 | continue; | |||
|
369 | } | |||
|
370 | x = xscr; | |||
|
371 | m->i1 = xscr->i1 + i1; | |||
|
372 | m->chg1 = xscr->chg1; | |||
|
373 | m->i2 = xscr->i2 + i2; | |||
|
374 | m->chg2 = xscr->chg2; | |||
|
375 | while (xscr->next) { | |||
|
376 | xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t)); | |||
|
377 | if (!m2) { | |||
|
378 | xdl_free_env(&xe); | |||
|
379 | xdl_free_script(x); | |||
|
380 | return -1; | |||
|
381 | } | |||
|
382 | xscr = xscr->next; | |||
|
383 | m2->next = m->next; | |||
|
384 | m->next = m2; | |||
|
385 | m = m2; | |||
|
386 | m->mode = 0; | |||
|
387 | m->i1 = xscr->i1 + i1; | |||
|
388 | m->chg1 = xscr->chg1; | |||
|
389 | m->i2 = xscr->i2 + i2; | |||
|
390 | m->chg2 = xscr->chg2; | |||
|
391 | } | |||
|
392 | xdl_free_env(&xe); | |||
|
393 | xdl_free_script(x); | |||
|
394 | } | |||
|
395 | return 0; | |||
|
396 | } | |||
|
397 | ||||
|
398 | static int line_contains_alnum(const char *ptr, long size) | |||
|
399 | { | |||
|
400 | while (size--) | |||
|
401 | if (isalnum((unsigned char)*(ptr++))) | |||
|
402 | return 1; | |||
|
403 | return 0; | |||
|
404 | } | |||
|
405 | ||||
|
406 | static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) | |||
|
407 | { | |||
|
408 | for (; chg; chg--, i++) | |||
|
409 | if (line_contains_alnum(xe->xdf2.recs[i]->ptr, | |||
|
410 | xe->xdf2.recs[i]->size)) | |||
|
411 | return 1; | |||
|
412 | return 0; | |||
|
413 | } | |||
|
414 | ||||
|
415 | /* | |||
|
416 | * This function merges m and m->next, marking everything between those hunks | |||
|
417 | * as conflicting, too. | |||
|
418 | */ | |||
|
419 | static void xdl_merge_two_conflicts(xdmerge_t *m) | |||
|
420 | { | |||
|
421 | xdmerge_t *next_m = m->next; | |||
|
422 | m->chg1 = next_m->i1 + next_m->chg1 - m->i1; | |||
|
423 | m->chg2 = next_m->i2 + next_m->chg2 - m->i2; | |||
|
424 | m->next = next_m->next; | |||
|
425 | free(next_m); | |||
|
426 | } | |||
|
427 | ||||
|
428 | /* | |||
|
429 | * If there are less than 3 non-conflicting lines between conflicts, | |||
|
430 | * it appears simpler -- because it takes up less (or as many) lines -- | |||
|
431 | * if the lines are moved into the conflicts. | |||
|
432 | */ | |||
|
433 | static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m, | |||
|
434 | int simplify_if_no_alnum) | |||
|
435 | { | |||
|
436 | int result = 0; | |||
|
437 | ||||
|
438 | if (!m) | |||
|
439 | return result; | |||
|
440 | for (;;) { | |||
|
441 | xdmerge_t *next_m = m->next; | |||
|
442 | int begin, end; | |||
|
443 | ||||
|
444 | if (!next_m) | |||
|
445 | return result; | |||
|
446 | ||||
|
447 | begin = m->i1 + m->chg1; | |||
|
448 | end = next_m->i1; | |||
|
449 | ||||
|
450 | if (m->mode != 0 || next_m->mode != 0 || | |||
|
451 | (end - begin > 3 && | |||
|
452 | (!simplify_if_no_alnum || | |||
|
453 | lines_contain_alnum(xe1, begin, end - begin)))) { | |||
|
454 | m = next_m; | |||
|
455 | } else { | |||
|
456 | result++; | |||
|
457 | xdl_merge_two_conflicts(m); | |||
|
458 | } | |||
|
459 | } | |||
|
460 | } | |||
|
461 | ||||
|
462 | /* | |||
|
463 | * level == 0: mark all overlapping changes as conflict | |||
|
464 | * level == 1: mark overlapping changes as conflict only if not identical | |||
|
465 | * level == 2: analyze non-identical changes for minimal conflict set | |||
|
466 | * level == 3: analyze non-identical changes for minimal conflict set, but | |||
|
467 | * treat hunks not containing any letter or number as conflicting | |||
|
468 | * | |||
|
469 | * returns < 0 on error, == 0 for no conflicts, else number of conflicts | |||
|
470 | */ | |||
|
471 | static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, | |||
|
472 | xdfenv_t *xe2, xdchange_t *xscr2, | |||
|
473 | xmparam_t const *xmp, mmbuffer_t *result) | |||
|
474 | { | |||
|
475 | xdmerge_t *changes, *c; | |||
|
476 | xpparam_t const *xpp = &xmp->xpp; | |||
|
477 | const char *const ancestor_name = xmp->ancestor; | |||
|
478 | const char *const name1 = xmp->file1; | |||
|
479 | const char *const name2 = xmp->file2; | |||
|
480 | int i0, i1, i2, chg0, chg1, chg2; | |||
|
481 | int level = xmp->level; | |||
|
482 | int style = xmp->style; | |||
|
483 | int favor = xmp->favor; | |||
|
484 | ||||
|
485 | if (style == XDL_MERGE_DIFF3) { | |||
|
486 | /* | |||
|
487 | * "diff3 -m" output does not make sense for anything | |||
|
488 | * more aggressive than XDL_MERGE_EAGER. | |||
|
489 | */ | |||
|
490 | if (XDL_MERGE_EAGER < level) | |||
|
491 | level = XDL_MERGE_EAGER; | |||
|
492 | } | |||
|
493 | ||||
|
494 | c = changes = NULL; | |||
|
495 | ||||
|
496 | while (xscr1 && xscr2) { | |||
|
497 | if (!changes) | |||
|
498 | changes = c; | |||
|
499 | if (xscr1->i1 + xscr1->chg1 < xscr2->i1) { | |||
|
500 | i0 = xscr1->i1; | |||
|
501 | i1 = xscr1->i2; | |||
|
502 | i2 = xscr2->i2 - xscr2->i1 + xscr1->i1; | |||
|
503 | chg0 = xscr1->chg1; | |||
|
504 | chg1 = xscr1->chg2; | |||
|
505 | chg2 = xscr1->chg1; | |||
|
506 | if (xdl_append_merge(&c, 1, | |||
|
507 | i0, chg0, i1, chg1, i2, chg2)) { | |||
|
508 | xdl_cleanup_merge(changes); | |||
|
509 | return -1; | |||
|
510 | } | |||
|
511 | xscr1 = xscr1->next; | |||
|
512 | continue; | |||
|
513 | } | |||
|
514 | if (xscr2->i1 + xscr2->chg1 < xscr1->i1) { | |||
|
515 | i0 = xscr2->i1; | |||
|
516 | i1 = xscr1->i2 - xscr1->i1 + xscr2->i1; | |||
|
517 | i2 = xscr2->i2; | |||
|
518 | chg0 = xscr2->chg1; | |||
|
519 | chg1 = xscr2->chg1; | |||
|
520 | chg2 = xscr2->chg2; | |||
|
521 | if (xdl_append_merge(&c, 2, | |||
|
522 | i0, chg0, i1, chg1, i2, chg2)) { | |||
|
523 | xdl_cleanup_merge(changes); | |||
|
524 | return -1; | |||
|
525 | } | |||
|
526 | xscr2 = xscr2->next; | |||
|
527 | continue; | |||
|
528 | } | |||
|
529 | if (level == XDL_MERGE_MINIMAL || xscr1->i1 != xscr2->i1 || | |||
|
530 | xscr1->chg1 != xscr2->chg1 || | |||
|
531 | xscr1->chg2 != xscr2->chg2 || | |||
|
532 | xdl_merge_cmp_lines(xe1, xscr1->i2, | |||
|
533 | xe2, xscr2->i2, | |||
|
534 | xscr1->chg2, xpp->flags)) { | |||
|
535 | /* conflict */ | |||
|
536 | int off = xscr1->i1 - xscr2->i1; | |||
|
537 | int ffo = off + xscr1->chg1 - xscr2->chg1; | |||
|
538 | ||||
|
539 | i0 = xscr1->i1; | |||
|
540 | i1 = xscr1->i2; | |||
|
541 | i2 = xscr2->i2; | |||
|
542 | if (off > 0) { | |||
|
543 | i0 -= off; | |||
|
544 | i1 -= off; | |||
|
545 | } | |||
|
546 | else | |||
|
547 | i2 += off; | |||
|
548 | chg0 = xscr1->i1 + xscr1->chg1 - i0; | |||
|
549 | chg1 = xscr1->i2 + xscr1->chg2 - i1; | |||
|
550 | chg2 = xscr2->i2 + xscr2->chg2 - i2; | |||
|
551 | if (ffo < 0) { | |||
|
552 | chg0 -= ffo; | |||
|
553 | chg1 -= ffo; | |||
|
554 | } else | |||
|
555 | chg2 += ffo; | |||
|
556 | if (xdl_append_merge(&c, 0, | |||
|
557 | i0, chg0, i1, chg1, i2, chg2)) { | |||
|
558 | xdl_cleanup_merge(changes); | |||
|
559 | return -1; | |||
|
560 | } | |||
|
561 | } | |||
|
562 | ||||
|
563 | i1 = xscr1->i1 + xscr1->chg1; | |||
|
564 | i2 = xscr2->i1 + xscr2->chg1; | |||
|
565 | ||||
|
566 | if (i1 >= i2) | |||
|
567 | xscr2 = xscr2->next; | |||
|
568 | if (i2 >= i1) | |||
|
569 | xscr1 = xscr1->next; | |||
|
570 | } | |||
|
571 | while (xscr1) { | |||
|
572 | if (!changes) | |||
|
573 | changes = c; | |||
|
574 | i0 = xscr1->i1; | |||
|
575 | i1 = xscr1->i2; | |||
|
576 | i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec; | |||
|
577 | chg0 = xscr1->chg1; | |||
|
578 | chg1 = xscr1->chg2; | |||
|
579 | chg2 = xscr1->chg1; | |||
|
580 | if (xdl_append_merge(&c, 1, | |||
|
581 | i0, chg0, i1, chg1, i2, chg2)) { | |||
|
582 | xdl_cleanup_merge(changes); | |||
|
583 | return -1; | |||
|
584 | } | |||
|
585 | xscr1 = xscr1->next; | |||
|
586 | } | |||
|
587 | while (xscr2) { | |||
|
588 | if (!changes) | |||
|
589 | changes = c; | |||
|
590 | i0 = xscr2->i1; | |||
|
591 | i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec; | |||
|
592 | i2 = xscr2->i2; | |||
|
593 | chg0 = xscr2->chg1; | |||
|
594 | chg1 = xscr2->chg1; | |||
|
595 | chg2 = xscr2->chg2; | |||
|
596 | if (xdl_append_merge(&c, 2, | |||
|
597 | i0, chg0, i1, chg1, i2, chg2)) { | |||
|
598 | xdl_cleanup_merge(changes); | |||
|
599 | return -1; | |||
|
600 | } | |||
|
601 | xscr2 = xscr2->next; | |||
|
602 | } | |||
|
603 | if (!changes) | |||
|
604 | changes = c; | |||
|
605 | /* refine conflicts */ | |||
|
606 | if (XDL_MERGE_ZEALOUS <= level && | |||
|
607 | (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 || | |||
|
608 | xdl_simplify_non_conflicts(xe1, changes, | |||
|
609 | XDL_MERGE_ZEALOUS < level) < 0)) { | |||
|
610 | xdl_cleanup_merge(changes); | |||
|
611 | return -1; | |||
|
612 | } | |||
|
613 | /* output */ | |||
|
614 | if (result) { | |||
|
615 | int marker_size = xmp->marker_size; | |||
|
616 | int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2, | |||
|
617 | ancestor_name, | |||
|
618 | favor, changes, NULL, style, | |||
|
619 | marker_size); | |||
|
620 | result->ptr = xdl_malloc(size); | |||
|
621 | if (!result->ptr) { | |||
|
622 | xdl_cleanup_merge(changes); | |||
|
623 | return -1; | |||
|
624 | } | |||
|
625 | result->size = size; | |||
|
626 | xdl_fill_merge_buffer(xe1, name1, xe2, name2, | |||
|
627 | ancestor_name, favor, changes, | |||
|
628 | result->ptr, style, marker_size); | |||
|
629 | } | |||
|
630 | return xdl_cleanup_merge(changes); | |||
|
631 | } | |||
|
632 | ||||
|
633 | int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2, | |||
|
634 | xmparam_t const *xmp, mmbuffer_t *result) | |||
|
635 | { | |||
|
636 | xdchange_t *xscr1, *xscr2; | |||
|
637 | xdfenv_t xe1, xe2; | |||
|
638 | int status; | |||
|
639 | xpparam_t const *xpp = &xmp->xpp; | |||
|
640 | ||||
|
641 | result->ptr = NULL; | |||
|
642 | result->size = 0; | |||
|
643 | ||||
|
644 | if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0) { | |||
|
645 | return -1; | |||
|
646 | } | |||
|
647 | if (xdl_do_diff(orig, mf2, xpp, &xe2) < 0) { | |||
|
648 | xdl_free_env(&xe1); | |||
|
649 | return -1; | |||
|
650 | } | |||
|
651 | if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 || | |||
|
652 | xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 || | |||
|
653 | xdl_build_script(&xe1, &xscr1) < 0) { | |||
|
654 | xdl_free_env(&xe1); | |||
|
655 | return -1; | |||
|
656 | } | |||
|
657 | if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 || | |||
|
658 | xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 || | |||
|
659 | xdl_build_script(&xe2, &xscr2) < 0) { | |||
|
660 | xdl_free_script(xscr1); | |||
|
661 | xdl_free_env(&xe1); | |||
|
662 | xdl_free_env(&xe2); | |||
|
663 | return -1; | |||
|
664 | } | |||
|
665 | status = 0; | |||
|
666 | if (!xscr1) { | |||
|
667 | result->ptr = xdl_malloc(mf2->size); | |||
|
668 | memcpy(result->ptr, mf2->ptr, mf2->size); | |||
|
669 | result->size = mf2->size; | |||
|
670 | } else if (!xscr2) { | |||
|
671 | result->ptr = xdl_malloc(mf1->size); | |||
|
672 | memcpy(result->ptr, mf1->ptr, mf1->size); | |||
|
673 | result->size = mf1->size; | |||
|
674 | } else { | |||
|
675 | status = xdl_do_merge(&xe1, xscr1, | |||
|
676 | &xe2, xscr2, | |||
|
677 | xmp, result); | |||
|
678 | } | |||
|
679 | xdl_free_script(xscr1); | |||
|
680 | xdl_free_script(xscr2); | |||
|
681 | ||||
|
682 | xdl_free_env(&xe1); | |||
|
683 | xdl_free_env(&xe2); | |||
|
684 | ||||
|
685 | return status; | |||
|
686 | } |
@@ -0,0 +1,390 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | #include "xinclude.h" | |||
|
23 | #include "xtypes.h" | |||
|
24 | #include "xdiff.h" | |||
|
25 | ||||
|
26 | /* | |||
|
27 | * The basic idea of patience diff is to find lines that are unique in | |||
|
28 | * both files. These are intuitively the ones that we want to see as | |||
|
29 | * common lines. | |||
|
30 | * | |||
|
31 | * The maximal ordered sequence of such line pairs (where ordered means | |||
|
32 | * that the order in the sequence agrees with the order of the lines in | |||
|
33 | * both files) naturally defines an initial set of common lines. | |||
|
34 | * | |||
|
35 | * Now, the algorithm tries to extend the set of common lines by growing | |||
|
36 | * the line ranges where the files have identical lines. | |||
|
37 | * | |||
|
38 | * Between those common lines, the patience diff algorithm is applied | |||
|
39 | * recursively, until no unique line pairs can be found; these line ranges | |||
|
40 | * are handled by the well-known Myers algorithm. | |||
|
41 | */ | |||
|
42 | ||||
|
43 | #define NON_UNIQUE ULONG_MAX | |||
|
44 | ||||
|
45 | /* | |||
|
46 | * This is a hash mapping from line hash to line numbers in the first and | |||
|
47 | * second file. | |||
|
48 | */ | |||
|
49 | struct hashmap { | |||
|
50 | int nr, alloc; | |||
|
51 | struct entry { | |||
|
52 | unsigned long hash; | |||
|
53 | /* | |||
|
54 | * 0 = unused entry, 1 = first line, 2 = second, etc. | |||
|
55 | * line2 is NON_UNIQUE if the line is not unique | |||
|
56 | * in either the first or the second file. | |||
|
57 | */ | |||
|
58 | unsigned long line1, line2; | |||
|
59 | /* | |||
|
60 | * "next" & "previous" are used for the longest common | |||
|
61 | * sequence; | |||
|
62 | * initially, "next" reflects only the order in file1. | |||
|
63 | */ | |||
|
64 | struct entry *next, *previous; | |||
|
65 | ||||
|
66 | /* | |||
|
67 | * If 1, this entry can serve as an anchor. See | |||
|
68 | * Documentation/diff-options.txt for more information. | |||
|
69 | */ | |||
|
70 | unsigned anchor : 1; | |||
|
71 | } *entries, *first, *last; | |||
|
72 | /* were common records found? */ | |||
|
73 | unsigned long has_matches; | |||
|
74 | mmfile_t *file1, *file2; | |||
|
75 | xdfenv_t *env; | |||
|
76 | xpparam_t const *xpp; | |||
|
77 | }; | |||
|
78 | ||||
|
79 | static int is_anchor(xpparam_t const *xpp, const char *line) | |||
|
80 | { | |||
|
81 | int i; | |||
|
82 | for (i = 0; i < xpp->anchors_nr; i++) { | |||
|
83 | if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i]))) | |||
|
84 | return 1; | |||
|
85 | } | |||
|
86 | return 0; | |||
|
87 | } | |||
|
88 | ||||
|
89 | /* The argument "pass" is 1 for the first file, 2 for the second. */ | |||
|
90 | static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, | |||
|
91 | int pass) | |||
|
92 | { | |||
|
93 | xrecord_t **records = pass == 1 ? | |||
|
94 | map->env->xdf1.recs : map->env->xdf2.recs; | |||
|
95 | xrecord_t *record = records[line - 1], *other; | |||
|
96 | /* | |||
|
97 | * After xdl_prepare_env() (or more precisely, due to | |||
|
98 | * xdl_classify_record()), the "ha" member of the records (AKA lines) | |||
|
99 | * is _not_ the hash anymore, but a linearized version of it. In | |||
|
100 | * other words, the "ha" member is guaranteed to start with 0 and | |||
|
101 | * the second record's ha can only be 0 or 1, etc. | |||
|
102 | * | |||
|
103 | * So we multiply ha by 2 in the hope that the hashing was | |||
|
104 | * "unique enough". | |||
|
105 | */ | |||
|
106 | int index = (int)((record->ha << 1) % map->alloc); | |||
|
107 | ||||
|
108 | while (map->entries[index].line1) { | |||
|
109 | other = map->env->xdf1.recs[map->entries[index].line1 - 1]; | |||
|
110 | if (map->entries[index].hash != record->ha || | |||
|
111 | !xdl_recmatch(record->ptr, record->size, | |||
|
112 | other->ptr, other->size, | |||
|
113 | map->xpp->flags)) { | |||
|
114 | if (++index >= map->alloc) | |||
|
115 | index = 0; | |||
|
116 | continue; | |||
|
117 | } | |||
|
118 | if (pass == 2) | |||
|
119 | map->has_matches = 1; | |||
|
120 | if (pass == 1 || map->entries[index].line2) | |||
|
121 | map->entries[index].line2 = NON_UNIQUE; | |||
|
122 | else | |||
|
123 | map->entries[index].line2 = line; | |||
|
124 | return; | |||
|
125 | } | |||
|
126 | if (pass == 2) | |||
|
127 | return; | |||
|
128 | map->entries[index].line1 = line; | |||
|
129 | map->entries[index].hash = record->ha; | |||
|
130 | map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr); | |||
|
131 | if (!map->first) | |||
|
132 | map->first = map->entries + index; | |||
|
133 | if (map->last) { | |||
|
134 | map->last->next = map->entries + index; | |||
|
135 | map->entries[index].previous = map->last; | |||
|
136 | } | |||
|
137 | map->last = map->entries + index; | |||
|
138 | map->nr++; | |||
|
139 | } | |||
|
140 | ||||
|
141 | /* | |||
|
142 | * This function has to be called for each recursion into the inter-hunk | |||
|
143 | * parts, as previously non-unique lines can become unique when being | |||
|
144 | * restricted to a smaller part of the files. | |||
|
145 | * | |||
|
146 | * It is assumed that env has been prepared using xdl_prepare(). | |||
|
147 | */ | |||
|
148 | static int fill_hashmap(mmfile_t *file1, mmfile_t *file2, | |||
|
149 | xpparam_t const *xpp, xdfenv_t *env, | |||
|
150 | struct hashmap *result, | |||
|
151 | int line1, int count1, int line2, int count2) | |||
|
152 | { | |||
|
153 | result->file1 = file1; | |||
|
154 | result->file2 = file2; | |||
|
155 | result->xpp = xpp; | |||
|
156 | result->env = env; | |||
|
157 | ||||
|
158 | /* We know exactly how large we want the hash map */ | |||
|
159 | result->alloc = count1 * 2; | |||
|
160 | result->entries = (struct entry *) | |||
|
161 | xdl_malloc(result->alloc * sizeof(struct entry)); | |||
|
162 | if (!result->entries) | |||
|
163 | return -1; | |||
|
164 | memset(result->entries, 0, result->alloc * sizeof(struct entry)); | |||
|
165 | ||||
|
166 | /* First, fill with entries from the first file */ | |||
|
167 | while (count1--) | |||
|
168 | insert_record(xpp, line1++, result, 1); | |||
|
169 | ||||
|
170 | /* Then search for matches in the second file */ | |||
|
171 | while (count2--) | |||
|
172 | insert_record(xpp, line2++, result, 2); | |||
|
173 | ||||
|
174 | return 0; | |||
|
175 | } | |||
|
176 | ||||
|
177 | /* | |||
|
178 | * Find the longest sequence with a smaller last element (meaning a smaller | |||
|
179 | * line2, as we construct the sequence with entries ordered by line1). | |||
|
180 | */ | |||
|
181 | static int binary_search(struct entry **sequence, int longest, | |||
|
182 | struct entry *entry) | |||
|
183 | { | |||
|
184 | int left = -1, right = longest; | |||
|
185 | ||||
|
186 | while (left + 1 < right) { | |||
|
187 | int middle = left + (right - left) / 2; | |||
|
188 | /* by construction, no two entries can be equal */ | |||
|
189 | if (sequence[middle]->line2 > entry->line2) | |||
|
190 | right = middle; | |||
|
191 | else | |||
|
192 | left = middle; | |||
|
193 | } | |||
|
194 | /* return the index in "sequence", _not_ the sequence length */ | |||
|
195 | return left; | |||
|
196 | } | |||
|
197 | ||||
|
198 | /* | |||
|
199 | * The idea is to start with the list of common unique lines sorted by | |||
|
200 | * the order in file1. For each of these pairs, the longest (partial) | |||
|
201 | * sequence whose last element's line2 is smaller is determined. | |||
|
202 | * | |||
|
203 | * For efficiency, the sequences are kept in a list containing exactly one | |||
|
204 | * item per sequence length: the sequence with the smallest last | |||
|
205 | * element (in terms of line2). | |||
|
206 | */ | |||
|
207 | static struct entry *find_longest_common_sequence(struct hashmap *map) | |||
|
208 | { | |||
|
209 | struct entry **sequence = xdl_malloc(map->nr * sizeof(struct entry *)); | |||
|
210 | int longest = 0, i; | |||
|
211 | struct entry *entry; | |||
|
212 | ||||
|
213 | /* | |||
|
214 | * If not -1, this entry in sequence must never be overridden. | |||
|
215 | * Therefore, overriding entries before this has no effect, so | |||
|
216 | * do not do that either. | |||
|
217 | */ | |||
|
218 | int anchor_i = -1; | |||
|
219 | ||||
|
220 | for (entry = map->first; entry; entry = entry->next) { | |||
|
221 | if (!entry->line2 || entry->line2 == NON_UNIQUE) | |||
|
222 | continue; | |||
|
223 | i = binary_search(sequence, longest, entry); | |||
|
224 | entry->previous = i < 0 ? NULL : sequence[i]; | |||
|
225 | ++i; | |||
|
226 | if (i <= anchor_i) | |||
|
227 | continue; | |||
|
228 | sequence[i] = entry; | |||
|
229 | if (entry->anchor) { | |||
|
230 | anchor_i = i; | |||
|
231 | longest = anchor_i + 1; | |||
|
232 | } else if (i == longest) { | |||
|
233 | longest++; | |||
|
234 | } | |||
|
235 | } | |||
|
236 | ||||
|
237 | /* No common unique lines were found */ | |||
|
238 | if (!longest) { | |||
|
239 | xdl_free(sequence); | |||
|
240 | return NULL; | |||
|
241 | } | |||
|
242 | ||||
|
243 | /* Iterate starting at the last element, adjusting the "next" members */ | |||
|
244 | entry = sequence[longest - 1]; | |||
|
245 | entry->next = NULL; | |||
|
246 | while (entry->previous) { | |||
|
247 | entry->previous->next = entry; | |||
|
248 | entry = entry->previous; | |||
|
249 | } | |||
|
250 | xdl_free(sequence); | |||
|
251 | return entry; | |||
|
252 | } | |||
|
253 | ||||
|
254 | static int match(struct hashmap *map, int line1, int line2) | |||
|
255 | { | |||
|
256 | xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; | |||
|
257 | xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; | |||
|
258 | return xdl_recmatch(record1->ptr, record1->size, | |||
|
259 | record2->ptr, record2->size, map->xpp->flags); | |||
|
260 | } | |||
|
261 | ||||
|
262 | static int patience_diff(mmfile_t *file1, mmfile_t *file2, | |||
|
263 | xpparam_t const *xpp, xdfenv_t *env, | |||
|
264 | int line1, int count1, int line2, int count2); | |||
|
265 | ||||
|
266 | static int walk_common_sequence(struct hashmap *map, struct entry *first, | |||
|
267 | int line1, int count1, int line2, int count2) | |||
|
268 | { | |||
|
269 | int end1 = line1 + count1, end2 = line2 + count2; | |||
|
270 | int next1, next2; | |||
|
271 | ||||
|
272 | for (;;) { | |||
|
273 | /* Try to grow the line ranges of common lines */ | |||
|
274 | if (first) { | |||
|
275 | next1 = first->line1; | |||
|
276 | next2 = first->line2; | |||
|
277 | while (next1 > line1 && next2 > line2 && | |||
|
278 | match(map, next1 - 1, next2 - 1)) { | |||
|
279 | next1--; | |||
|
280 | next2--; | |||
|
281 | } | |||
|
282 | } else { | |||
|
283 | next1 = end1; | |||
|
284 | next2 = end2; | |||
|
285 | } | |||
|
286 | while (line1 < next1 && line2 < next2 && | |||
|
287 | match(map, line1, line2)) { | |||
|
288 | line1++; | |||
|
289 | line2++; | |||
|
290 | } | |||
|
291 | ||||
|
292 | /* Recurse */ | |||
|
293 | if (next1 > line1 || next2 > line2) { | |||
|
294 | struct hashmap submap; | |||
|
295 | ||||
|
296 | memset(&submap, 0, sizeof(submap)); | |||
|
297 | if (patience_diff(map->file1, map->file2, | |||
|
298 | map->xpp, map->env, | |||
|
299 | line1, next1 - line1, | |||
|
300 | line2, next2 - line2)) | |||
|
301 | return -1; | |||
|
302 | } | |||
|
303 | ||||
|
304 | if (!first) | |||
|
305 | return 0; | |||
|
306 | ||||
|
307 | while (first->next && | |||
|
308 | first->next->line1 == first->line1 + 1 && | |||
|
309 | first->next->line2 == first->line2 + 1) | |||
|
310 | first = first->next; | |||
|
311 | ||||
|
312 | line1 = first->line1 + 1; | |||
|
313 | line2 = first->line2 + 1; | |||
|
314 | ||||
|
315 | first = first->next; | |||
|
316 | } | |||
|
317 | } | |||
|
318 | ||||
|
319 | static int fall_back_to_classic_diff(struct hashmap *map, | |||
|
320 | int line1, int count1, int line2, int count2) | |||
|
321 | { | |||
|
322 | xpparam_t xpp; | |||
|
323 | xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; | |||
|
324 | ||||
|
325 | return xdl_fall_back_diff(map->env, &xpp, | |||
|
326 | line1, count1, line2, count2); | |||
|
327 | } | |||
|
328 | ||||
|
329 | /* | |||
|
330 | * Recursively find the longest common sequence of unique lines, | |||
|
331 | * and if none was found, ask xdl_do_diff() to do the job. | |||
|
332 | * | |||
|
333 | * This function assumes that env was prepared with xdl_prepare_env(). | |||
|
334 | */ | |||
|
335 | static int patience_diff(mmfile_t *file1, mmfile_t *file2, | |||
|
336 | xpparam_t const *xpp, xdfenv_t *env, | |||
|
337 | int line1, int count1, int line2, int count2) | |||
|
338 | { | |||
|
339 | struct hashmap map; | |||
|
340 | struct entry *first; | |||
|
341 | int result = 0; | |||
|
342 | ||||
|
343 | /* trivial case: one side is empty */ | |||
|
344 | if (!count1) { | |||
|
345 | while(count2--) | |||
|
346 | env->xdf2.rchg[line2++ - 1] = 1; | |||
|
347 | return 0; | |||
|
348 | } else if (!count2) { | |||
|
349 | while(count1--) | |||
|
350 | env->xdf1.rchg[line1++ - 1] = 1; | |||
|
351 | return 0; | |||
|
352 | } | |||
|
353 | ||||
|
354 | memset(&map, 0, sizeof(map)); | |||
|
355 | if (fill_hashmap(file1, file2, xpp, env, &map, | |||
|
356 | line1, count1, line2, count2)) | |||
|
357 | return -1; | |||
|
358 | ||||
|
359 | /* are there any matching lines at all? */ | |||
|
360 | if (!map.has_matches) { | |||
|
361 | while(count1--) | |||
|
362 | env->xdf1.rchg[line1++ - 1] = 1; | |||
|
363 | while(count2--) | |||
|
364 | env->xdf2.rchg[line2++ - 1] = 1; | |||
|
365 | xdl_free(map.entries); | |||
|
366 | return 0; | |||
|
367 | } | |||
|
368 | ||||
|
369 | first = find_longest_common_sequence(&map); | |||
|
370 | if (first) | |||
|
371 | result = walk_common_sequence(&map, first, | |||
|
372 | line1, count1, line2, count2); | |||
|
373 | else | |||
|
374 | result = fall_back_to_classic_diff(&map, | |||
|
375 | line1, count1, line2, count2); | |||
|
376 | ||||
|
377 | xdl_free(map.entries); | |||
|
378 | return result; | |||
|
379 | } | |||
|
380 | ||||
|
381 | int xdl_do_patience_diff(mmfile_t *file1, mmfile_t *file2, | |||
|
382 | xpparam_t const *xpp, xdfenv_t *env) | |||
|
383 | { | |||
|
384 | if (xdl_prepare_env(file1, file2, xpp, env) < 0) | |||
|
385 | return -1; | |||
|
386 | ||||
|
387 | /* environment is cleaned up in xdl_diff() */ | |||
|
388 | return patience_diff(file1, file2, xpp, env, | |||
|
389 | 1, env->xdf1.nrec, 1, env->xdf2.nrec); | |||
|
390 | } |
@@ -0,0 +1,483 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #include "xinclude.h" | |||
|
24 | ||||
|
25 | ||||
|
26 | #define XDL_KPDIS_RUN 4 | |||
|
27 | #define XDL_MAX_EQLIMIT 1024 | |||
|
28 | #define XDL_SIMSCAN_WINDOW 100 | |||
|
29 | #define XDL_GUESS_NLINES1 256 | |||
|
30 | #define XDL_GUESS_NLINES2 20 | |||
|
31 | ||||
|
32 | ||||
|
33 | typedef struct s_xdlclass { | |||
|
34 | struct s_xdlclass *next; | |||
|
35 | unsigned long ha; | |||
|
36 | char const *line; | |||
|
37 | long size; | |||
|
38 | long idx; | |||
|
39 | long len1, len2; | |||
|
40 | } xdlclass_t; | |||
|
41 | ||||
|
42 | typedef struct s_xdlclassifier { | |||
|
43 | unsigned int hbits; | |||
|
44 | long hsize; | |||
|
45 | xdlclass_t **rchash; | |||
|
46 | chastore_t ncha; | |||
|
47 | xdlclass_t **rcrecs; | |||
|
48 | long alloc; | |||
|
49 | long count; | |||
|
50 | long flags; | |||
|
51 | } xdlclassifier_t; | |||
|
52 | ||||
|
53 | ||||
|
54 | ||||
|
55 | ||||
|
56 | static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); | |||
|
57 | static void xdl_free_classifier(xdlclassifier_t *cf); | |||
|
58 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | |||
|
59 | unsigned int hbits, xrecord_t *rec); | |||
|
60 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, | |||
|
61 | xdlclassifier_t *cf, xdfile_t *xdf); | |||
|
62 | static void xdl_free_ctx(xdfile_t *xdf); | |||
|
63 | static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | |||
|
64 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | |||
|
65 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); | |||
|
66 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | |||
|
67 | ||||
|
68 | ||||
|
69 | ||||
|
70 | ||||
|
71 | static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { | |||
|
72 | cf->flags = flags; | |||
|
73 | ||||
|
74 | cf->hbits = xdl_hashbits((unsigned int) size); | |||
|
75 | cf->hsize = 1 << cf->hbits; | |||
|
76 | ||||
|
77 | if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) { | |||
|
78 | ||||
|
79 | return -1; | |||
|
80 | } | |||
|
81 | if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) { | |||
|
82 | ||||
|
83 | xdl_cha_free(&cf->ncha); | |||
|
84 | return -1; | |||
|
85 | } | |||
|
86 | memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); | |||
|
87 | ||||
|
88 | cf->alloc = size; | |||
|
89 | if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) { | |||
|
90 | ||||
|
91 | xdl_free(cf->rchash); | |||
|
92 | xdl_cha_free(&cf->ncha); | |||
|
93 | return -1; | |||
|
94 | } | |||
|
95 | ||||
|
96 | cf->count = 0; | |||
|
97 | ||||
|
98 | return 0; | |||
|
99 | } | |||
|
100 | ||||
|
101 | ||||
|
102 | static void xdl_free_classifier(xdlclassifier_t *cf) { | |||
|
103 | ||||
|
104 | xdl_free(cf->rcrecs); | |||
|
105 | xdl_free(cf->rchash); | |||
|
106 | xdl_cha_free(&cf->ncha); | |||
|
107 | } | |||
|
108 | ||||
|
109 | ||||
|
110 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | |||
|
111 | unsigned int hbits, xrecord_t *rec) { | |||
|
112 | long hi; | |||
|
113 | char const *line; | |||
|
114 | xdlclass_t *rcrec; | |||
|
115 | xdlclass_t **rcrecs; | |||
|
116 | ||||
|
117 | line = rec->ptr; | |||
|
118 | hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | |||
|
119 | for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) | |||
|
120 | if (rcrec->ha == rec->ha && | |||
|
121 | xdl_recmatch(rcrec->line, rcrec->size, | |||
|
122 | rec->ptr, rec->size, cf->flags)) | |||
|
123 | break; | |||
|
124 | ||||
|
125 | if (!rcrec) { | |||
|
126 | if (!(rcrec = xdl_cha_alloc(&cf->ncha))) { | |||
|
127 | ||||
|
128 | return -1; | |||
|
129 | } | |||
|
130 | rcrec->idx = cf->count++; | |||
|
131 | if (cf->count > cf->alloc) { | |||
|
132 | cf->alloc *= 2; | |||
|
133 | if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) { | |||
|
134 | ||||
|
135 | return -1; | |||
|
136 | } | |||
|
137 | cf->rcrecs = rcrecs; | |||
|
138 | } | |||
|
139 | cf->rcrecs[rcrec->idx] = rcrec; | |||
|
140 | rcrec->line = line; | |||
|
141 | rcrec->size = rec->size; | |||
|
142 | rcrec->ha = rec->ha; | |||
|
143 | rcrec->len1 = rcrec->len2 = 0; | |||
|
144 | rcrec->next = cf->rchash[hi]; | |||
|
145 | cf->rchash[hi] = rcrec; | |||
|
146 | } | |||
|
147 | ||||
|
148 | (pass == 1) ? rcrec->len1++ : rcrec->len2++; | |||
|
149 | ||||
|
150 | rec->ha = (unsigned long) rcrec->idx; | |||
|
151 | ||||
|
152 | hi = (long) XDL_HASHLONG(rec->ha, hbits); | |||
|
153 | rec->next = rhash[hi]; | |||
|
154 | rhash[hi] = rec; | |||
|
155 | ||||
|
156 | return 0; | |||
|
157 | } | |||
|
158 | ||||
|
159 | ||||
|
160 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, | |||
|
161 | xdlclassifier_t *cf, xdfile_t *xdf) { | |||
|
162 | unsigned int hbits; | |||
|
163 | long nrec, hsize, bsize; | |||
|
164 | unsigned long hav; | |||
|
165 | char const *blk, *cur, *top, *prev; | |||
|
166 | xrecord_t *crec; | |||
|
167 | xrecord_t **recs, **rrecs; | |||
|
168 | xrecord_t **rhash; | |||
|
169 | unsigned long *ha; | |||
|
170 | char *rchg; | |||
|
171 | long *rindex; | |||
|
172 | ||||
|
173 | ha = NULL; | |||
|
174 | rindex = NULL; | |||
|
175 | rchg = NULL; | |||
|
176 | rhash = NULL; | |||
|
177 | recs = NULL; | |||
|
178 | ||||
|
179 | if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) | |||
|
180 | goto abort; | |||
|
181 | if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) | |||
|
182 | goto abort; | |||
|
183 | ||||
|
184 | if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) | |||
|
185 | hbits = hsize = 0; | |||
|
186 | else { | |||
|
187 | hbits = xdl_hashbits((unsigned int) narec); | |||
|
188 | hsize = 1 << hbits; | |||
|
189 | if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) | |||
|
190 | goto abort; | |||
|
191 | memset(rhash, 0, hsize * sizeof(xrecord_t *)); | |||
|
192 | } | |||
|
193 | ||||
|
194 | nrec = 0; | |||
|
195 | if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { | |||
|
196 | for (top = blk + bsize; cur < top; ) { | |||
|
197 | prev = cur; | |||
|
198 | hav = xdl_hash_record(&cur, top, xpp->flags); | |||
|
199 | if (nrec >= narec) { | |||
|
200 | narec *= 2; | |||
|
201 | if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) | |||
|
202 | goto abort; | |||
|
203 | recs = rrecs; | |||
|
204 | } | |||
|
205 | if (!(crec = xdl_cha_alloc(&xdf->rcha))) | |||
|
206 | goto abort; | |||
|
207 | crec->ptr = prev; | |||
|
208 | crec->size = (long) (cur - prev); | |||
|
209 | crec->ha = hav; | |||
|
210 | recs[nrec++] = crec; | |||
|
211 | ||||
|
212 | if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && | |||
|
213 | xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) | |||
|
214 | goto abort; | |||
|
215 | } | |||
|
216 | } | |||
|
217 | ||||
|
218 | if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) | |||
|
219 | goto abort; | |||
|
220 | memset(rchg, 0, (nrec + 2) * sizeof(char)); | |||
|
221 | ||||
|
222 | if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) | |||
|
223 | goto abort; | |||
|
224 | if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) | |||
|
225 | goto abort; | |||
|
226 | ||||
|
227 | xdf->nrec = nrec; | |||
|
228 | xdf->recs = recs; | |||
|
229 | xdf->hbits = hbits; | |||
|
230 | xdf->rhash = rhash; | |||
|
231 | xdf->rchg = rchg + 1; | |||
|
232 | xdf->rindex = rindex; | |||
|
233 | xdf->nreff = 0; | |||
|
234 | xdf->ha = ha; | |||
|
235 | xdf->dstart = 0; | |||
|
236 | xdf->dend = nrec - 1; | |||
|
237 | ||||
|
238 | return 0; | |||
|
239 | ||||
|
240 | abort: | |||
|
241 | xdl_free(ha); | |||
|
242 | xdl_free(rindex); | |||
|
243 | xdl_free(rchg); | |||
|
244 | xdl_free(rhash); | |||
|
245 | xdl_free(recs); | |||
|
246 | xdl_cha_free(&xdf->rcha); | |||
|
247 | return -1; | |||
|
248 | } | |||
|
249 | ||||
|
250 | ||||
|
251 | static void xdl_free_ctx(xdfile_t *xdf) { | |||
|
252 | ||||
|
253 | xdl_free(xdf->rhash); | |||
|
254 | xdl_free(xdf->rindex); | |||
|
255 | xdl_free(xdf->rchg - 1); | |||
|
256 | xdl_free(xdf->ha); | |||
|
257 | xdl_free(xdf->recs); | |||
|
258 | xdl_cha_free(&xdf->rcha); | |||
|
259 | } | |||
|
260 | ||||
|
261 | ||||
|
262 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
263 | xdfenv_t *xe) { | |||
|
264 | long enl1, enl2, sample; | |||
|
265 | xdlclassifier_t cf; | |||
|
266 | ||||
|
267 | memset(&cf, 0, sizeof(cf)); | |||
|
268 | ||||
|
269 | /* | |||
|
270 | * For histogram diff, we can afford a smaller sample size and | |||
|
271 | * thus a poorer estimate of the number of lines, as the hash | |||
|
272 | * table (rhash) won't be filled up/grown. The number of lines | |||
|
273 | * (nrecs) will be updated correctly anyway by | |||
|
274 | * xdl_prepare_ctx(). | |||
|
275 | */ | |||
|
276 | sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF | |||
|
277 | ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1); | |||
|
278 | ||||
|
279 | enl1 = xdl_guess_lines(mf1, sample) + 1; | |||
|
280 | enl2 = xdl_guess_lines(mf2, sample) + 1; | |||
|
281 | ||||
|
282 | if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF && | |||
|
283 | xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) | |||
|
284 | return -1; | |||
|
285 | ||||
|
286 | if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | |||
|
287 | ||||
|
288 | xdl_free_classifier(&cf); | |||
|
289 | return -1; | |||
|
290 | } | |||
|
291 | if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | |||
|
292 | ||||
|
293 | xdl_free_ctx(&xe->xdf1); | |||
|
294 | xdl_free_classifier(&cf); | |||
|
295 | return -1; | |||
|
296 | } | |||
|
297 | ||||
|
298 | if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && | |||
|
299 | (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && | |||
|
300 | xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) { | |||
|
301 | ||||
|
302 | xdl_free_ctx(&xe->xdf2); | |||
|
303 | xdl_free_ctx(&xe->xdf1); | |||
|
304 | xdl_free_classifier(&cf); | |||
|
305 | return -1; | |||
|
306 | } | |||
|
307 | ||||
|
308 | if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) | |||
|
309 | xdl_free_classifier(&cf); | |||
|
310 | ||||
|
311 | return 0; | |||
|
312 | } | |||
|
313 | ||||
|
314 | ||||
|
315 | void xdl_free_env(xdfenv_t *xe) { | |||
|
316 | ||||
|
317 | xdl_free_ctx(&xe->xdf2); | |||
|
318 | xdl_free_ctx(&xe->xdf1); | |||
|
319 | } | |||
|
320 | ||||
|
321 | ||||
|
322 | static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { | |||
|
323 | long r, rdis0, rpdis0, rdis1, rpdis1; | |||
|
324 | ||||
|
325 | /* | |||
|
326 | * Limits the window the is examined during the similar-lines | |||
|
327 | * scan. The loops below stops when dis[i - r] == 1 (line that | |||
|
328 | * has no match), but there are corner cases where the loop | |||
|
329 | * proceed all the way to the extremities by causing huge | |||
|
330 | * performance penalties in case of big files. | |||
|
331 | */ | |||
|
332 | if (i - s > XDL_SIMSCAN_WINDOW) | |||
|
333 | s = i - XDL_SIMSCAN_WINDOW; | |||
|
334 | if (e - i > XDL_SIMSCAN_WINDOW) | |||
|
335 | e = i + XDL_SIMSCAN_WINDOW; | |||
|
336 | ||||
|
337 | /* | |||
|
338 | * Scans the lines before 'i' to find a run of lines that either | |||
|
339 | * have no match (dis[j] == 0) or have multiple matches (dis[j] > 1). | |||
|
340 | * Note that we always call this function with dis[i] > 1, so the | |||
|
341 | * current line (i) is already a multimatch line. | |||
|
342 | */ | |||
|
343 | for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) { | |||
|
344 | if (!dis[i - r]) | |||
|
345 | rdis0++; | |||
|
346 | else if (dis[i - r] == 2) | |||
|
347 | rpdis0++; | |||
|
348 | else | |||
|
349 | break; | |||
|
350 | } | |||
|
351 | /* | |||
|
352 | * If the run before the line 'i' found only multimatch lines, we | |||
|
353 | * return 0 and hence we don't make the current line (i) discarded. | |||
|
354 | * We want to discard multimatch lines only when they appear in the | |||
|
355 | * middle of runs with nomatch lines (dis[j] == 0). | |||
|
356 | */ | |||
|
357 | if (rdis0 == 0) | |||
|
358 | return 0; | |||
|
359 | for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) { | |||
|
360 | if (!dis[i + r]) | |||
|
361 | rdis1++; | |||
|
362 | else if (dis[i + r] == 2) | |||
|
363 | rpdis1++; | |||
|
364 | else | |||
|
365 | break; | |||
|
366 | } | |||
|
367 | /* | |||
|
368 | * If the run after the line 'i' found only multimatch lines, we | |||
|
369 | * return 0 and hence we don't make the current line (i) discarded. | |||
|
370 | */ | |||
|
371 | if (rdis1 == 0) | |||
|
372 | return 0; | |||
|
373 | rdis1 += rdis0; | |||
|
374 | rpdis1 += rpdis0; | |||
|
375 | ||||
|
376 | return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1); | |||
|
377 | } | |||
|
378 | ||||
|
379 | ||||
|
380 | /* | |||
|
381 | * Try to reduce the problem complexity, discard records that have no | |||
|
382 | * matches on the other file. Also, lines that have multiple matches | |||
|
383 | * might be potentially discarded if they happear in a run of discardable. | |||
|
384 | */ | |||
|
385 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { | |||
|
386 | long i, nm, nreff, mlim; | |||
|
387 | xrecord_t **recs; | |||
|
388 | xdlclass_t *rcrec; | |||
|
389 | char *dis, *dis1, *dis2; | |||
|
390 | ||||
|
391 | if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { | |||
|
392 | ||||
|
393 | return -1; | |||
|
394 | } | |||
|
395 | memset(dis, 0, xdf1->nrec + xdf2->nrec + 2); | |||
|
396 | dis1 = dis; | |||
|
397 | dis2 = dis1 + xdf1->nrec + 1; | |||
|
398 | ||||
|
399 | if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) | |||
|
400 | mlim = XDL_MAX_EQLIMIT; | |||
|
401 | for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { | |||
|
402 | rcrec = cf->rcrecs[(*recs)->ha]; | |||
|
403 | nm = rcrec ? rcrec->len2 : 0; | |||
|
404 | dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; | |||
|
405 | } | |||
|
406 | ||||
|
407 | if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) | |||
|
408 | mlim = XDL_MAX_EQLIMIT; | |||
|
409 | for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { | |||
|
410 | rcrec = cf->rcrecs[(*recs)->ha]; | |||
|
411 | nm = rcrec ? rcrec->len1 : 0; | |||
|
412 | dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; | |||
|
413 | } | |||
|
414 | ||||
|
415 | for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; | |||
|
416 | i <= xdf1->dend; i++, recs++) { | |||
|
417 | if (dis1[i] == 1 || | |||
|
418 | (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { | |||
|
419 | xdf1->rindex[nreff] = i; | |||
|
420 | xdf1->ha[nreff] = (*recs)->ha; | |||
|
421 | nreff++; | |||
|
422 | } else | |||
|
423 | xdf1->rchg[i] = 1; | |||
|
424 | } | |||
|
425 | xdf1->nreff = nreff; | |||
|
426 | ||||
|
427 | for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; | |||
|
428 | i <= xdf2->dend; i++, recs++) { | |||
|
429 | if (dis2[i] == 1 || | |||
|
430 | (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { | |||
|
431 | xdf2->rindex[nreff] = i; | |||
|
432 | xdf2->ha[nreff] = (*recs)->ha; | |||
|
433 | nreff++; | |||
|
434 | } else | |||
|
435 | xdf2->rchg[i] = 1; | |||
|
436 | } | |||
|
437 | xdf2->nreff = nreff; | |||
|
438 | ||||
|
439 | xdl_free(dis); | |||
|
440 | ||||
|
441 | return 0; | |||
|
442 | } | |||
|
443 | ||||
|
444 | ||||
|
445 | /* | |||
|
446 | * Early trim initial and terminal matching records. | |||
|
447 | */ | |||
|
448 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { | |||
|
449 | long i, lim; | |||
|
450 | xrecord_t **recs1, **recs2; | |||
|
451 | ||||
|
452 | recs1 = xdf1->recs; | |||
|
453 | recs2 = xdf2->recs; | |||
|
454 | for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; | |||
|
455 | i++, recs1++, recs2++) | |||
|
456 | if ((*recs1)->ha != (*recs2)->ha) | |||
|
457 | break; | |||
|
458 | ||||
|
459 | xdf1->dstart = xdf2->dstart = i; | |||
|
460 | ||||
|
461 | recs1 = xdf1->recs + xdf1->nrec - 1; | |||
|
462 | recs2 = xdf2->recs + xdf2->nrec - 1; | |||
|
463 | for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) | |||
|
464 | if ((*recs1)->ha != (*recs2)->ha) | |||
|
465 | break; | |||
|
466 | ||||
|
467 | xdf1->dend = xdf1->nrec - i - 1; | |||
|
468 | xdf2->dend = xdf2->nrec - i - 1; | |||
|
469 | ||||
|
470 | return 0; | |||
|
471 | } | |||
|
472 | ||||
|
473 | ||||
|
474 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { | |||
|
475 | ||||
|
476 | if (xdl_trim_ends(xdf1, xdf2) < 0 || | |||
|
477 | xdl_cleanup_records(cf, xdf1, xdf2) < 0) { | |||
|
478 | ||||
|
479 | return -1; | |||
|
480 | } | |||
|
481 | ||||
|
482 | return 0; | |||
|
483 | } |
@@ -0,0 +1,34 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XPREPARE_H) | |||
|
24 | #define XPREPARE_H | |||
|
25 | ||||
|
26 | ||||
|
27 | ||||
|
28 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |||
|
29 | xdfenv_t *xe); | |||
|
30 | void xdl_free_env(xdfenv_t *xe); | |||
|
31 | ||||
|
32 | ||||
|
33 | ||||
|
34 | #endif /* #if !defined(XPREPARE_H) */ |
@@ -0,0 +1,67 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XTYPES_H) | |||
|
24 | #define XTYPES_H | |||
|
25 | ||||
|
26 | ||||
|
27 | ||||
|
28 | typedef struct s_chanode { | |||
|
29 | struct s_chanode *next; | |||
|
30 | long icurr; | |||
|
31 | } chanode_t; | |||
|
32 | ||||
|
33 | typedef struct s_chastore { | |||
|
34 | chanode_t *head, *tail; | |||
|
35 | long isize, nsize; | |||
|
36 | chanode_t *ancur; | |||
|
37 | chanode_t *sncur; | |||
|
38 | long scurr; | |||
|
39 | } chastore_t; | |||
|
40 | ||||
|
41 | typedef struct s_xrecord { | |||
|
42 | struct s_xrecord *next; | |||
|
43 | char const *ptr; | |||
|
44 | long size; | |||
|
45 | unsigned long ha; | |||
|
46 | } xrecord_t; | |||
|
47 | ||||
|
48 | typedef struct s_xdfile { | |||
|
49 | chastore_t rcha; | |||
|
50 | long nrec; | |||
|
51 | unsigned int hbits; | |||
|
52 | xrecord_t **rhash; | |||
|
53 | long dstart, dend; | |||
|
54 | xrecord_t **recs; | |||
|
55 | char *rchg; | |||
|
56 | long *rindex; | |||
|
57 | long nreff; | |||
|
58 | unsigned long *ha; | |||
|
59 | } xdfile_t; | |||
|
60 | ||||
|
61 | typedef struct s_xdfenv { | |||
|
62 | xdfile_t xdf1, xdf2; | |||
|
63 | } xdfenv_t; | |||
|
64 | ||||
|
65 | ||||
|
66 | ||||
|
67 | #endif /* #if !defined(XTYPES_H) */ |
@@ -0,0 +1,425 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #include <limits.h> | |||
|
24 | #include <assert.h> | |||
|
25 | #include "xinclude.h" | |||
|
26 | ||||
|
27 | ||||
|
28 | ||||
|
29 | ||||
|
30 | long xdl_bogosqrt(long n) { | |||
|
31 | long i; | |||
|
32 | ||||
|
33 | /* | |||
|
34 | * Classical integer square root approximation using shifts. | |||
|
35 | */ | |||
|
36 | for (i = 1; n > 0; n >>= 2) | |||
|
37 | i <<= 1; | |||
|
38 | ||||
|
39 | return i; | |||
|
40 | } | |||
|
41 | ||||
|
42 | ||||
|
43 | int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, | |||
|
44 | xdemitcb_t *ecb) { | |||
|
45 | int i = 2; | |||
|
46 | mmbuffer_t mb[3]; | |||
|
47 | ||||
|
48 | mb[0].ptr = (char *) pre; | |||
|
49 | mb[0].size = psize; | |||
|
50 | mb[1].ptr = (char *) rec; | |||
|
51 | mb[1].size = size; | |||
|
52 | if (size > 0 && rec[size - 1] != '\n') { | |||
|
53 | mb[2].ptr = (char *) "\n\\ No newline at end of file\n"; | |||
|
54 | mb[2].size = strlen(mb[2].ptr); | |||
|
55 | i++; | |||
|
56 | } | |||
|
57 | if (ecb->outf(ecb->priv, mb, i) < 0) { | |||
|
58 | ||||
|
59 | return -1; | |||
|
60 | } | |||
|
61 | ||||
|
62 | return 0; | |||
|
63 | } | |||
|
64 | ||||
|
65 | void *xdl_mmfile_first(mmfile_t *mmf, long *size) | |||
|
66 | { | |||
|
67 | *size = mmf->size; | |||
|
68 | return mmf->ptr; | |||
|
69 | } | |||
|
70 | ||||
|
71 | ||||
|
72 | long xdl_mmfile_size(mmfile_t *mmf) | |||
|
73 | { | |||
|
74 | return mmf->size; | |||
|
75 | } | |||
|
76 | ||||
|
77 | ||||
|
78 | int xdl_cha_init(chastore_t *cha, long isize, long icount) { | |||
|
79 | ||||
|
80 | cha->head = cha->tail = NULL; | |||
|
81 | cha->isize = isize; | |||
|
82 | cha->nsize = icount * isize; | |||
|
83 | cha->ancur = cha->sncur = NULL; | |||
|
84 | cha->scurr = 0; | |||
|
85 | ||||
|
86 | return 0; | |||
|
87 | } | |||
|
88 | ||||
|
89 | ||||
|
90 | void xdl_cha_free(chastore_t *cha) { | |||
|
91 | chanode_t *cur, *tmp; | |||
|
92 | ||||
|
93 | for (cur = cha->head; (tmp = cur) != NULL;) { | |||
|
94 | cur = cur->next; | |||
|
95 | xdl_free(tmp); | |||
|
96 | } | |||
|
97 | } | |||
|
98 | ||||
|
99 | ||||
|
100 | void *xdl_cha_alloc(chastore_t *cha) { | |||
|
101 | chanode_t *ancur; | |||
|
102 | void *data; | |||
|
103 | ||||
|
104 | if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) { | |||
|
105 | if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) { | |||
|
106 | ||||
|
107 | return NULL; | |||
|
108 | } | |||
|
109 | ancur->icurr = 0; | |||
|
110 | ancur->next = NULL; | |||
|
111 | if (cha->tail) | |||
|
112 | cha->tail->next = ancur; | |||
|
113 | if (!cha->head) | |||
|
114 | cha->head = ancur; | |||
|
115 | cha->tail = ancur; | |||
|
116 | cha->ancur = ancur; | |||
|
117 | } | |||
|
118 | ||||
|
119 | data = (char *) ancur + sizeof(chanode_t) + ancur->icurr; | |||
|
120 | ancur->icurr += cha->isize; | |||
|
121 | ||||
|
122 | return data; | |||
|
123 | } | |||
|
124 | ||||
|
125 | long xdl_guess_lines(mmfile_t *mf, long sample) { | |||
|
126 | long nl = 0, size, tsize = 0; | |||
|
127 | char const *data, *cur, *top; | |||
|
128 | ||||
|
129 | if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { | |||
|
130 | for (top = data + size; nl < sample && cur < top; ) { | |||
|
131 | nl++; | |||
|
132 | if (!(cur = memchr(cur, '\n', top - cur))) | |||
|
133 | cur = top; | |||
|
134 | else | |||
|
135 | cur++; | |||
|
136 | } | |||
|
137 | tsize += (long) (cur - data); | |||
|
138 | } | |||
|
139 | ||||
|
140 | if (nl && tsize) | |||
|
141 | nl = xdl_mmfile_size(mf) / (tsize / nl); | |||
|
142 | ||||
|
143 | return nl + 1; | |||
|
144 | } | |||
|
145 | ||||
|
146 | int xdl_blankline(const char *line, long size, long flags) | |||
|
147 | { | |||
|
148 | long i; | |||
|
149 | ||||
|
150 | if (!(flags & XDF_WHITESPACE_FLAGS)) | |||
|
151 | return (size <= 1); | |||
|
152 | ||||
|
153 | for (i = 0; i < size && XDL_ISSPACE(line[i]); i++) | |||
|
154 | ; | |||
|
155 | ||||
|
156 | return (i == size); | |||
|
157 | } | |||
|
158 | ||||
|
159 | /* | |||
|
160 | * Have we eaten everything on the line, except for an optional | |||
|
161 | * CR at the very end? | |||
|
162 | */ | |||
|
163 | static int ends_with_optional_cr(const char *l, long s, long i) | |||
|
164 | { | |||
|
165 | int complete = s && l[s-1] == '\n'; | |||
|
166 | ||||
|
167 | if (complete) | |||
|
168 | s--; | |||
|
169 | if (s == i) | |||
|
170 | return 1; | |||
|
171 | /* do not ignore CR at the end of an incomplete line */ | |||
|
172 | if (complete && s == i + 1 && l[i] == '\r') | |||
|
173 | return 1; | |||
|
174 | return 0; | |||
|
175 | } | |||
|
176 | ||||
|
177 | int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) | |||
|
178 | { | |||
|
179 | int i1, i2; | |||
|
180 | ||||
|
181 | if (s1 == s2 && !memcmp(l1, l2, s1)) | |||
|
182 | return 1; | |||
|
183 | if (!(flags & XDF_WHITESPACE_FLAGS)) | |||
|
184 | return 0; | |||
|
185 | ||||
|
186 | i1 = 0; | |||
|
187 | i2 = 0; | |||
|
188 | ||||
|
189 | /* | |||
|
190 | * -w matches everything that matches with -b, and -b in turn | |||
|
191 | * matches everything that matches with --ignore-space-at-eol, | |||
|
192 | * which in turn matches everything that matches with --ignore-cr-at-eol. | |||
|
193 | * | |||
|
194 | * Each flavor of ignoring needs different logic to skip whitespaces | |||
|
195 | * while we have both sides to compare. | |||
|
196 | */ | |||
|
197 | if (flags & XDF_IGNORE_WHITESPACE) { | |||
|
198 | goto skip_ws; | |||
|
199 | while (i1 < s1 && i2 < s2) { | |||
|
200 | if (l1[i1++] != l2[i2++]) | |||
|
201 | return 0; | |||
|
202 | skip_ws: | |||
|
203 | while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |||
|
204 | i1++; | |||
|
205 | while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |||
|
206 | i2++; | |||
|
207 | } | |||
|
208 | } else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) { | |||
|
209 | while (i1 < s1 && i2 < s2) { | |||
|
210 | if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) { | |||
|
211 | /* Skip matching spaces and try again */ | |||
|
212 | while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |||
|
213 | i1++; | |||
|
214 | while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |||
|
215 | i2++; | |||
|
216 | continue; | |||
|
217 | } | |||
|
218 | if (l1[i1++] != l2[i2++]) | |||
|
219 | return 0; | |||
|
220 | } | |||
|
221 | } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) { | |||
|
222 | while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { | |||
|
223 | i1++; | |||
|
224 | i2++; | |||
|
225 | } | |||
|
226 | } else if (flags & XDF_IGNORE_CR_AT_EOL) { | |||
|
227 | /* Find the first difference and see how the line ends */ | |||
|
228 | while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { | |||
|
229 | i1++; | |||
|
230 | i2++; | |||
|
231 | } | |||
|
232 | return (ends_with_optional_cr(l1, s1, i1) && | |||
|
233 | ends_with_optional_cr(l2, s2, i2)); | |||
|
234 | } | |||
|
235 | ||||
|
236 | /* | |||
|
237 | * After running out of one side, the remaining side must have | |||
|
238 | * nothing but whitespace for the lines to match. Note that | |||
|
239 | * ignore-whitespace-at-eol case may break out of the loop | |||
|
240 | * while there still are characters remaining on both lines. | |||
|
241 | */ | |||
|
242 | if (i1 < s1) { | |||
|
243 | while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |||
|
244 | i1++; | |||
|
245 | if (s1 != i1) | |||
|
246 | return 0; | |||
|
247 | } | |||
|
248 | if (i2 < s2) { | |||
|
249 | while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |||
|
250 | i2++; | |||
|
251 | return (s2 == i2); | |||
|
252 | } | |||
|
253 | return 1; | |||
|
254 | } | |||
|
255 | ||||
|
256 | static unsigned long xdl_hash_record_with_whitespace(char const **data, | |||
|
257 | char const *top, long flags) { | |||
|
258 | unsigned long ha = 5381; | |||
|
259 | char const *ptr = *data; | |||
|
260 | int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; | |||
|
261 | ||||
|
262 | for (; ptr < top && *ptr != '\n'; ptr++) { | |||
|
263 | if (cr_at_eol_only) { | |||
|
264 | /* do not ignore CR at the end of an incomplete line */ | |||
|
265 | if (*ptr == '\r' && | |||
|
266 | (ptr + 1 < top && ptr[1] == '\n')) | |||
|
267 | continue; | |||
|
268 | } | |||
|
269 | else if (XDL_ISSPACE(*ptr)) { | |||
|
270 | const char *ptr2 = ptr; | |||
|
271 | int at_eol; | |||
|
272 | while (ptr + 1 < top && XDL_ISSPACE(ptr[1]) | |||
|
273 | && ptr[1] != '\n') | |||
|
274 | ptr++; | |||
|
275 | at_eol = (top <= ptr + 1 || ptr[1] == '\n'); | |||
|
276 | if (flags & XDF_IGNORE_WHITESPACE) | |||
|
277 | ; /* already handled */ | |||
|
278 | else if (flags & XDF_IGNORE_WHITESPACE_CHANGE | |||
|
279 | && !at_eol) { | |||
|
280 | ha += (ha << 5); | |||
|
281 | ha ^= (unsigned long) ' '; | |||
|
282 | } | |||
|
283 | else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL | |||
|
284 | && !at_eol) { | |||
|
285 | while (ptr2 != ptr + 1) { | |||
|
286 | ha += (ha << 5); | |||
|
287 | ha ^= (unsigned long) *ptr2; | |||
|
288 | ptr2++; | |||
|
289 | } | |||
|
290 | } | |||
|
291 | continue; | |||
|
292 | } | |||
|
293 | ha += (ha << 5); | |||
|
294 | ha ^= (unsigned long) *ptr; | |||
|
295 | } | |||
|
296 | *data = ptr < top ? ptr + 1: ptr; | |||
|
297 | ||||
|
298 | return ha; | |||
|
299 | } | |||
|
300 | ||||
|
301 | unsigned long xdl_hash_record(char const **data, char const *top, long flags) { | |||
|
302 | unsigned long ha = 5381; | |||
|
303 | char const *ptr = *data; | |||
|
304 | ||||
|
305 | if (flags & XDF_WHITESPACE_FLAGS) | |||
|
306 | return xdl_hash_record_with_whitespace(data, top, flags); | |||
|
307 | ||||
|
308 | for (; ptr < top && *ptr != '\n'; ptr++) { | |||
|
309 | ha += (ha << 5); | |||
|
310 | ha ^= (unsigned long) *ptr; | |||
|
311 | } | |||
|
312 | *data = ptr < top ? ptr + 1: ptr; | |||
|
313 | ||||
|
314 | return ha; | |||
|
315 | } | |||
|
316 | ||||
|
317 | unsigned int xdl_hashbits(unsigned int size) { | |||
|
318 | unsigned int val = 1, bits = 0; | |||
|
319 | ||||
|
320 | for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, bits++); | |||
|
321 | return bits ? bits: 1; | |||
|
322 | } | |||
|
323 | ||||
|
324 | ||||
|
325 | int xdl_num_out(char *out, long val) { | |||
|
326 | char *ptr, *str = out; | |||
|
327 | char buf[32]; | |||
|
328 | ||||
|
329 | ptr = buf + sizeof(buf) - 1; | |||
|
330 | *ptr = '\0'; | |||
|
331 | if (val < 0) { | |||
|
332 | *--ptr = '-'; | |||
|
333 | val = -val; | |||
|
334 | } | |||
|
335 | for (; val && ptr > buf; val /= 10) | |||
|
336 | *--ptr = "0123456789"[val % 10]; | |||
|
337 | if (*ptr) | |||
|
338 | for (; *ptr; ptr++, str++) | |||
|
339 | *str = *ptr; | |||
|
340 | else | |||
|
341 | *str++ = '0'; | |||
|
342 | *str = '\0'; | |||
|
343 | ||||
|
344 | return str - out; | |||
|
345 | } | |||
|
346 | ||||
|
347 | int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, | |||
|
348 | const char *func, long funclen, xdemitcb_t *ecb) { | |||
|
349 | int nb = 0; | |||
|
350 | mmbuffer_t mb; | |||
|
351 | char buf[128]; | |||
|
352 | ||||
|
353 | memcpy(buf, "@@ -", 4); | |||
|
354 | nb += 4; | |||
|
355 | ||||
|
356 | nb += xdl_num_out(buf + nb, c1 ? s1: s1 - 1); | |||
|
357 | ||||
|
358 | if (c1 != 1) { | |||
|
359 | memcpy(buf + nb, ",", 1); | |||
|
360 | nb += 1; | |||
|
361 | ||||
|
362 | nb += xdl_num_out(buf + nb, c1); | |||
|
363 | } | |||
|
364 | ||||
|
365 | memcpy(buf + nb, " +", 2); | |||
|
366 | nb += 2; | |||
|
367 | ||||
|
368 | nb += xdl_num_out(buf + nb, c2 ? s2: s2 - 1); | |||
|
369 | ||||
|
370 | if (c2 != 1) { | |||
|
371 | memcpy(buf + nb, ",", 1); | |||
|
372 | nb += 1; | |||
|
373 | ||||
|
374 | nb += xdl_num_out(buf + nb, c2); | |||
|
375 | } | |||
|
376 | ||||
|
377 | memcpy(buf + nb, " @@", 3); | |||
|
378 | nb += 3; | |||
|
379 | if (func && funclen) { | |||
|
380 | buf[nb++] = ' '; | |||
|
381 | if (funclen > sizeof(buf) - nb - 1) | |||
|
382 | funclen = sizeof(buf) - nb - 1; | |||
|
383 | memcpy(buf + nb, func, funclen); | |||
|
384 | nb += funclen; | |||
|
385 | } | |||
|
386 | buf[nb++] = '\n'; | |||
|
387 | ||||
|
388 | mb.ptr = buf; | |||
|
389 | mb.size = nb; | |||
|
390 | if (ecb->outf(ecb->priv, &mb, 1) < 0) | |||
|
391 | return -1; | |||
|
392 | ||||
|
393 | return 0; | |||
|
394 | } | |||
|
395 | ||||
|
396 | int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, | |||
|
397 | int line1, int count1, int line2, int count2) | |||
|
398 | { | |||
|
399 | /* | |||
|
400 | * This probably does not work outside Git, since | |||
|
401 | * we have a very simple mmfile structure. | |||
|
402 | * | |||
|
403 | * Note: ideally, we would reuse the prepared environment, but | |||
|
404 | * the libxdiff interface does not (yet) allow for diffing only | |||
|
405 | * ranges of lines instead of the whole files. | |||
|
406 | */ | |||
|
407 | mmfile_t subfile1, subfile2; | |||
|
408 | xdfenv_t env; | |||
|
409 | ||||
|
410 | subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; | |||
|
411 | subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + | |||
|
412 | diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; | |||
|
413 | subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; | |||
|
414 | subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + | |||
|
415 | diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; | |||
|
416 | if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) | |||
|
417 | return -1; | |||
|
418 | ||||
|
419 | memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); | |||
|
420 | memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); | |||
|
421 | ||||
|
422 | xdl_free_env(&env); | |||
|
423 | ||||
|
424 | return 0; | |||
|
425 | } |
@@ -0,0 +1,47 b'' | |||||
|
1 | /* | |||
|
2 | * LibXDiff by Davide Libenzi ( File Differential Library ) | |||
|
3 | * Copyright (C) 2003 Davide Libenzi | |||
|
4 | * | |||
|
5 | * This library is free software; you can redistribute it and/or | |||
|
6 | * modify it under the terms of the GNU Lesser General Public | |||
|
7 | * License as published by the Free Software Foundation; either | |||
|
8 | * version 2.1 of the License, or (at your option) any later version. | |||
|
9 | * | |||
|
10 | * This library is distributed in the hope that it will be useful, | |||
|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
|
13 | * Lesser General Public License for more details. | |||
|
14 | * | |||
|
15 | * You should have received a copy of the GNU Lesser General Public | |||
|
16 | * License along with this library; if not, see | |||
|
17 | * <http://www.gnu.org/licenses/>. | |||
|
18 | * | |||
|
19 | * Davide Libenzi <davidel@xmailserver.org> | |||
|
20 | * | |||
|
21 | */ | |||
|
22 | ||||
|
23 | #if !defined(XUTILS_H) | |||
|
24 | #define XUTILS_H | |||
|
25 | ||||
|
26 | ||||
|
27 | ||||
|
28 | long xdl_bogosqrt(long n); | |||
|
29 | int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, | |||
|
30 | xdemitcb_t *ecb); | |||
|
31 | int xdl_cha_init(chastore_t *cha, long isize, long icount); | |||
|
32 | void xdl_cha_free(chastore_t *cha); | |||
|
33 | void *xdl_cha_alloc(chastore_t *cha); | |||
|
34 | long xdl_guess_lines(mmfile_t *mf, long sample); | |||
|
35 | int xdl_blankline(const char *line, long size, long flags); | |||
|
36 | int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); | |||
|
37 | unsigned long xdl_hash_record(char const **data, char const *top, long flags); | |||
|
38 | unsigned int xdl_hashbits(unsigned int size); | |||
|
39 | int xdl_num_out(char *out, long val); | |||
|
40 | int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, | |||
|
41 | const char *func, long funclen, xdemitcb_t *ecb); | |||
|
42 | int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, | |||
|
43 | int line1, int count1, int line2, int count2); | |||
|
44 | ||||
|
45 | ||||
|
46 | ||||
|
47 | #endif /* #if !defined(XUTILS_H) */ |
@@ -52,3 +52,18 b' contrib/python-zstandard/zstd/dictBuilde' | |||||
52 | contrib/python-zstandard/zstd/dictBuilder/zdict.h |
|
52 | contrib/python-zstandard/zstd/dictBuilder/zdict.h | |
53 | contrib/python-zstandard/zstd/zstd.h |
|
53 | contrib/python-zstandard/zstd/zstd.h | |
54 | hgext/fsmonitor/pywatchman/bser.c |
|
54 | hgext/fsmonitor/pywatchman/bser.c | |
|
55 | mercurial/thirdparty/xdiff/xdiff.h | |||
|
56 | mercurial/thirdparty/xdiff/xdiffi.c | |||
|
57 | mercurial/thirdparty/xdiff/xdiffi.h | |||
|
58 | mercurial/thirdparty/xdiff/xemit.c | |||
|
59 | mercurial/thirdparty/xdiff/xemit.h | |||
|
60 | mercurial/thirdparty/xdiff/xhistogram.c | |||
|
61 | mercurial/thirdparty/xdiff/xinclude.h | |||
|
62 | mercurial/thirdparty/xdiff/xmacros.h | |||
|
63 | mercurial/thirdparty/xdiff/xmerge.c | |||
|
64 | mercurial/thirdparty/xdiff/xpatience.c | |||
|
65 | mercurial/thirdparty/xdiff/xprepare.c | |||
|
66 | mercurial/thirdparty/xdiff/xprepare.h | |||
|
67 | mercurial/thirdparty/xdiff/xtypes.h | |||
|
68 | mercurial/thirdparty/xdiff/xutils.c | |||
|
69 | mercurial/thirdparty/xdiff/xutils.h |
General Comments 0
You need to be logged in to leave comments.
Login now