##// END OF EJS Templates
xdiff: add a preprocessing step that trims files...
Jun Wu -
r36838:f33a87cf default
parent child Browse files
Show More
@@ -1062,6 +1062,7 b' xdchange_t *xdl_get_hunk(xdchange_t **xs'
1062 1062 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
1063 1063 xdemitconf_t const *xecfg)
1064 1064 {
1065 long p = xe->nprefix, s = xe->nsuffix;
1065 1066 xdchange_t *xch, *xche;
1066 1067
1067 1068 if (!xecfg->hunk_func)
@@ -1073,6 +1074,10 b' static int xdl_call_hunk_func(xdfenv_t *'
1073 1074 xche = xdl_get_hunk(&xch, xecfg);
1074 1075 if (!xch)
1075 1076 break;
1077 if (xch != xche)
1078 xdl_bug("xch != xche");
1079 xch->i1 += p;
1080 xch->i2 += p;
1076 1081 if (xch->i1 > i1 || xch->i2 > i2) {
1077 1082 if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
1078 1083 return -1;
@@ -1080,16 +1085,18 b' static int xdl_call_hunk_func(xdfenv_t *'
1080 1085 i1 = xche->i1 + xche->chg1;
1081 1086 i2 = xche->i2 + xche->chg2;
1082 1087 }
1083 if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
1088 if (xecfg->hunk_func(i1, n1 + p + s, i2, n2 + p + s,
1089 ecb->priv) < 0)
1084 1090 return -1;
1085 1091 } else {
1086 1092 for (xch = xscr; xch; xch = xche->next) {
1087 1093 xche = xdl_get_hunk(&xch, xecfg);
1088 1094 if (!xch)
1089 1095 break;
1090 if (xecfg->hunk_func(
1091 xch->i1, xche->i1 + xche->chg1 - xch->i1,
1092 xch->i2, xche->i2 + xche->chg2 - xch->i2,
1096 if (xecfg->hunk_func(xch->i1 + p,
1097 xche->i1 + xche->chg1 - xch->i1,
1098 xch->i2 + p,
1099 xche->i2 + xche->chg2 - xch->i2,
1093 1100 ecb->priv) < 0)
1094 1101 return -1;
1095 1102 }
@@ -156,6 +156,87 b' static int xdl_classify_record(unsigned '
156 156 }
157 157
158 158
159 /*
160 * Trim common prefix from files.
161 *
162 * Note: trimming could affect hunk shifting. But the performance benefit
163 * outweighs the shift change. A diff result with suboptimal shifting is still
164 * valid.
165 */
166 static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
167 xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
168 mmfile_t msmall, mlarge;
169 /* prefix lines, prefix bytes, suffix lines, suffix bytes */
170 long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
171 /* prefix char pointer for msmall and mlarge */
172 const char *pp1, *pp2;
173 /* suffix char pointer for msmall and mlarge */
174 const char *ps1, *ps2;
175
176 /* reserved must >= 0 for the line boundary adjustment to work */
177 if (reserved < 0)
178 reserved = 0;
179
180 if (mf1->size < mf2->size) {
181 memcpy(&msmall, mf1, sizeof(mmfile_t));
182 memcpy(&mlarge, mf2, sizeof(mmfile_t));
183 } else {
184 memcpy(&msmall, mf2, sizeof(mmfile_t));
185 memcpy(&mlarge, mf1, sizeof(mmfile_t));
186 }
187
188 pp1 = msmall.ptr, pp2 = mlarge.ptr;
189 for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
190 plines += (*pp1 == '\n');
191 pp1++, pp2++;
192 }
193
194 ps1 = msmall.ptr + msmall.size - 1, ps2 = mlarge.ptr + mlarge.size - 1;
195 while (ps1 > pp1 && *ps1 == *ps2) {
196 slines += (*ps1 == '\n');
197 ps1--, ps2--;
198 }
199
200 /* Retract common prefix and suffix boundaries for reserved lines */
201 if (plines <= reserved + 1) {
202 plines = 0;
203 } else {
204 i = 0;
205 while (i <= reserved) {
206 pp1--;
207 i += (*pp1 == '\n');
208 }
209 /* The new mmfile starts at the next char just after '\n' */
210 pbytes = pp1 - msmall.ptr + 1;
211 plines -= reserved;
212 }
213
214 if (slines <= reserved + 1) {
215 slines = 0;
216 } else {
217 /* Note: with compiler SIMD support (ex. -O3 -mavx2), this
218 * might perform better than memchr. */
219 i = 0;
220 while (i <= reserved) {
221 ps1++;
222 i += (*ps1 == '\n');
223 }
224 /* The new mmfile includes this '\n' */
225 sbytes = msmall.ptr + msmall.size - ps1 - 1;
226 slines -= reserved;
227 if (msmall.ptr[msmall.size - 1] == '\n')
228 slines -= 1;
229 }
230
231 xe->nprefix = plines;
232 xe->nsuffix = slines;
233 out_mf1->ptr = mf1->ptr + pbytes;
234 out_mf1->size = mf1->size - pbytes - sbytes;
235 out_mf2->ptr = mf2->ptr + pbytes;
236 out_mf2->size = mf2->size - pbytes - sbytes;
237 }
238
239
159 240 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
160 241 xdlclassifier_t *cf, xdfile_t *xdf) {
161 242 unsigned int hbits;
@@ -254,10 +335,13 b' static void xdl_free_ctx(xdfile_t *xdf) '
254 335 xdl_cha_free(&xdf->rcha);
255 336 }
256 337
338 /* Number of common lines left untrimmed next to each trimmed region (the `reserved` argument of xdl_trim_files), to leave room for hunk shifting */
339 #define TRIM_RESERVED_LINES 100
257 340
258 341 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
259 342 xdfenv_t *xe) {
260 343 long enl1, enl2, sample;
344 mmfile_t tmf1, tmf2;
261 345 xdlclassifier_t cf;
262 346
263 347 memset(&cf, 0, sizeof(cf));
@@ -270,12 +354,14 b' int xdl_prepare_env(mmfile_t *mf1, mmfil'
270 354 if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
271 355 return -1;
272 356
273 if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
357 xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, &tmf1, &tmf2);
358
359 if (xdl_prepare_ctx(1, &tmf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
274 360
275 361 xdl_free_classifier(&cf);
276 362 return -1;
277 363 }
278 if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
364 if (xdl_prepare_ctx(2, &tmf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
279 365
280 366 xdl_free_ctx(&xe->xdf1);
281 367 xdl_free_classifier(&cf);
@@ -60,6 +60,10 b' typedef struct s_xdfile {'
60 60
61 61 typedef struct s_xdfenv {
62 62 xdfile_t xdf1, xdf2;
63
64 /* number of lines for common prefix and suffix that are removed
65 * from xdf1 and xdf2 as a preprocessing step */
66 long nprefix, nsuffix;
63 67 } xdfenv_t;
64 68
65 69
General Comments 0
You need to be logged in to leave comments. Login now