Show More
@@ -40,28 +40,28 b' extern "C" {' | |||||
40 |
|
40 | |||
41 | typedef struct s_mmfile { |
|
41 | typedef struct s_mmfile { | |
42 | char *ptr; |
|
42 | char *ptr; | |
43 |
|
|
43 | int64_t size; | |
44 | } mmfile_t; |
|
44 | } mmfile_t; | |
45 |
|
45 | |||
46 | typedef struct s_mmbuffer { |
|
46 | typedef struct s_mmbuffer { | |
47 | char *ptr; |
|
47 | char *ptr; | |
48 |
|
|
48 | int64_t size; | |
49 | } mmbuffer_t; |
|
49 | } mmbuffer_t; | |
50 |
|
50 | |||
51 | typedef struct s_xpparam { |
|
51 | typedef struct s_xpparam { | |
52 | unsigned long flags; |
|
52 | uint64_t flags; | |
53 | } xpparam_t; |
|
53 | } xpparam_t; | |
54 |
|
54 | |||
55 | typedef struct s_xdemitcb { |
|
55 | typedef struct s_xdemitcb { | |
56 | void *priv; |
|
56 | void *priv; | |
57 | } xdemitcb_t; |
|
57 | } xdemitcb_t; | |
58 |
|
58 | |||
59 |
typedef int (*xdl_emit_hunk_consume_func_t)( |
|
59 | typedef int (*xdl_emit_hunk_consume_func_t)(int64_t start_a, int64_t count_a, | |
60 |
|
|
60 | int64_t start_b, int64_t count_b, | |
61 | void *cb_data); |
|
61 | void *cb_data); | |
62 |
|
62 | |||
63 | typedef struct s_xdemitconf { |
|
63 | typedef struct s_xdemitconf { | |
64 | unsigned long flags; |
|
64 | uint64_t flags; | |
65 | xdl_emit_hunk_consume_func_t hunk_func; |
|
65 | xdl_emit_hunk_consume_func_t hunk_func; | |
66 | } xdemitconf_t; |
|
66 | } xdemitconf_t; | |
67 |
|
67 | |||
@@ -70,8 +70,8 b' typedef struct s_xdemitconf {' | |||||
70 | #define xdl_free(ptr) free(ptr) |
|
70 | #define xdl_free(ptr) free(ptr) | |
71 | #define xdl_realloc(ptr,x) realloc(ptr,x) |
|
71 | #define xdl_realloc(ptr,x) realloc(ptr,x) | |
72 |
|
72 | |||
73 |
void *xdl_mmfile_first(mmfile_t *mmf, |
|
73 | void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size); | |
74 |
|
|
74 | int64_t xdl_mmfile_size(mmfile_t *mmf); | |
75 |
|
75 | |||
76 | int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
76 | int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
77 | xdemitconf_t const *xecfg, xdemitcb_t *ecb); |
|
77 | xdemitconf_t const *xecfg, xdemitcb_t *ecb); |
@@ -37,18 +37,18 b'' | |||||
37 |
|
37 | |||
38 |
|
38 | |||
39 | typedef struct s_xdpsplit { |
|
39 | typedef struct s_xdpsplit { | |
40 |
|
|
40 | int64_t i1, i2; | |
41 | int min_lo, min_hi; |
|
41 | int min_lo, min_hi; | |
42 | } xdpsplit_t; |
|
42 | } xdpsplit_t; | |
43 |
|
43 | |||
44 |
|
44 | |||
45 |
|
45 | |||
46 |
|
46 | |||
47 |
static |
|
47 | static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1, | |
48 |
u |
|
48 | uint64_t const *ha2, int64_t off2, int64_t lim2, | |
49 |
|
|
49 | int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl, | |
50 | xdalgoenv_t *xenv); |
|
50 | xdalgoenv_t *xenv); | |
51 |
static xdchange_t *xdl_add_change(xdchange_t *xscr, |
|
51 | static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2); | |
52 |
|
52 | |||
53 |
|
53 | |||
54 |
|
54 | |||
@@ -63,16 +63,16 b' static xdchange_t *xdl_add_change(xdchan' | |||||
63 | * cases using this algorithm is full, so a little bit of heuristic is needed |
|
63 | * cases using this algorithm is full, so a little bit of heuristic is needed | |
64 | * to cut the search and to return a suboptimal point. |
|
64 | * to cut the search and to return a suboptimal point. | |
65 | */ |
|
65 | */ | |
66 |
static |
|
66 | static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1, | |
67 |
u |
|
67 | uint64_t const *ha2, int64_t off2, int64_t lim2, | |
68 |
|
|
68 | int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl, | |
69 | xdalgoenv_t *xenv) { |
|
69 | xdalgoenv_t *xenv) { | |
70 |
|
|
70 | int64_t dmin = off1 - lim2, dmax = lim1 - off2; | |
71 |
|
|
71 | int64_t fmid = off1 - off2, bmid = lim1 - lim2; | |
72 |
|
|
72 | int64_t odd = (fmid - bmid) & 1; | |
73 |
|
|
73 | int64_t fmin = fmid, fmax = fmid; | |
74 |
|
|
74 | int64_t bmin = bmid, bmax = bmid; | |
75 |
|
|
75 | int64_t ec, d, i1, i2, prev1, best, dd, v, k; | |
76 |
|
76 | |||
77 | /* |
|
77 | /* | |
78 | * Set initial diagonal values for both forward and backward path. |
|
78 | * Set initial diagonal values for both forward and backward path. | |
@@ -221,7 +221,7 b' static long xdl_split(unsigned long cons' | |||||
221 | * the furthest reaching path using the (i1 + i2) measure. |
|
221 | * the furthest reaching path using the (i1 + i2) measure. | |
222 | */ |
|
222 | */ | |
223 | if (ec >= xenv->mxcost) { |
|
223 | if (ec >= xenv->mxcost) { | |
224 |
|
|
224 | int64_t fbest, fbest1, bbest, bbest1; | |
225 |
|
225 | |||
226 | fbest = fbest1 = -1; |
|
226 | fbest = fbest1 = -1; | |
227 | for (d = fmax; d >= fmin; d -= 2) { |
|
227 | for (d = fmax; d >= fmin; d -= 2) { | |
@@ -269,10 +269,10 b' static long xdl_split(unsigned long cons' | |||||
269 | * the box splitting function. Note that the real job (marking changed lines) |
|
269 | * the box splitting function. Note that the real job (marking changed lines) | |
270 | * is done in the two boundary reaching checks. |
|
270 | * is done in the two boundary reaching checks. | |
271 | */ |
|
271 | */ | |
272 |
int xdl_recs_cmp(diffdata_t *dd1, |
|
272 | int xdl_recs_cmp(diffdata_t *dd1, int64_t off1, int64_t lim1, | |
273 |
diffdata_t *dd2, |
|
273 | diffdata_t *dd2, int64_t off2, int64_t lim2, | |
274 |
|
|
274 | int64_t *kvdf, int64_t *kvdb, int need_min, xdalgoenv_t *xenv) { | |
275 |
u |
|
275 | uint64_t const *ha1 = dd1->ha, *ha2 = dd2->ha; | |
276 |
|
276 | |||
277 | /* |
|
277 | /* | |
278 | * Shrink the box by walking through each diagonal snake (SW and NE). |
|
278 | * Shrink the box by walking through each diagonal snake (SW and NE). | |
@@ -286,13 +286,13 b' int xdl_recs_cmp(diffdata_t *dd1, long o' | |||||
286 | */ |
|
286 | */ | |
287 | if (off1 == lim1) { |
|
287 | if (off1 == lim1) { | |
288 | char *rchg2 = dd2->rchg; |
|
288 | char *rchg2 = dd2->rchg; | |
289 |
|
|
289 | int64_t *rindex2 = dd2->rindex; | |
290 |
|
290 | |||
291 | for (; off2 < lim2; off2++) |
|
291 | for (; off2 < lim2; off2++) | |
292 | rchg2[rindex2[off2]] = 1; |
|
292 | rchg2[rindex2[off2]] = 1; | |
293 | } else if (off2 == lim2) { |
|
293 | } else if (off2 == lim2) { | |
294 | char *rchg1 = dd1->rchg; |
|
294 | char *rchg1 = dd1->rchg; | |
295 |
|
|
295 | int64_t *rindex1 = dd1->rindex; | |
296 |
|
296 | |||
297 | for (; off1 < lim1; off1++) |
|
297 | for (; off1 < lim1; off1++) | |
298 | rchg1[rindex1[off1]] = 1; |
|
298 | rchg1[rindex1[off1]] = 1; | |
@@ -327,8 +327,8 b' int xdl_recs_cmp(diffdata_t *dd1, long o' | |||||
327 |
|
327 | |||
328 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
328 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
329 | xdfenv_t *xe) { |
|
329 | xdfenv_t *xe) { | |
330 |
|
|
330 | int64_t ndiags; | |
331 |
|
|
331 | int64_t *kvd, *kvdf, *kvdb; | |
332 | xdalgoenv_t xenv; |
|
332 | xdalgoenv_t xenv; | |
333 | diffdata_t dd1, dd2; |
|
333 | diffdata_t dd1, dd2; | |
334 |
|
334 | |||
@@ -342,7 +342,7 b' int xdl_do_diff(mmfile_t *mf1, mmfile_t ' | |||||
342 | * One is to store the forward path and one to store the backward path. |
|
342 | * One is to store the forward path and one to store the backward path. | |
343 | */ |
|
343 | */ | |
344 | ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; |
|
344 | ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; | |
345 |
if (!(kvd = ( |
|
345 | if (!(kvd = (int64_t *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { | |
346 |
|
346 | |||
347 | xdl_free_env(xe); |
|
347 | xdl_free_env(xe); | |
348 | return -1; |
|
348 | return -1; | |
@@ -381,7 +381,7 b' int xdl_do_diff(mmfile_t *mf1, mmfile_t ' | |||||
381 | } |
|
381 | } | |
382 |
|
382 | |||
383 |
|
383 | |||
384 |
static xdchange_t *xdl_add_change(xdchange_t *xscr, |
|
384 | static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2) { | |
385 | xdchange_t *xch; |
|
385 | xdchange_t *xch; | |
386 |
|
386 | |||
387 | if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) |
|
387 | if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) | |
@@ -398,7 +398,7 b' static xdchange_t *xdl_add_change(xdchan' | |||||
398 | } |
|
398 | } | |
399 |
|
399 | |||
400 |
|
400 | |||
401 |
static int recs_match(xrecord_t *rec1, xrecord_t *rec2, |
|
401 | static int recs_match(xrecord_t *rec1, xrecord_t *rec2, int64_t flags) | |
402 | { |
|
402 | { | |
403 | return (rec1->ha == rec2->ha && |
|
403 | return (rec1->ha == rec2->ha && | |
404 | xdl_recmatch(rec1->ptr, rec1->size, |
|
404 | xdl_recmatch(rec1->ptr, rec1->size, | |
@@ -421,7 +421,7 b' static int recs_match(xrecord_t *rec1, x' | |||||
421 | */ |
|
421 | */ | |
422 | static int get_indent(xrecord_t *rec) |
|
422 | static int get_indent(xrecord_t *rec) | |
423 | { |
|
423 | { | |
424 | long i; |
|
424 | int64_t i; | |
425 | int ret = 0; |
|
425 | int ret = 0; | |
426 |
|
426 | |||
427 | for (i = 0; i < rec->size; i++) { |
|
427 | for (i = 0; i < rec->size; i++) { | |
@@ -497,10 +497,10 b' struct split_score {' | |||||
497 | /* |
|
497 | /* | |
498 | * Fill m with information about a hypothetical split of xdf above line split. |
|
498 | * Fill m with information about a hypothetical split of xdf above line split. | |
499 | */ |
|
499 | */ | |
500 |
static void measure_split(const xdfile_t *xdf, |
|
500 | static void measure_split(const xdfile_t *xdf, int64_t split, | |
501 | struct split_measurement *m) |
|
501 | struct split_measurement *m) | |
502 | { |
|
502 | { | |
503 | long i; |
|
503 | int64_t i; | |
504 |
|
504 | |||
505 | if (split >= xdf->nrec) { |
|
505 | if (split >= xdf->nrec) { | |
506 | m->end_of_file = 1; |
|
506 | m->end_of_file = 1; | |
@@ -706,13 +706,13 b' struct xdlgroup {' | |||||
706 | * The index of the first changed line in the group, or the index of |
|
706 | * The index of the first changed line in the group, or the index of | |
707 | * the unchanged line above which the (empty) group is located. |
|
707 | * the unchanged line above which the (empty) group is located. | |
708 | */ |
|
708 | */ | |
709 |
|
|
709 | int64_t start; | |
710 |
|
710 | |||
711 | /* |
|
711 | /* | |
712 | * The index of the first unchanged line after the group. For an empty |
|
712 | * The index of the first unchanged line after the group. For an empty | |
713 | * group, end is equal to start. |
|
713 | * group, end is equal to start. | |
714 | */ |
|
714 | */ | |
715 |
|
|
715 | int64_t end; | |
716 | }; |
|
716 | }; | |
717 |
|
717 | |||
718 | /* |
|
718 | /* | |
@@ -762,7 +762,7 b' static inline int group_previous(xdfile_' | |||||
762 | * following group, expand this group to include it. Return 0 on success or -1 |
|
762 | * following group, expand this group to include it. Return 0 on success or -1 | |
763 | * if g cannot be slid down. |
|
763 | * if g cannot be slid down. | |
764 | */ |
|
764 | */ | |
765 |
static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, |
|
765 | static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, int64_t flags) | |
766 | { |
|
766 | { | |
767 | if (g->end < xdf->nrec && |
|
767 | if (g->end < xdf->nrec && | |
768 | recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { |
|
768 | recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { | |
@@ -783,7 +783,7 b' static int group_slide_down(xdfile_t *xd' | |||||
783 | * into a previous group, expand this group to include it. Return 0 on success |
|
783 | * into a previous group, expand this group to include it. Return 0 on success | |
784 | * or -1 if g cannot be slid up. |
|
784 | * or -1 if g cannot be slid up. | |
785 | */ |
|
785 | */ | |
786 |
static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, |
|
786 | static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, int64_t flags) | |
787 | { |
|
787 | { | |
788 | if (g->start > 0 && |
|
788 | if (g->start > 0 && | |
789 | recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { |
|
789 | recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { | |
@@ -818,10 +818,10 b' static void xdl_bug(const char *msg)' | |||||
818 | * This also helps in finding joinable change groups and reducing the diff |
|
818 | * This also helps in finding joinable change groups and reducing the diff | |
819 | * size. |
|
819 | * size. | |
820 | */ |
|
820 | */ | |
821 |
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, |
|
821 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, int64_t flags) { | |
822 | struct xdlgroup g, go; |
|
822 | struct xdlgroup g, go; | |
823 |
|
|
823 | int64_t earliest_end, end_matching_other; | |
824 |
|
|
824 | int64_t groupsize; | |
825 |
|
825 | |||
826 | group_init(xdf, &g); |
|
826 | group_init(xdf, &g); | |
827 | group_init(xdfo, &go); |
|
827 | group_init(xdfo, &go); | |
@@ -906,7 +906,7 b' int xdl_change_compact(xdfile_t *xdf, xd' | |||||
906 | * "score" for each position that the group can be shifted |
|
906 | * "score" for each position that the group can be shifted | |
907 | * to. Then we pick the shift with the lowest score. |
|
907 | * to. Then we pick the shift with the lowest score. | |
908 | */ |
|
908 | */ | |
909 |
|
|
909 | int64_t shift, best_shift = -1; | |
910 | struct split_score best_score; |
|
910 | struct split_score best_score; | |
911 |
|
911 | |||
912 | /* |
|
912 | /* | |
@@ -975,7 +975,7 b' int xdl_change_compact(xdfile_t *xdf, xd' | |||||
975 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { |
|
975 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { | |
976 | xdchange_t *cscr = NULL, *xch; |
|
976 | xdchange_t *cscr = NULL, *xch; | |
977 | char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; |
|
977 | char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; | |
978 |
|
|
978 | int64_t i1, i2, l1, l2; | |
979 |
|
979 | |||
980 | /* |
|
980 | /* | |
981 | * Trivial. Collects "groups" of changes and creates an edit script. |
|
981 | * Trivial. Collects "groups" of changes and creates an edit script. | |
@@ -1016,9 +1016,9 b' void xdl_free_script(xdchange_t *xscr) {' | |||||
1016 | xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) |
|
1016 | xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) | |
1017 | { |
|
1017 | { | |
1018 | xdchange_t *xch, *xchp, *lxch; |
|
1018 | xdchange_t *xch, *xchp, *lxch; | |
1019 |
|
|
1019 | int64_t max_common = 0; | |
1020 |
|
|
1020 | int64_t max_ignorable = 0; | |
1021 |
u |
|
1021 | uint64_t ignored = 0; /* number of ignored blank lines */ | |
1022 |
|
1022 | |||
1023 | /* remove ignorable changes that are too far before other changes */ |
|
1023 | /* remove ignorable changes that are too far before other changes */ | |
1024 | for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { |
|
1024 | for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { | |
@@ -1035,7 +1035,7 b' xdchange_t *xdl_get_hunk(xdchange_t **xs' | |||||
1035 | lxch = *xscr; |
|
1035 | lxch = *xscr; | |
1036 |
|
1036 | |||
1037 | for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { |
|
1037 | for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { | |
1038 |
|
|
1038 | int64_t distance = xch->i1 - (xchp->i1 + xchp->chg1); | |
1039 | if (distance > max_common) |
|
1039 | if (distance > max_common) | |
1040 | break; |
|
1040 | break; | |
1041 |
|
1041 | |||
@@ -1062,14 +1062,14 b' xdchange_t *xdl_get_hunk(xdchange_t **xs' | |||||
1062 | static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, |
|
1062 | static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, | |
1063 | xdemitconf_t const *xecfg) |
|
1063 | xdemitconf_t const *xecfg) | |
1064 | { |
|
1064 | { | |
1065 |
|
|
1065 | int64_t p = xe->nprefix, s = xe->nsuffix; | |
1066 | xdchange_t *xch, *xche; |
|
1066 | xdchange_t *xch, *xche; | |
1067 |
|
1067 | |||
1068 | if (!xecfg->hunk_func) |
|
1068 | if (!xecfg->hunk_func) | |
1069 | return -1; |
|
1069 | return -1; | |
1070 |
|
1070 | |||
1071 | if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) { |
|
1071 | if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) { | |
1072 |
|
|
1072 | int64_t i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec; | |
1073 | for (xch = xscr; xch; xch = xche->next) { |
|
1073 | for (xch = xscr; xch; xch = xche->next) { | |
1074 | xche = xdl_get_hunk(&xch, xecfg); |
|
1074 | xche = xdl_get_hunk(&xch, xecfg); | |
1075 | if (!xch) |
|
1075 | if (!xch) |
@@ -25,33 +25,33 b'' | |||||
25 |
|
25 | |||
26 |
|
26 | |||
27 | typedef struct s_diffdata { |
|
27 | typedef struct s_diffdata { | |
28 |
|
|
28 | int64_t nrec; | |
29 |
u |
|
29 | uint64_t const *ha; | |
30 |
|
|
30 | int64_t *rindex; | |
31 | char *rchg; |
|
31 | char *rchg; | |
32 | } diffdata_t; |
|
32 | } diffdata_t; | |
33 |
|
33 | |||
34 | typedef struct s_xdalgoenv { |
|
34 | typedef struct s_xdalgoenv { | |
35 |
|
|
35 | int64_t mxcost; | |
36 |
|
|
36 | int64_t snake_cnt; | |
37 |
|
|
37 | int64_t heur_min; | |
38 | } xdalgoenv_t; |
|
38 | } xdalgoenv_t; | |
39 |
|
39 | |||
40 | typedef struct s_xdchange { |
|
40 | typedef struct s_xdchange { | |
41 | struct s_xdchange *next; |
|
41 | struct s_xdchange *next; | |
42 |
|
|
42 | int64_t i1, i2; | |
43 |
|
|
43 | int64_t chg1, chg2; | |
44 | int ignore; |
|
44 | int ignore; | |
45 | } xdchange_t; |
|
45 | } xdchange_t; | |
46 |
|
46 | |||
47 |
|
47 | |||
48 |
|
48 | |||
49 |
int xdl_recs_cmp(diffdata_t *dd1, |
|
49 | int xdl_recs_cmp(diffdata_t *dd1, int64_t off1, int64_t lim1, | |
50 |
diffdata_t *dd2, |
|
50 | diffdata_t *dd2, int64_t off2, int64_t lim2, | |
51 |
|
|
51 | int64_t *kvdf, int64_t *kvdb, int need_min, xdalgoenv_t *xenv); | |
52 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
52 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
53 | xdfenv_t *xe); |
|
53 | xdfenv_t *xe); | |
54 |
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, |
|
54 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, int64_t flags); | |
55 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); |
|
55 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); | |
56 | void xdl_free_script(xdchange_t *xscr); |
|
56 | void xdl_free_script(xdchange_t *xscr); | |
57 |
|
57 |
@@ -24,6 +24,7 b'' | |||||
24 | #define XINCLUDE_H |
|
24 | #define XINCLUDE_H | |
25 |
|
25 | |||
26 | #include <ctype.h> |
|
26 | #include <ctype.h> | |
|
27 | #include <stdint.h> | |||
27 | #include <stdio.h> |
|
28 | #include <stdio.h> | |
28 | #include <stdlib.h> |
|
29 | #include <stdlib.h> | |
29 | #include <string.h> |
|
30 | #include <string.h> |
@@ -31,35 +31,35 b'' | |||||
31 |
|
31 | |||
32 | typedef struct s_xdlclass { |
|
32 | typedef struct s_xdlclass { | |
33 | struct s_xdlclass *next; |
|
33 | struct s_xdlclass *next; | |
34 | unsigned long ha; |
|
34 | uint64_t ha; | |
35 | char const *line; |
|
35 | char const *line; | |
36 |
|
|
36 | int64_t size; | |
37 |
|
|
37 | int64_t idx; | |
38 |
|
|
38 | int64_t len1, len2; | |
39 | } xdlclass_t; |
|
39 | } xdlclass_t; | |
40 |
|
40 | |||
41 | typedef struct s_xdlclassifier { |
|
41 | typedef struct s_xdlclassifier { | |
42 | unsigned int hbits; |
|
42 | unsigned int hbits; | |
43 |
|
|
43 | int64_t hsize; | |
44 | xdlclass_t **rchash; |
|
44 | xdlclass_t **rchash; | |
45 | chastore_t ncha; |
|
45 | chastore_t ncha; | |
46 | xdlclass_t **rcrecs; |
|
46 | xdlclass_t **rcrecs; | |
47 |
|
|
47 | int64_t alloc; | |
48 |
|
|
48 | int64_t count; | |
49 |
|
|
49 | int64_t flags; | |
50 | } xdlclassifier_t; |
|
50 | } xdlclassifier_t; | |
51 |
|
51 | |||
52 |
|
52 | |||
53 |
|
53 | |||
54 |
|
54 | |||
55 |
static int xdl_init_classifier(xdlclassifier_t *cf, |
|
55 | static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags); | |
56 | static void xdl_free_classifier(xdlclassifier_t *cf); |
|
56 | static void xdl_free_classifier(xdlclassifier_t *cf); | |
57 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
|
57 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | |
58 | unsigned int hbits, xrecord_t *rec); |
|
58 | unsigned int hbits, xrecord_t *rec); | |
59 |
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, |
|
59 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, | |
60 | xdlclassifier_t *cf, xdfile_t *xdf); |
|
60 | xdlclassifier_t *cf, xdfile_t *xdf); | |
61 | static void xdl_free_ctx(xdfile_t *xdf); |
|
61 | static void xdl_free_ctx(xdfile_t *xdf); | |
62 |
static int xdl_clean_mmatch(char const *dis, |
|
62 | static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e); | |
63 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
|
63 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | |
64 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); |
|
64 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); | |
65 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
|
65 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | |
@@ -67,7 +67,7 b' static int xdl_optimize_ctxs(xdlclassifi' | |||||
67 |
|
67 | |||
68 |
|
68 | |||
69 |
|
69 | |||
70 |
static int xdl_init_classifier(xdlclassifier_t *cf, |
|
70 | static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags) { | |
71 | cf->flags = flags; |
|
71 | cf->flags = flags; | |
72 |
|
72 | |||
73 | cf->hbits = xdl_hashbits((unsigned int) size); |
|
73 | cf->hbits = xdl_hashbits((unsigned int) size); | |
@@ -108,7 +108,7 b' static void xdl_free_classifier(xdlclass' | |||||
108 |
|
108 | |||
109 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
|
109 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | |
110 | unsigned int hbits, xrecord_t *rec) { |
|
110 | unsigned int hbits, xrecord_t *rec) { | |
111 | long hi; |
|
111 | int64_t hi; | |
112 | char const *line; |
|
112 | char const *line; | |
113 | xdlclass_t *rcrec; |
|
113 | xdlclass_t *rcrec; | |
114 | xdlclass_t **rcrecs; |
|
114 | xdlclass_t **rcrecs; | |
@@ -163,11 +163,11 b' static int xdl_classify_record(unsigned ' | |||||
163 | * outweighs the shift change. A diff result with suboptimal shifting is still |
|
163 | * outweighs the shift change. A diff result with suboptimal shifting is still | |
164 | * valid. |
|
164 | * valid. | |
165 | */ |
|
165 | */ | |
166 |
static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, |
|
166 | static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, int64_t reserved, | |
167 | xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) { |
|
167 | xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) { | |
168 | mmfile_t msmall, mlarge; |
|
168 | mmfile_t msmall, mlarge; | |
169 | /* prefix lines, prefix bytes, suffix lines, suffix bytes */ |
|
169 | /* prefix lines, prefix bytes, suffix lines, suffix bytes */ | |
170 |
|
|
170 | int64_t plines = 0, pbytes = 0, slines = 0, sbytes = 0, i; | |
171 | /* prefix char pointer for msmall and mlarge */ |
|
171 | /* prefix char pointer for msmall and mlarge */ | |
172 | const char *pp1, *pp2; |
|
172 | const char *pp1, *pp2; | |
173 | /* suffix char pointer for msmall and mlarge */ |
|
173 | /* suffix char pointer for msmall and mlarge */ | |
@@ -237,18 +237,18 b' static void xdl_trim_files(mmfile_t *mf1' | |||||
237 | } |
|
237 | } | |
238 |
|
238 | |||
239 |
|
239 | |||
240 |
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, |
|
240 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, | |
241 | xdlclassifier_t *cf, xdfile_t *xdf) { |
|
241 | xdlclassifier_t *cf, xdfile_t *xdf) { | |
242 | unsigned int hbits; |
|
242 | unsigned int hbits; | |
243 |
|
|
243 | int64_t nrec, hsize, bsize; | |
244 | unsigned long hav; |
|
244 | uint64_t hav; | |
245 | char const *blk, *cur, *top, *prev; |
|
245 | char const *blk, *cur, *top, *prev; | |
246 | xrecord_t *crec; |
|
246 | xrecord_t *crec; | |
247 | xrecord_t **recs, **rrecs; |
|
247 | xrecord_t **recs, **rrecs; | |
248 | xrecord_t **rhash; |
|
248 | xrecord_t **rhash; | |
249 | unsigned long *ha; |
|
249 | uint64_t *ha; | |
250 | char *rchg; |
|
250 | char *rchg; | |
251 |
|
|
251 | int64_t *rindex; | |
252 |
|
252 | |||
253 | ha = NULL; |
|
253 | ha = NULL; | |
254 | rindex = NULL; |
|
254 | rindex = NULL; | |
@@ -296,9 +296,9 b' static int xdl_prepare_ctx(unsigned int ' | |||||
296 | goto abort; |
|
296 | goto abort; | |
297 | memset(rchg, 0, (nrec + 2) * sizeof(char)); |
|
297 | memset(rchg, 0, (nrec + 2) * sizeof(char)); | |
298 |
|
298 | |||
299 |
if (!(rindex = ( |
|
299 | if (!(rindex = (int64_t *) xdl_malloc((nrec + 1) * sizeof(long)))) | |
300 | goto abort; |
|
300 | goto abort; | |
301 |
if (!(ha = (u |
|
301 | if (!(ha = (uint64_t *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) | |
302 | goto abort; |
|
302 | goto abort; | |
303 |
|
303 | |||
304 | xdf->nrec = nrec; |
|
304 | xdf->nrec = nrec; | |
@@ -340,7 +340,7 b' static void xdl_free_ctx(xdfile_t *xdf) ' | |||||
340 |
|
340 | |||
341 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
341 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | |
342 | xdfenv_t *xe) { |
|
342 | xdfenv_t *xe) { | |
343 |
|
|
343 | int64_t enl1, enl2, sample; | |
344 | mmfile_t tmf1, tmf2; |
|
344 | mmfile_t tmf1, tmf2; | |
345 | xdlclassifier_t cf; |
|
345 | xdlclassifier_t cf; | |
346 |
|
346 | |||
@@ -388,8 +388,8 b' void xdl_free_env(xdfenv_t *xe) {' | |||||
388 | } |
|
388 | } | |
389 |
|
389 | |||
390 |
|
390 | |||
391 |
static int xdl_clean_mmatch(char const *dis, |
|
391 | static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e) { | |
392 |
|
|
392 | int64_t r, rdis0, rpdis0, rdis1, rpdis1; | |
393 |
|
393 | |||
394 | /* |
|
394 | /* | |
395 | * Limits the window the is examined during the similar-lines |
|
395 | * Limits the window the is examined during the similar-lines | |
@@ -452,7 +452,7 b' static int xdl_clean_mmatch(char const *' | |||||
452 | * might be potentially discarded if they happear in a run of discardable. |
|
452 | * might be potentially discarded if they happear in a run of discardable. | |
453 | */ |
|
453 | */ | |
454 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
|
454 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { | |
455 |
|
|
455 | int64_t i, nm, nreff, mlim; | |
456 | xrecord_t **recs; |
|
456 | xrecord_t **recs; | |
457 | xdlclass_t *rcrec; |
|
457 | xdlclass_t *rcrec; | |
458 | char *dis, *dis1, *dis2; |
|
458 | char *dis, *dis1, *dis2; | |
@@ -515,7 +515,7 b' static int xdl_cleanup_records(xdlclassi' | |||||
515 | * Early trim initial and terminal matching records. |
|
515 | * Early trim initial and terminal matching records. | |
516 | */ |
|
516 | */ | |
517 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { |
|
517 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { | |
518 |
|
|
518 | int64_t i, lim; | |
519 | xrecord_t **recs1, **recs2; |
|
519 | xrecord_t **recs1, **recs2; | |
520 |
|
520 | |||
521 | recs1 = xdf1->recs; |
|
521 | recs1 = xdf1->recs; |
@@ -27,22 +27,22 b'' | |||||
27 |
|
27 | |||
28 | typedef struct s_chanode { |
|
28 | typedef struct s_chanode { | |
29 | struct s_chanode *next; |
|
29 | struct s_chanode *next; | |
30 |
|
|
30 | int64_t icurr; | |
31 | } chanode_t; |
|
31 | } chanode_t; | |
32 |
|
32 | |||
33 | typedef struct s_chastore { |
|
33 | typedef struct s_chastore { | |
34 | chanode_t *head, *tail; |
|
34 | chanode_t *head, *tail; | |
35 |
|
|
35 | int64_t isize, nsize; | |
36 | chanode_t *ancur; |
|
36 | chanode_t *ancur; | |
37 | chanode_t *sncur; |
|
37 | chanode_t *sncur; | |
38 |
|
|
38 | int64_t scurr; | |
39 | } chastore_t; |
|
39 | } chastore_t; | |
40 |
|
40 | |||
41 | typedef struct s_xrecord { |
|
41 | typedef struct s_xrecord { | |
42 | struct s_xrecord *next; |
|
42 | struct s_xrecord *next; | |
43 | char const *ptr; |
|
43 | char const *ptr; | |
44 |
|
|
44 | int64_t size; | |
45 | unsigned long ha; |
|
45 | uint64_t ha; | |
46 | } xrecord_t; |
|
46 | } xrecord_t; | |
47 |
|
47 | |||
48 | typedef struct s_xdfile { |
|
48 | typedef struct s_xdfile { | |
@@ -50,7 +50,7 b' typedef struct s_xdfile {' | |||||
50 | chastore_t rcha; |
|
50 | chastore_t rcha; | |
51 |
|
51 | |||
52 | /* number of records (lines) */ |
|
52 | /* number of records (lines) */ | |
53 |
|
|
53 | int64_t nrec; | |
54 |
|
54 | |||
55 | /* hash table size |
|
55 | /* hash table size | |
56 | * the maximum hash value in the table is (1 << hbits) */ |
|
56 | * the maximum hash value in the table is (1 << hbits) */ | |
@@ -64,7 +64,7 b' typedef struct s_xdfile {' | |||||
64 | * [recs[i] for i in range(0, dstart)] are common prefix. |
|
64 | * [recs[i] for i in range(0, dstart)] are common prefix. | |
65 | * [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting |
|
65 | * [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting | |
66 | * lines */ |
|
66 | * lines */ | |
67 |
|
|
67 | int64_t dstart, dend; | |
68 |
|
68 | |||
69 | /* pointer to records (lines) */ |
|
69 | /* pointer to records (lines) */ | |
70 | xrecord_t **recs; |
|
70 | xrecord_t **recs; | |
@@ -82,14 +82,14 b' typedef struct s_xdfile {' | |||||
82 | * rindex[0] is likely dstart, if not removed up by rule 2. |
|
82 | * rindex[0] is likely dstart, if not removed up by rule 2. | |
83 | * rindex[nreff - 1] is likely dend, if not removed by rule 2. |
|
83 | * rindex[nreff - 1] is likely dend, if not removed by rule 2. | |
84 | */ |
|
84 | */ | |
85 |
|
|
85 | int64_t *rindex; | |
86 |
|
86 | |||
87 | /* rindex size */ |
|
87 | /* rindex size */ | |
88 |
|
|
88 | int64_t nreff; | |
89 |
|
89 | |||
90 | /* cleaned-up record index => hash value |
|
90 | /* cleaned-up record index => hash value | |
91 | * ha[i] = recs[rindex[i]]->ha */ |
|
91 | * ha[i] = recs[rindex[i]]->ha */ | |
92 | unsigned long *ha; |
|
92 | uint64_t *ha; | |
93 | } xdfile_t; |
|
93 | } xdfile_t; | |
94 |
|
94 | |||
95 | typedef struct s_xdfenv { |
|
95 | typedef struct s_xdfenv { | |
@@ -97,7 +97,7 b' typedef struct s_xdfenv {' | |||||
97 |
|
97 | |||
98 | /* number of lines for common prefix and suffix that are removed |
|
98 | /* number of lines for common prefix and suffix that are removed | |
99 | * from xdf1 and xdf2 as a preprocessing step */ |
|
99 | * from xdf1 and xdf2 as a preprocessing step */ | |
100 |
|
|
100 | int64_t nprefix, nsuffix; | |
101 | } xdfenv_t; |
|
101 | } xdfenv_t; | |
102 |
|
102 | |||
103 |
|
103 |
@@ -27,8 +27,8 b'' | |||||
27 |
|
27 | |||
28 |
|
28 | |||
29 |
|
29 | |||
30 |
|
|
30 | int64_t xdl_bogosqrt(int64_t n) { | |
31 | long i; |
|
31 | int64_t i; | |
32 |
|
32 | |||
33 | /* |
|
33 | /* | |
34 | * Classical integer square root approximation using shifts. |
|
34 | * Classical integer square root approximation using shifts. | |
@@ -40,20 +40,20 b' long xdl_bogosqrt(long n) {' | |||||
40 | } |
|
40 | } | |
41 |
|
41 | |||
42 |
|
42 | |||
43 |
void *xdl_mmfile_first(mmfile_t *mmf, |
|
43 | void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size) | |
44 | { |
|
44 | { | |
45 | *size = mmf->size; |
|
45 | *size = mmf->size; | |
46 | return mmf->ptr; |
|
46 | return mmf->ptr; | |
47 | } |
|
47 | } | |
48 |
|
48 | |||
49 |
|
49 | |||
50 |
|
|
50 | int64_t xdl_mmfile_size(mmfile_t *mmf) | |
51 | { |
|
51 | { | |
52 | return mmf->size; |
|
52 | return mmf->size; | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 |
|
55 | |||
56 |
int xdl_cha_init(chastore_t *cha, |
|
56 | int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount) { | |
57 |
|
57 | |||
58 | cha->head = cha->tail = NULL; |
|
58 | cha->head = cha->tail = NULL; | |
59 | cha->isize = isize; |
|
59 | cha->isize = isize; | |
@@ -100,8 +100,8 b' void *xdl_cha_alloc(chastore_t *cha) {' | |||||
100 | return data; |
|
100 | return data; | |
101 | } |
|
101 | } | |
102 |
|
102 | |||
103 |
|
|
103 | int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample) { | |
104 |
|
|
104 | int64_t nl = 0, size, tsize = 0; | |
105 | char const *data, *cur, *top; |
|
105 | char const *data, *cur, *top; | |
106 |
|
106 | |||
107 | if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { |
|
107 | if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { | |
@@ -121,15 +121,15 b' long xdl_guess_lines(mmfile_t *mf, long ' | |||||
121 | return nl + 1; |
|
121 | return nl + 1; | |
122 | } |
|
122 | } | |
123 |
|
123 | |||
124 |
int xdl_recmatch(const char *l1, |
|
124 | int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, int64_t flags) | |
125 | { |
|
125 | { | |
126 | if (s1 == s2 && !memcmp(l1, l2, s1)) |
|
126 | if (s1 == s2 && !memcmp(l1, l2, s1)) | |
127 | return 1; |
|
127 | return 1; | |
128 | return 0; |
|
128 | return 0; | |
129 | } |
|
129 | } | |
130 |
|
130 | |||
131 |
u |
|
131 | uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) { | |
132 |
u |
|
132 | uint64_t ha = 5381; | |
133 | char const *ptr = *data; |
|
133 | char const *ptr = *data; | |
134 |
|
134 | |||
135 | for (; ptr < top && *ptr != '\n'; ptr++) { |
|
135 | for (; ptr < top && *ptr != '\n'; ptr++) { |
@@ -25,13 +25,13 b'' | |||||
25 |
|
25 | |||
26 |
|
26 | |||
27 |
|
27 | |||
28 |
|
|
28 | int64_t xdl_bogosqrt(int64_t n); | |
29 |
int xdl_cha_init(chastore_t *cha, |
|
29 | int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount); | |
30 | void xdl_cha_free(chastore_t *cha); |
|
30 | void xdl_cha_free(chastore_t *cha); | |
31 | void *xdl_cha_alloc(chastore_t *cha); |
|
31 | void *xdl_cha_alloc(chastore_t *cha); | |
32 |
|
|
32 | int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample); | |
33 |
int xdl_recmatch(const char *l1, |
|
33 | int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, int64_t flags); | |
34 |
u |
|
34 | uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags); | |
35 | unsigned int xdl_hashbits(unsigned int size); |
|
35 | unsigned int xdl_hashbits(unsigned int size); | |
36 |
|
36 | |||
37 |
|
37 |
General Comments 0
You need to be logged in to leave comments.
Login now