Show More
@@ -40,28 +40,28 b' extern "C" {' | |||
|
40 | 40 | |
|
41 | 41 | typedef struct s_mmfile { |
|
42 | 42 | char *ptr; |
|
43 |
|
|
|
43 | int64_t size; | |
|
44 | 44 | } mmfile_t; |
|
45 | 45 | |
|
46 | 46 | typedef struct s_mmbuffer { |
|
47 | 47 | char *ptr; |
|
48 |
|
|
|
48 | int64_t size; | |
|
49 | 49 | } mmbuffer_t; |
|
50 | 50 | |
|
51 | 51 | typedef struct s_xpparam { |
|
52 | unsigned long flags; | |
|
52 | uint64_t flags; | |
|
53 | 53 | } xpparam_t; |
|
54 | 54 | |
|
55 | 55 | typedef struct s_xdemitcb { |
|
56 | 56 | void *priv; |
|
57 | 57 | } xdemitcb_t; |
|
58 | 58 | |
|
59 |
typedef int (*xdl_emit_hunk_consume_func_t)( |
|
|
60 |
|
|
|
59 | typedef int (*xdl_emit_hunk_consume_func_t)(int64_t start_a, int64_t count_a, | |
|
60 | int64_t start_b, int64_t count_b, | |
|
61 | 61 | void *cb_data); |
|
62 | 62 | |
|
63 | 63 | typedef struct s_xdemitconf { |
|
64 | unsigned long flags; | |
|
64 | uint64_t flags; | |
|
65 | 65 | xdl_emit_hunk_consume_func_t hunk_func; |
|
66 | 66 | } xdemitconf_t; |
|
67 | 67 | |
@@ -70,8 +70,8 b' typedef struct s_xdemitconf {' | |||
|
70 | 70 | #define xdl_free(ptr) free(ptr) |
|
71 | 71 | #define xdl_realloc(ptr,x) realloc(ptr,x) |
|
72 | 72 | |
|
73 |
void *xdl_mmfile_first(mmfile_t *mmf, |
|
|
74 |
|
|
|
73 | void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size); | |
|
74 | int64_t xdl_mmfile_size(mmfile_t *mmf); | |
|
75 | 75 | |
|
76 | 76 | int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
77 | 77 | xdemitconf_t const *xecfg, xdemitcb_t *ecb); |
@@ -37,18 +37,18 b'' | |||
|
37 | 37 | |
|
38 | 38 | |
|
39 | 39 | typedef struct s_xdpsplit { |
|
40 |
|
|
|
40 | int64_t i1, i2; | |
|
41 | 41 | int min_lo, min_hi; |
|
42 | 42 | } xdpsplit_t; |
|
43 | 43 | |
|
44 | 44 | |
|
45 | 45 | |
|
46 | 46 | |
|
47 |
static |
|
|
48 |
u |
|
|
49 |
|
|
|
47 | static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1, | |
|
48 | uint64_t const *ha2, int64_t off2, int64_t lim2, | |
|
49 | int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl, | |
|
50 | 50 | xdalgoenv_t *xenv); |
|
51 |
static xdchange_t *xdl_add_change(xdchange_t *xscr, |
|
|
51 | static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2); | |
|
52 | 52 | |
|
53 | 53 | |
|
54 | 54 | |
@@ -63,16 +63,16 b' static xdchange_t *xdl_add_change(xdchan' | |||
|
63 | 63 | * cases using this algorithm is full, so a little bit of heuristic is needed |
|
64 | 64 | * to cut the search and to return a suboptimal point. |
|
65 | 65 | */ |
|
66 |
static |
|
|
67 |
u |
|
|
68 |
|
|
|
66 | static int64_t xdl_split(uint64_t const *ha1, int64_t off1, int64_t lim1, | |
|
67 | uint64_t const *ha2, int64_t off2, int64_t lim2, | |
|
68 | int64_t *kvdf, int64_t *kvdb, int need_min, xdpsplit_t *spl, | |
|
69 | 69 | xdalgoenv_t *xenv) { |
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
|
73 |
|
|
|
74 |
|
|
|
75 |
|
|
|
70 | int64_t dmin = off1 - lim2, dmax = lim1 - off2; | |
|
71 | int64_t fmid = off1 - off2, bmid = lim1 - lim2; | |
|
72 | int64_t odd = (fmid - bmid) & 1; | |
|
73 | int64_t fmin = fmid, fmax = fmid; | |
|
74 | int64_t bmin = bmid, bmax = bmid; | |
|
75 | int64_t ec, d, i1, i2, prev1, best, dd, v, k; | |
|
76 | 76 | |
|
77 | 77 | /* |
|
78 | 78 | * Set initial diagonal values for both forward and backward path. |
@@ -221,7 +221,7 b' static long xdl_split(unsigned long cons' | |||
|
221 | 221 | * the furthest reaching path using the (i1 + i2) measure. |
|
222 | 222 | */ |
|
223 | 223 | if (ec >= xenv->mxcost) { |
|
224 |
|
|
|
224 | int64_t fbest, fbest1, bbest, bbest1; | |
|
225 | 225 | |
|
226 | 226 | fbest = fbest1 = -1; |
|
227 | 227 | for (d = fmax; d >= fmin; d -= 2) { |
@@ -269,10 +269,10 b' static long xdl_split(unsigned long cons' | |||
|
269 | 269 | * the box splitting function. Note that the real job (marking changed lines) |
|
270 | 270 | * is done in the two boundary reaching checks. |
|
271 | 271 | */ |
|
272 |
int xdl_recs_cmp(diffdata_t *dd1, |
|
|
273 |
diffdata_t *dd2, |
|
|
274 |
|
|
|
275 |
u |
|
|
272 | int xdl_recs_cmp(diffdata_t *dd1, int64_t off1, int64_t lim1, | |
|
273 | diffdata_t *dd2, int64_t off2, int64_t lim2, | |
|
274 | int64_t *kvdf, int64_t *kvdb, int need_min, xdalgoenv_t *xenv) { | |
|
275 | uint64_t const *ha1 = dd1->ha, *ha2 = dd2->ha; | |
|
276 | 276 | |
|
277 | 277 | /* |
|
278 | 278 | * Shrink the box by walking through each diagonal snake (SW and NE). |
@@ -286,13 +286,13 b' int xdl_recs_cmp(diffdata_t *dd1, long o' | |||
|
286 | 286 | */ |
|
287 | 287 | if (off1 == lim1) { |
|
288 | 288 | char *rchg2 = dd2->rchg; |
|
289 |
|
|
|
289 | int64_t *rindex2 = dd2->rindex; | |
|
290 | 290 | |
|
291 | 291 | for (; off2 < lim2; off2++) |
|
292 | 292 | rchg2[rindex2[off2]] = 1; |
|
293 | 293 | } else if (off2 == lim2) { |
|
294 | 294 | char *rchg1 = dd1->rchg; |
|
295 |
|
|
|
295 | int64_t *rindex1 = dd1->rindex; | |
|
296 | 296 | |
|
297 | 297 | for (; off1 < lim1; off1++) |
|
298 | 298 | rchg1[rindex1[off1]] = 1; |
@@ -327,8 +327,8 b' int xdl_recs_cmp(diffdata_t *dd1, long o' | |||
|
327 | 327 | |
|
328 | 328 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
329 | 329 | xdfenv_t *xe) { |
|
330 |
|
|
|
331 |
|
|
|
330 | int64_t ndiags; | |
|
331 | int64_t *kvd, *kvdf, *kvdb; | |
|
332 | 332 | xdalgoenv_t xenv; |
|
333 | 333 | diffdata_t dd1, dd2; |
|
334 | 334 | |
@@ -342,7 +342,7 b' int xdl_do_diff(mmfile_t *mf1, mmfile_t ' | |||
|
342 | 342 | * One is to store the forward path and one to store the backward path. |
|
343 | 343 | */ |
|
344 | 344 | ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; |
|
345 |
if (!(kvd = ( |
|
|
345 | if (!(kvd = (int64_t *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) { | |
|
346 | 346 | |
|
347 | 347 | xdl_free_env(xe); |
|
348 | 348 | return -1; |
@@ -381,7 +381,7 b' int xdl_do_diff(mmfile_t *mf1, mmfile_t ' | |||
|
381 | 381 | } |
|
382 | 382 | |
|
383 | 383 | |
|
384 |
static xdchange_t *xdl_add_change(xdchange_t *xscr, |
|
|
384 | static xdchange_t *xdl_add_change(xdchange_t *xscr, int64_t i1, int64_t i2, int64_t chg1, int64_t chg2) { | |
|
385 | 385 | xdchange_t *xch; |
|
386 | 386 | |
|
387 | 387 | if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t)))) |
@@ -398,7 +398,7 b' static xdchange_t *xdl_add_change(xdchan' | |||
|
398 | 398 | } |
|
399 | 399 | |
|
400 | 400 | |
|
401 |
static int recs_match(xrecord_t *rec1, xrecord_t *rec2, |
|
|
401 | static int recs_match(xrecord_t *rec1, xrecord_t *rec2, int64_t flags) | |
|
402 | 402 | { |
|
403 | 403 | return (rec1->ha == rec2->ha && |
|
404 | 404 | xdl_recmatch(rec1->ptr, rec1->size, |
@@ -421,7 +421,7 b' static int recs_match(xrecord_t *rec1, x' | |||
|
421 | 421 | */ |
|
422 | 422 | static int get_indent(xrecord_t *rec) |
|
423 | 423 | { |
|
424 | long i; | |
|
424 | int64_t i; | |
|
425 | 425 | int ret = 0; |
|
426 | 426 | |
|
427 | 427 | for (i = 0; i < rec->size; i++) { |
@@ -497,10 +497,10 b' struct split_score {' | |||
|
497 | 497 | /* |
|
498 | 498 | * Fill m with information about a hypothetical split of xdf above line split. |
|
499 | 499 | */ |
|
500 |
static void measure_split(const xdfile_t *xdf, |
|
|
500 | static void measure_split(const xdfile_t *xdf, int64_t split, | |
|
501 | 501 | struct split_measurement *m) |
|
502 | 502 | { |
|
503 | long i; | |
|
503 | int64_t i; | |
|
504 | 504 | |
|
505 | 505 | if (split >= xdf->nrec) { |
|
506 | 506 | m->end_of_file = 1; |
@@ -706,13 +706,13 b' struct xdlgroup {' | |||
|
706 | 706 | * The index of the first changed line in the group, or the index of |
|
707 | 707 | * the unchanged line above which the (empty) group is located. |
|
708 | 708 | */ |
|
709 |
|
|
|
709 | int64_t start; | |
|
710 | 710 | |
|
711 | 711 | /* |
|
712 | 712 | * The index of the first unchanged line after the group. For an empty |
|
713 | 713 | * group, end is equal to start. |
|
714 | 714 | */ |
|
715 |
|
|
|
715 | int64_t end; | |
|
716 | 716 | }; |
|
717 | 717 | |
|
718 | 718 | /* |
@@ -762,7 +762,7 b' static inline int group_previous(xdfile_' | |||
|
762 | 762 | * following group, expand this group to include it. Return 0 on success or -1 |
|
763 | 763 | * if g cannot be slid down. |
|
764 | 764 | */ |
|
765 |
static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, |
|
|
765 | static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, int64_t flags) | |
|
766 | 766 | { |
|
767 | 767 | if (g->end < xdf->nrec && |
|
768 | 768 | recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { |
@@ -783,7 +783,7 b' static int group_slide_down(xdfile_t *xd' | |||
|
783 | 783 | * into a previous group, expand this group to include it. Return 0 on success |
|
784 | 784 | * or -1 if g cannot be slid up. |
|
785 | 785 | */ |
|
786 |
static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, |
|
|
786 | static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, int64_t flags) | |
|
787 | 787 | { |
|
788 | 788 | if (g->start > 0 && |
|
789 | 789 | recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { |
@@ -818,10 +818,10 b' static void xdl_bug(const char *msg)' | |||
|
818 | 818 | * This also helps in finding joinable change groups and reducing the diff |
|
819 | 819 | * size. |
|
820 | 820 | */ |
|
821 |
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, |
|
|
821 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, int64_t flags) { | |
|
822 | 822 | struct xdlgroup g, go; |
|
823 |
|
|
|
824 |
|
|
|
823 | int64_t earliest_end, end_matching_other; | |
|
824 | int64_t groupsize; | |
|
825 | 825 | |
|
826 | 826 | group_init(xdf, &g); |
|
827 | 827 | group_init(xdfo, &go); |
@@ -906,7 +906,7 b' int xdl_change_compact(xdfile_t *xdf, xd' | |||
|
906 | 906 | * "score" for each position that the group can be shifted |
|
907 | 907 | * to. Then we pick the shift with the lowest score. |
|
908 | 908 | */ |
|
909 |
|
|
|
909 | int64_t shift, best_shift = -1; | |
|
910 | 910 | struct split_score best_score; |
|
911 | 911 | |
|
912 | 912 | /* |
@@ -975,7 +975,7 b' int xdl_change_compact(xdfile_t *xdf, xd' | |||
|
975 | 975 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) { |
|
976 | 976 | xdchange_t *cscr = NULL, *xch; |
|
977 | 977 | char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg; |
|
978 |
|
|
|
978 | int64_t i1, i2, l1, l2; | |
|
979 | 979 | |
|
980 | 980 | /* |
|
981 | 981 | * Trivial. Collects "groups" of changes and creates an edit script. |
@@ -1016,9 +1016,9 b' void xdl_free_script(xdchange_t *xscr) {' | |||
|
1016 | 1016 | xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) |
|
1017 | 1017 | { |
|
1018 | 1018 | xdchange_t *xch, *xchp, *lxch; |
|
1019 |
|
|
|
1020 |
|
|
|
1021 |
u |
|
|
1019 | int64_t max_common = 0; | |
|
1020 | int64_t max_ignorable = 0; | |
|
1021 | uint64_t ignored = 0; /* number of ignored blank lines */ | |
|
1022 | 1022 | |
|
1023 | 1023 | /* remove ignorable changes that are too far before other changes */ |
|
1024 | 1024 | for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { |
@@ -1035,7 +1035,7 b' xdchange_t *xdl_get_hunk(xdchange_t **xs' | |||
|
1035 | 1035 | lxch = *xscr; |
|
1036 | 1036 | |
|
1037 | 1037 | for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) { |
|
1038 |
|
|
|
1038 | int64_t distance = xch->i1 - (xchp->i1 + xchp->chg1); | |
|
1039 | 1039 | if (distance > max_common) |
|
1040 | 1040 | break; |
|
1041 | 1041 | |
@@ -1062,14 +1062,14 b' xdchange_t *xdl_get_hunk(xdchange_t **xs' | |||
|
1062 | 1062 | static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, |
|
1063 | 1063 | xdemitconf_t const *xecfg) |
|
1064 | 1064 | { |
|
1065 |
|
|
|
1065 | int64_t p = xe->nprefix, s = xe->nsuffix; | |
|
1066 | 1066 | xdchange_t *xch, *xche; |
|
1067 | 1067 | |
|
1068 | 1068 | if (!xecfg->hunk_func) |
|
1069 | 1069 | return -1; |
|
1070 | 1070 | |
|
1071 | 1071 | if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) { |
|
1072 |
|
|
|
1072 | int64_t i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec; | |
|
1073 | 1073 | for (xch = xscr; xch; xch = xche->next) { |
|
1074 | 1074 | xche = xdl_get_hunk(&xch, xecfg); |
|
1075 | 1075 | if (!xch) |
@@ -25,33 +25,33 b'' | |||
|
25 | 25 | |
|
26 | 26 | |
|
27 | 27 | typedef struct s_diffdata { |
|
28 |
|
|
|
29 |
u |
|
|
30 |
|
|
|
28 | int64_t nrec; | |
|
29 | uint64_t const *ha; | |
|
30 | int64_t *rindex; | |
|
31 | 31 | char *rchg; |
|
32 | 32 | } diffdata_t; |
|
33 | 33 | |
|
34 | 34 | typedef struct s_xdalgoenv { |
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
35 | int64_t mxcost; | |
|
36 | int64_t snake_cnt; | |
|
37 | int64_t heur_min; | |
|
38 | 38 | } xdalgoenv_t; |
|
39 | 39 | |
|
40 | 40 | typedef struct s_xdchange { |
|
41 | 41 | struct s_xdchange *next; |
|
42 |
|
|
|
43 |
|
|
|
42 | int64_t i1, i2; | |
|
43 | int64_t chg1, chg2; | |
|
44 | 44 | int ignore; |
|
45 | 45 | } xdchange_t; |
|
46 | 46 | |
|
47 | 47 | |
|
48 | 48 | |
|
49 |
int xdl_recs_cmp(diffdata_t *dd1, |
|
|
50 |
diffdata_t *dd2, |
|
|
51 |
|
|
|
49 | int xdl_recs_cmp(diffdata_t *dd1, int64_t off1, int64_t lim1, | |
|
50 | diffdata_t *dd2, int64_t off2, int64_t lim2, | |
|
51 | int64_t *kvdf, int64_t *kvdb, int need_min, xdalgoenv_t *xenv); | |
|
52 | 52 | int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
53 | 53 | xdfenv_t *xe); |
|
54 |
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, |
|
|
54 | int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, int64_t flags); | |
|
55 | 55 | int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr); |
|
56 | 56 | void xdl_free_script(xdchange_t *xscr); |
|
57 | 57 |
@@ -24,6 +24,7 b'' | |||
|
24 | 24 | #define XINCLUDE_H |
|
25 | 25 | |
|
26 | 26 | #include <ctype.h> |
|
27 | #include <stdint.h> | |
|
27 | 28 | #include <stdio.h> |
|
28 | 29 | #include <stdlib.h> |
|
29 | 30 | #include <string.h> |
@@ -31,35 +31,35 b'' | |||
|
31 | 31 | |
|
32 | 32 | typedef struct s_xdlclass { |
|
33 | 33 | struct s_xdlclass *next; |
|
34 | unsigned long ha; | |
|
34 | uint64_t ha; | |
|
35 | 35 | char const *line; |
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
36 | int64_t size; | |
|
37 | int64_t idx; | |
|
38 | int64_t len1, len2; | |
|
39 | 39 | } xdlclass_t; |
|
40 | 40 | |
|
41 | 41 | typedef struct s_xdlclassifier { |
|
42 | 42 | unsigned int hbits; |
|
43 |
|
|
|
43 | int64_t hsize; | |
|
44 | 44 | xdlclass_t **rchash; |
|
45 | 45 | chastore_t ncha; |
|
46 | 46 | xdlclass_t **rcrecs; |
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
47 | int64_t alloc; | |
|
48 | int64_t count; | |
|
49 | int64_t flags; | |
|
50 | 50 | } xdlclassifier_t; |
|
51 | 51 | |
|
52 | 52 | |
|
53 | 53 | |
|
54 | 54 | |
|
55 |
static int xdl_init_classifier(xdlclassifier_t *cf, |
|
|
55 | static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags); | |
|
56 | 56 | static void xdl_free_classifier(xdlclassifier_t *cf); |
|
57 | 57 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
|
58 | 58 | unsigned int hbits, xrecord_t *rec); |
|
59 |
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, |
|
|
59 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, | |
|
60 | 60 | xdlclassifier_t *cf, xdfile_t *xdf); |
|
61 | 61 | static void xdl_free_ctx(xdfile_t *xdf); |
|
62 |
static int xdl_clean_mmatch(char const *dis, |
|
|
62 | static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e); | |
|
63 | 63 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
|
64 | 64 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); |
|
65 | 65 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); |
@@ -67,7 +67,7 b' static int xdl_optimize_ctxs(xdlclassifi' | |||
|
67 | 67 | |
|
68 | 68 | |
|
69 | 69 | |
|
70 |
static int xdl_init_classifier(xdlclassifier_t *cf, |
|
|
70 | static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t flags) { | |
|
71 | 71 | cf->flags = flags; |
|
72 | 72 | |
|
73 | 73 | cf->hbits = xdl_hashbits((unsigned int) size); |
@@ -108,7 +108,7 b' static void xdl_free_classifier(xdlclass' | |||
|
108 | 108 | |
|
109 | 109 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
|
110 | 110 | unsigned int hbits, xrecord_t *rec) { |
|
111 | long hi; | |
|
111 | int64_t hi; | |
|
112 | 112 | char const *line; |
|
113 | 113 | xdlclass_t *rcrec; |
|
114 | 114 | xdlclass_t **rcrecs; |
@@ -163,11 +163,11 b' static int xdl_classify_record(unsigned ' | |||
|
163 | 163 | * outweighs the shift change. A diff result with suboptimal shifting is still |
|
164 | 164 | * valid. |
|
165 | 165 | */ |
|
166 |
static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, |
|
|
166 | static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, int64_t reserved, | |
|
167 | 167 | xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) { |
|
168 | 168 | mmfile_t msmall, mlarge; |
|
169 | 169 | /* prefix lines, prefix bytes, suffix lines, suffix bytes */ |
|
170 |
|
|
|
170 | int64_t plines = 0, pbytes = 0, slines = 0, sbytes = 0, i; | |
|
171 | 171 | /* prefix char pointer for msmall and mlarge */ |
|
172 | 172 | const char *pp1, *pp2; |
|
173 | 173 | /* suffix char pointer for msmall and mlarge */ |
@@ -237,18 +237,18 b' static void xdl_trim_files(mmfile_t *mf1' | |||
|
237 | 237 | } |
|
238 | 238 | |
|
239 | 239 | |
|
240 |
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, |
|
|
240 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, xpparam_t const *xpp, | |
|
241 | 241 | xdlclassifier_t *cf, xdfile_t *xdf) { |
|
242 | 242 | unsigned int hbits; |
|
243 |
|
|
|
244 | unsigned long hav; | |
|
243 | int64_t nrec, hsize, bsize; | |
|
244 | uint64_t hav; | |
|
245 | 245 | char const *blk, *cur, *top, *prev; |
|
246 | 246 | xrecord_t *crec; |
|
247 | 247 | xrecord_t **recs, **rrecs; |
|
248 | 248 | xrecord_t **rhash; |
|
249 | unsigned long *ha; | |
|
249 | uint64_t *ha; | |
|
250 | 250 | char *rchg; |
|
251 |
|
|
|
251 | int64_t *rindex; | |
|
252 | 252 | |
|
253 | 253 | ha = NULL; |
|
254 | 254 | rindex = NULL; |
@@ -296,9 +296,9 b' static int xdl_prepare_ctx(unsigned int ' | |||
|
296 | 296 | goto abort; |
|
297 | 297 | memset(rchg, 0, (nrec + 2) * sizeof(char)); |
|
298 | 298 | |
|
299 |
if (!(rindex = ( |
|
|
299 | if (!(rindex = (int64_t *) xdl_malloc((nrec + 1) * sizeof(long)))) | |
|
300 | 300 | goto abort; |
|
301 |
if (!(ha = (u |
|
|
301 | if (!(ha = (uint64_t *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) | |
|
302 | 302 | goto abort; |
|
303 | 303 | |
|
304 | 304 | xdf->nrec = nrec; |
@@ -340,7 +340,7 b' static void xdl_free_ctx(xdfile_t *xdf) ' | |||
|
340 | 340 | |
|
341 | 341 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
|
342 | 342 | xdfenv_t *xe) { |
|
343 |
|
|
|
343 | int64_t enl1, enl2, sample; | |
|
344 | 344 | mmfile_t tmf1, tmf2; |
|
345 | 345 | xdlclassifier_t cf; |
|
346 | 346 | |
@@ -388,8 +388,8 b' void xdl_free_env(xdfenv_t *xe) {' | |||
|
388 | 388 | } |
|
389 | 389 | |
|
390 | 390 | |
|
391 |
static int xdl_clean_mmatch(char const *dis, |
|
|
392 |
|
|
|
391 | static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e) { | |
|
392 | int64_t r, rdis0, rpdis0, rdis1, rpdis1; | |
|
393 | 393 | |
|
394 | 394 | /* |
|
395 | 395 | * Limits the window the is examined during the similar-lines |
@@ -452,7 +452,7 b' static int xdl_clean_mmatch(char const *' | |||
|
452 | 452 | * might be potentially discarded if they happear in a run of discardable. |
|
453 | 453 | */ |
|
454 | 454 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
|
455 |
|
|
|
455 | int64_t i, nm, nreff, mlim; | |
|
456 | 456 | xrecord_t **recs; |
|
457 | 457 | xdlclass_t *rcrec; |
|
458 | 458 | char *dis, *dis1, *dis2; |
@@ -515,7 +515,7 b' static int xdl_cleanup_records(xdlclassi' | |||
|
515 | 515 | * Early trim initial and terminal matching records. |
|
516 | 516 | */ |
|
517 | 517 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { |
|
518 |
|
|
|
518 | int64_t i, lim; | |
|
519 | 519 | xrecord_t **recs1, **recs2; |
|
520 | 520 | |
|
521 | 521 | recs1 = xdf1->recs; |
@@ -27,22 +27,22 b'' | |||
|
27 | 27 | |
|
28 | 28 | typedef struct s_chanode { |
|
29 | 29 | struct s_chanode *next; |
|
30 |
|
|
|
30 | int64_t icurr; | |
|
31 | 31 | } chanode_t; |
|
32 | 32 | |
|
33 | 33 | typedef struct s_chastore { |
|
34 | 34 | chanode_t *head, *tail; |
|
35 |
|
|
|
35 | int64_t isize, nsize; | |
|
36 | 36 | chanode_t *ancur; |
|
37 | 37 | chanode_t *sncur; |
|
38 |
|
|
|
38 | int64_t scurr; | |
|
39 | 39 | } chastore_t; |
|
40 | 40 | |
|
41 | 41 | typedef struct s_xrecord { |
|
42 | 42 | struct s_xrecord *next; |
|
43 | 43 | char const *ptr; |
|
44 |
|
|
|
45 | unsigned long ha; | |
|
44 | int64_t size; | |
|
45 | uint64_t ha; | |
|
46 | 46 | } xrecord_t; |
|
47 | 47 | |
|
48 | 48 | typedef struct s_xdfile { |
@@ -50,7 +50,7 b' typedef struct s_xdfile {' | |||
|
50 | 50 | chastore_t rcha; |
|
51 | 51 | |
|
52 | 52 | /* number of records (lines) */ |
|
53 |
|
|
|
53 | int64_t nrec; | |
|
54 | 54 | |
|
55 | 55 | /* hash table size |
|
56 | 56 | * the maximum hash value in the table is (1 << hbits) */ |
@@ -64,7 +64,7 b' typedef struct s_xdfile {' | |||
|
64 | 64 | * [recs[i] for i in range(0, dstart)] are common prefix. |
|
65 | 65 | * [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting |
|
66 | 66 | * lines */ |
|
67 |
|
|
|
67 | int64_t dstart, dend; | |
|
68 | 68 | |
|
69 | 69 | /* pointer to records (lines) */ |
|
70 | 70 | xrecord_t **recs; |
@@ -82,14 +82,14 b' typedef struct s_xdfile {' | |||
|
82 | 82 | * rindex[0] is likely dstart, if not removed up by rule 2. |
|
83 | 83 | * rindex[nreff - 1] is likely dend, if not removed by rule 2. |
|
84 | 84 | */ |
|
85 |
|
|
|
85 | int64_t *rindex; | |
|
86 | 86 | |
|
87 | 87 | /* rindex size */ |
|
88 |
|
|
|
88 | int64_t nreff; | |
|
89 | 89 | |
|
90 | 90 | /* cleaned-up record index => hash value |
|
91 | 91 | * ha[i] = recs[rindex[i]]->ha */ |
|
92 | unsigned long *ha; | |
|
92 | uint64_t *ha; | |
|
93 | 93 | } xdfile_t; |
|
94 | 94 | |
|
95 | 95 | typedef struct s_xdfenv { |
@@ -97,7 +97,7 b' typedef struct s_xdfenv {' | |||
|
97 | 97 | |
|
98 | 98 | /* number of lines for common prefix and suffix that are removed |
|
99 | 99 | * from xdf1 and xdf2 as a preprocessing step */ |
|
100 |
|
|
|
100 | int64_t nprefix, nsuffix; | |
|
101 | 101 | } xdfenv_t; |
|
102 | 102 | |
|
103 | 103 |
@@ -27,8 +27,8 b'' | |||
|
27 | 27 | |
|
28 | 28 | |
|
29 | 29 | |
|
30 |
|
|
|
31 | long i; | |
|
30 | int64_t xdl_bogosqrt(int64_t n) { | |
|
31 | int64_t i; | |
|
32 | 32 | |
|
33 | 33 | /* |
|
34 | 34 | * Classical integer square root approximation using shifts. |
@@ -40,20 +40,20 b' long xdl_bogosqrt(long n) {' | |||
|
40 | 40 | } |
|
41 | 41 | |
|
42 | 42 | |
|
43 |
void *xdl_mmfile_first(mmfile_t *mmf, |
|
|
43 | void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size) | |
|
44 | 44 | { |
|
45 | 45 | *size = mmf->size; |
|
46 | 46 | return mmf->ptr; |
|
47 | 47 | } |
|
48 | 48 | |
|
49 | 49 | |
|
50 |
|
|
|
50 | int64_t xdl_mmfile_size(mmfile_t *mmf) | |
|
51 | 51 | { |
|
52 | 52 | return mmf->size; |
|
53 | 53 | } |
|
54 | 54 | |
|
55 | 55 | |
|
56 |
int xdl_cha_init(chastore_t *cha, |
|
|
56 | int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount) { | |
|
57 | 57 | |
|
58 | 58 | cha->head = cha->tail = NULL; |
|
59 | 59 | cha->isize = isize; |
@@ -100,8 +100,8 b' void *xdl_cha_alloc(chastore_t *cha) {' | |||
|
100 | 100 | return data; |
|
101 | 101 | } |
|
102 | 102 | |
|
103 |
|
|
|
104 |
|
|
|
103 | int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample) { | |
|
104 | int64_t nl = 0, size, tsize = 0; | |
|
105 | 105 | char const *data, *cur, *top; |
|
106 | 106 | |
|
107 | 107 | if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { |
@@ -121,15 +121,15 b' long xdl_guess_lines(mmfile_t *mf, long ' | |||
|
121 | 121 | return nl + 1; |
|
122 | 122 | } |
|
123 | 123 | |
|
124 |
int xdl_recmatch(const char *l1, |
|
|
124 | int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, int64_t flags) | |
|
125 | 125 | { |
|
126 | 126 | if (s1 == s2 && !memcmp(l1, l2, s1)) |
|
127 | 127 | return 1; |
|
128 | 128 | return 0; |
|
129 | 129 | } |
|
130 | 130 | |
|
131 |
u |
|
|
132 |
u |
|
|
131 | uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) { | |
|
132 | uint64_t ha = 5381; | |
|
133 | 133 | char const *ptr = *data; |
|
134 | 134 | |
|
135 | 135 | for (; ptr < top && *ptr != '\n'; ptr++) { |
@@ -25,13 +25,13 b'' | |||
|
25 | 25 | |
|
26 | 26 | |
|
27 | 27 | |
|
28 |
|
|
|
29 |
int xdl_cha_init(chastore_t *cha, |
|
|
28 | int64_t xdl_bogosqrt(int64_t n); | |
|
29 | int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount); | |
|
30 | 30 | void xdl_cha_free(chastore_t *cha); |
|
31 | 31 | void *xdl_cha_alloc(chastore_t *cha); |
|
32 |
|
|
|
33 |
int xdl_recmatch(const char *l1, |
|
|
34 |
u |
|
|
32 | int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample); | |
|
33 | int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, int64_t flags); | |
|
34 | uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags); | |
|
35 | 35 | unsigned int xdl_hashbits(unsigned int size); |
|
36 | 36 | |
|
37 | 37 |
General Comments 0
You need to be logged in to leave comments.
Login now