Show More
@@ -46,15 +46,49 b' typedef struct s_xrecord {' | |||
|
46 | 46 | } xrecord_t; |
|
47 | 47 | |
|
48 | 48 | typedef struct s_xdfile { |
|
49 | /* manual memory management */ | |
|
49 | 50 | chastore_t rcha; |
|
51 | ||
|
52 | /* number of records (lines) */ | |
|
50 | 53 | long nrec; |
|
54 | ||
|
55 | /* hash table size | |
|
56 | * the maximum hash value in the table is (1 << hbits) */ | |
|
51 | 57 | unsigned int hbits; |
|
58 | ||
|
59 | /* hash table, hash value => xrecord_t | |
|
60 | * note: xrecord_t is a linked list. */ | |
|
52 | 61 | xrecord_t **rhash; |
|
62 | ||
|
63 | /* range excluding common prefix and suffix | |
|
64 | * [recs[i] for i in range(0, dstart)] are common prefix. | |
|
65 | * [recs[i] for i in range(dstart, dend + 1)] are interesting | |
|
66 | * lines */ | |
|
53 | 67 | long dstart, dend; |
|
68 | ||
|
69 | /* pointer to records (lines) */ | |
|
54 | 70 | xrecord_t **recs; |
|
71 | ||
|
72 | /* record changed, use original "recs" index | |
|
73 | * rchg[i] can be either 0 or 1. 1 means recs[i] (line i) is marked | |
|
74 | * "changed". */ | |
|
55 | 75 | char *rchg; |
|
76 | ||
|
77 | /* cleaned-up record index => original "recs" index | |
|
78 | * clean-up means: | |
|
79 | * rule 1. remove common prefix and suffix | |
|
80 | * rule 2. remove records that are only on one side, since they can | |
|
81 | * not match the other side | |
|
82 | * rindex[0] is likely dstart, if not removed by rule 2. | |
|
83 | * rindex[nreff - 1] is likely dend, if not removed by rule 2. | |
|
84 | */ | |
|
56 | 85 | long *rindex; |
|
86 | ||
|
87 | /* rindex size */ | |
|
57 | 88 | long nreff; |
|
89 | ||
|
90 | /* cleaned-up record index => hash value | |
|
91 | * ha[i] = recs[rindex[i]]->ha */ | |
|
58 | 92 | unsigned long *ha; |
|
59 | 93 | } xdfile_t; |
|
60 | 94 |
General Comments 0
You need to be logged in to leave comments.
Login now